Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/mode_decision.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 3-Clause Clear License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
/***************************************
14
* Includes
15
***************************************/
16
#include <stdbool.h>
17
#include <stdio.h>
18
#include <stdlib.h>
19
#include <limits.h>
20
21
#include "common_utils.h"
22
#include "enc_dec_process.h"
23
#include "definitions.h"
24
#include "sequence_control_set.h"
25
#include "mode_decision.h"
26
#include "md_process.h"
27
#include "motion_estimation.h"
28
29
#include "av1me.h"
30
#include "hash.h"
31
#include "enc_inter_prediction.h"
32
#include "rd_cost.h"
33
#include "aom_dsp_rtcd.h"
34
#include "svt_log.h"
35
#include "resize.h"
36
#include "mcomp.h"
37
#include "ac_bias.h"
38
#include "src_ops_process.h"
39
#include "utility.h"
40
#include "adaptive_mv_pred.h"
41
#include "av1me.h"
42
static const uint32_t intra_luma_to_chroma[INTRA_MODES] = {
43
    UV_DC_PRED, // Average of above and left pixels
44
    UV_V_PRED, // Vertical
45
    UV_H_PRED, // Horizontal
46
    UV_D45_PRED, // Directional 45  degree
47
    UV_D135_PRED, // Directional 135 degree
48
    UV_D113_PRED, // Directional 113 degree
49
    UV_D157_PRED, // Directional 157 degree
50
    UV_D203_PRED, // Directional 203 degree
51
    UV_D67_PRED, // Directional 67  degree
52
    UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation
53
    UV_SMOOTH_V_PRED, // Vertical interpolation
54
    UV_SMOOTH_H_PRED, // Horizontal interpolation
55
    UV_PAETH_PRED, // Predict from the direction of smallest gradient
56
};
57
58
void calc_target_weighted_pred(PictureControlSet* pcs, ModeDecisionContext* ctx, const Av1Common* cm,
59
                               const MacroBlockD* xd, int mi_row, int mi_col, const uint8_t* above, int above_stride,
60
                               const uint8_t* left, int left_stride);
61
// Advance a mode-decision candidate counter by one, provided the incremented
// value stays strictly below max_can_count; otherwise the counter is left
// untouched and an error is logged.
// Both arguments are parenthesized in the expansion so that compound argument
// expressions (e.g. `*p_cnt` or `a + b`) keep their intended precedence.
// `cnt` must be a modifiable lvalue; it is evaluated more than once, so it must
// not carry side effects.
#define INC_MD_CAND_CNT(cnt, max_can_count)                  \
    MULTI_LINE_MACRO_BEGIN                                   \
    if ((cnt) + 1 < (max_can_count)) {                       \
        (cnt)++;                                             \
    } else {                                                 \
        SVT_ERROR("Mode decision candidate count exceeded"); \
    }                                                        \
    MULTI_LINE_MACRO_END
68
69
0
// Sentinel value 0x7fffffff (the largest positive 32-bit signed integer).
// NOTE(review): presumably marks an invalid super-resolution state; its users are not visible here.
#define SUPERRES_INVALID_STATE 0x7fffffff
70
71
3.45M
// Returns the lossless flag that applies to a segment.
// With segmentation enabled in the frame header the flag is looked up per
// segment_id; otherwise entry 0 of pcs->lossless is used for every segment.
bool svt_av1_is_lossless_segment(PictureControlSet* pcs, int8_t segment_id) {
    const FrameHeader* hdr = &pcs->ppcs->frm_hdr;
    const int          idx = hdr->segmentation_params.segmentation_enabled ? segment_id : 0;
    return pcs->lossless[idx];
}
79
80
0
static bool check_mv_validity(int16_t x_mv, int16_t y_mv, uint8_t need_shift) {
81
0
    Mv mv;
82
    //go to 1/8th if input is 1/4pel
83
0
    mv.y = y_mv << need_shift;
84
0
    mv.x = x_mv << need_shift;
85
    /* AV1 limits
86
      -16384 < MV_x_in_1/8 or MV_y_in_1/8 < 16384
87
      which means in full pel:
88
      -2048 < MV_x_in_full_pel or MV_y_in_full_pel < 2048
89
    */
90
0
    if (!is_mv_valid(&mv)) {
91
0
        return false;
92
0
    }
93
0
    return true;
94
0
}
95
96
int svt_is_interintra_allowed(uint8_t enable_inter_intra, BlockSize bsize, PredictionMode mode,
97
0
                              const MvReferenceFrame ref_frame[2]) {
98
0
    return enable_inter_intra && svt_aom_is_interintra_allowed_bsize((const BlockSize)bsize) &&
99
0
        svt_aom_is_interintra_allowed_mode(mode) && svt_aom_is_interintra_allowed_ref(ref_frame);
100
0
}
101
102
0
// Returns 1 when neither block dimension exceeds 32, 0 otherwise.
int svt_aom_filter_intra_allowed_bsize(BlockSize bs) {
    if (block_size_wide[bs] > 32) {
        return 0;
    }
    return block_size_high[bs] <= 32;
}
105
106
238k
// Returns non-zero when filter-intra may be used for a block: the tool must be
// enabled, the mode must be DC_PRED, no palette may be in use (palette_size == 0)
// and the block size must pass svt_aom_filter_intra_allowed_bsize().
int svt_aom_filter_intra_allowed(uint8_t enable_filter_intra, BlockSize bsize, uint8_t palette_size, uint32_t mode) {
    if (!enable_filter_intra) {
        return 0;
    }
    if (mode != DC_PRED) {
        return 0;
    }
    if (palette_size != 0) {
        return 0;
    }
    return svt_aom_filter_intra_allowed_bsize(bsize) ? 1 : 0;
}
109
110
// returns the max inter-inter compound type based on settings and block size
111
0
static MD_COMP_TYPE get_tot_comp_types_bsize(MD_COMP_TYPE tot_comp_types, BlockSize bsize) {
112
0
    return (svt_aom_get_wedge_params_bits(bsize) == 0) ? MIN(tot_comp_types, MD_COMP_WEDGE) : tot_comp_types;
113
0
}
114
115
/*
116
Get the ME offset for a given block (the offset used to locate the PA MVs from the parent PCS).
117
*/
118
uint32_t svt_aom_get_me_block_offset(const uint32_t org_x, const uint32_t org_y, const BlockSize bsize,
119
227k
                                     const uint8_t enable_me_8x8, const uint8_t enable_me_16x16) {
120
227k
    const int      bwidth     = block_size_wide[bsize];
121
227k
    const int      bheight    = block_size_high[bsize];
122
227k
    const uint32_t max_length = MAX(bwidth, bheight);
123
124
227k
    uint32_t me_idx = 0;
125
227k
    switch (max_length) {
126
0
    case 4:
127
222k
    case 8:
128
222k
        me_idx++;
129
222k
        if (org_x & 8) { // (org_x % 16) / 8
130
108k
            me_idx += 1;
131
108k
        }
132
222k
        if (org_y & 8) { // (org_y % 16) / 8
133
108k
            me_idx += 2;
134
108k
        }
135
222k
        AOM_FALLTHROUGH_INTENDED;
136
224k
    case 16:
137
224k
        me_idx++;
138
224k
        if (org_x & 16) { // (org_x % 32) / 16
139
107k
            me_idx += 5;
140
107k
        }
141
224k
        if (org_y & 16) { // (org_y % 32) / 16
142
107k
            me_idx += 10;
143
107k
        }
144
224k
        AOM_FALLTHROUGH_INTENDED;
145
224k
    case 32:
146
224k
        me_idx++;
147
224k
        if (org_x & 32) { // (org_x % 64) / 32
148
105k
            me_idx += 21;
149
105k
        }
150
224k
        if (org_y & 32) { // (org_y % 64) / 32
151
103k
            me_idx += 42;
152
103k
        }
153
224k
        break;
154
2.46k
    default:
155
        // me_idx = 0;
156
2.46k
        break;
157
227k
    }
158
159
227k
    uint32_t me_block_offset = me_idx_85[me_idx]; // convert idx to me_idx
160
161
228k
    if (!enable_me_8x8) {
162
228k
        if (me_block_offset >= MAX_SB64_PU_COUNT_NO_8X8) {
163
223k
            me_block_offset = me_idx_85_8x8_to_16x16_conversion[me_block_offset - MAX_SB64_PU_COUNT_NO_8X8];
164
223k
        }
165
228k
        assert(me_block_offset < 21);
166
228k
        if (!enable_me_16x16) {
167
0
            if (me_block_offset >= MAX_SB64_PU_COUNT_WO_16X16) {
168
0
                assert(me_block_offset < 21);
169
0
                me_block_offset = me_idx_16x16_to_parent_32x32_conversion[me_block_offset - MAX_SB64_PU_COUNT_WO_16X16];
170
0
            }
171
0
        }
172
228k
    }
173
174
227k
    return me_block_offset;
175
227k
}
176
177
//Given one reference frame identified by the pair (list_index,ref_index)
178
//indicate if ME data is valid
179
uint8_t svt_aom_is_me_data_present(uint32_t me_block_offset, uint32_t me_cand_offset, const MeSbResults* me_results,
180
0
                                   uint8_t list_idx, uint8_t ref_idx) {
181
0
    uint8_t            total_me_cnt     = me_results->total_me_candidate_index[me_block_offset];
182
0
    const MeCandidate* me_block_results = &me_results->me_candidate_array[me_cand_offset];
183
0
    for (uint32_t me_cand_i = 0; me_cand_i < total_me_cnt; ++me_cand_i) {
184
0
        const MeCandidate* me_cand = &me_block_results[me_cand_i];
185
0
        assert(me_cand->direction <= 2);
186
0
        if (me_cand->direction == 0 || me_cand->direction == 2) {
187
0
            if (list_idx == me_cand->ref0_list && ref_idx == me_cand->ref_idx_l0) {
188
0
                return 1;
189
0
            }
190
0
        }
191
0
        if (me_cand->direction == 1 || me_cand->direction == 2) {
192
0
            if (list_idx == me_cand->ref1_list && ref_idx == me_cand->ref_idx_l1) {
193
0
                return 1;
194
0
            }
195
0
        }
196
0
    }
197
0
    return 0;
198
0
}
199
200
/********************************************
201
* Constants
202
********************************************/
203
// 1 - Regular uni-pred ,
204
// 2 - Regular uni-pred + Wedge compound Inter Intra
205
// 3 - Regular uni-pred + Wedge compound Inter Intra + Smooth compound Inter Intra
206
207
#if CONFIG_ENABLE_OBMC
// Returns true when warped motion may be considered for the current block: the
// frame header must allow it, the block must have overlappable candidates, both
// block dimensions must be at least 8, and the warped-motion controls must be enabled.
static bool warped_motion_mode_allowed(PictureControlSet* pcs, ModeDecisionContext* ctx) {
    const FrameHeader* hdr = &pcs->ppcs->frm_hdr;

    if (!hdr->allow_warped_motion) {
        return false;
    }
    if (!has_overlappable_candidates(ctx->blk_ptr)) {
        return false;
    }
    if (ctx->blk_geom->bwidth < 8 || ctx->blk_geom->bheight < 8) {
        return false;
    }
    return ctx->wm_ctrls.enabled;
}
#endif
214
MotionMode svt_aom_obmc_motion_mode_allowed(
215
    const PictureControlSet* pcs, ModeDecisionContext* ctx, const BlockSize bsize,
216
    uint8_t          situation, // 0: candidate(s) preparation, 1: data preparation, 2: simple translation face-off
217
0
    MvReferenceFrame rf0, MvReferenceFrame rf1, PredictionMode mode) {
218
0
    if (ctx->obmc_ctrls.trans_face_off && !situation) {
219
0
        return SIMPLE_TRANSLATION;
220
0
    }
221
    // check if should cap the max block size for obmc
222
223
0
    if (block_size_wide[bsize] > ctx->obmc_ctrls.max_blk_size ||
224
0
        block_size_high[bsize] > ctx->obmc_ctrls.max_blk_size) {
225
0
        return SIMPLE_TRANSLATION;
226
0
    }
227
0
    if (!ctx->obmc_ctrls.enabled) {
228
0
        return SIMPLE_TRANSLATION;
229
0
    }
230
0
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
231
232
0
    if (!frm_hdr->is_motion_mode_switchable) {
233
0
        return SIMPLE_TRANSLATION;
234
0
    }
235
236
0
    if (frm_hdr->force_integer_mv == 0) {
237
0
        const TransformationType gm_type = pcs->ppcs->global_motion[rf0].wmtype;
238
0
        if (is_global_mv_block(mode, bsize, gm_type)) {
239
0
            return SIMPLE_TRANSLATION;
240
0
        }
241
0
    }
242
0
    if (is_motion_variation_allowed_bsize(bsize) && is_inter_singleref_mode(mode) && rf1 != INTRA_FRAME &&
243
0
        !(rf1 > INTRA_FRAME)) // is_motion_variation_allowed_compound
244
0
    {
245
0
        if (!has_overlappable_candidates(ctx->blk_ptr)) { // check_num_overlappable_neighbors
246
0
            return SIMPLE_TRANSLATION;
247
0
        }
248
249
0
        return OBMC_CAUSAL;
250
0
    } else {
251
0
        return SIMPLE_TRANSLATION;
252
0
    }
253
0
}
254
255
//static uint32_t  AntiContouringIntraMode[11] = { EB_INTRA_PLANAR, EB_INTRA_DC, EB_INTRA_HORIZONTAL, EB_INTRA_VERTICAL,
256
//EB_INTRA_MODE_2, EB_INTRA_MODE_6, EB_INTRA_MODE_14, EB_INTRA_MODE_18, EB_INTRA_MODE_22, EB_INTRA_MODE_30, EB_INTRA_MODE_34 };
257
0
// Returns 1 when the inter prediction mode contains a NEWMV component
// (NEWMV, NEW_NEWMV, NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV or NEW_NEARMV), 0 otherwise.
int32_t svt_aom_have_newmv_in_inter_mode(PredictionMode mode) {
    switch (mode) {
    case NEWMV:
    case NEW_NEWMV:
    case NEAREST_NEWMV:
    case NEW_NEARESTMV:
    case NEAR_NEWMV:
    case NEW_NEARMV:
        return 1;
    default:
        return 0;
    }
}
261
262
static MvReferenceFrame to_ref_frame[2][4] = {{LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME},
263
                                              {BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME, INVALID_REF}};
264
265
0
MvReferenceFrame svt_get_ref_frame_type(uint8_t list, uint8_t ref_idx) {
266
0
    return to_ref_frame[list][ref_idx];
267
0
};
268
269
0
// Returns the number of DRL indices to consider for a mode, given the number of
// reference MV candidates (refmvCnt).
//   NEWMV / NEW_NEWMV:                                  1 if refmvCnt < 2, 2 if == 2, else 3
//   NEARMV / NEAR_NEARMV / NEAR_NEWMV / NEW_NEARMV:     1 if refmvCnt < 3, 2 if == 3, else 3
//   any other mode:                                     0
uint8_t svt_aom_get_max_drl_index(uint8_t refmvCnt, PredictionMode mode) {
    uint8_t pivot; // candidate count at which exactly two indices are available

    if (mode == NEWMV || mode == NEW_NEWMV) {
        pivot = 2;
    } else if (mode == NEARMV || mode == NEAR_NEARMV || mode == NEAR_NEWMV || mode == NEW_NEARMV) {
        pivot = 3;
    } else {
        return 0;
    }

    if (refmvCnt < pivot) {
        return 1;
    }
    return (refmvCnt == pivot) ? 2 : 3;
}
294
295
0
// Weight passed as the last argument of svt_av1_mv_bit_cost() when costing MVs in this file.
#define MV_COST_WEIGHT 108
296
297
// Picks the wedge for wedge-based inter-intra prediction.
// Builds the two residual signals (src - p1 and p1 - p0) at block width stride and
// hands them to pick_wedge_fixed_sign() with the sign fixed to 0.
// The chosen wedge index is written to *wedge_index_out (-1 is the value it is
// seeded with before the search); the value returned by the search is passed
// back to the caller.
static int64_t pick_interintra_wedge(PictureControlSet* pcs, ModeDecisionContext* ctx, const BlockSize bsize,
                                     const uint8_t* const p0, const uint8_t* const p1, uint8_t* src_buf,
                                     uint32_t src_stride, int8_t* wedge_index_out) {
    assert(svt_aom_is_interintra_wedge_used(bsize));

    const int blk_w = block_size_wide[bsize];
    const int blk_h = block_size_high[bsize];
    DECLARE_ALIGNED(32, int16_t, src_minus_p1[MAX_INTERINTRA_SB_SQUARE]); // src - pred1
    DECLARE_ALIGNED(32, int16_t, p1_minus_p0[MAX_INTERINTRA_SB_SQUARE]); // pred1 - pred0

#if CONFIG_ENABLE_HIGH_BIT_DEPTH
    if (ctx->hbd_md) {
        svt_aom_highbd_subtract_block(blk_h, blk_w, src_minus_p1, blk_w, src_buf, src_stride, p1, blk_w, EB_TEN_BIT);
        svt_aom_highbd_subtract_block(blk_h, blk_w, p1_minus_p0, blk_w, p1, blk_w, p0, blk_w, EB_TEN_BIT);
    } else
#endif
    {
        svt_aom_subtract_block(blk_h, blk_w, src_minus_p1, blk_w, src_buf, src_stride, p1, blk_w);
        svt_aom_subtract_block(blk_h, blk_w, p1_minus_p0, blk_w, p1, blk_w, p0, blk_w);
    }

    int8_t        best_wedge = -1;
    const int64_t rd         = pick_wedge_fixed_sign(pcs, ctx, bsize, src_minus_p1, p1_minus_p0, 0, &best_wedge);
    *wedge_index_out         = best_wedge;

    return rd;
}
325
326
0
// Searches the inter-intra settings for a candidate.
// Steps:
//   1. Run the regular (non-compound) inter prediction for the candidate's first
//      reference into a local luma buffer.
//   2. Combine it with each pre-computed intra prediction (ctx->intrapred_buf[mode])
//      and keep the inter-intra mode with the lowest cost (model-based RD or SSE,
//      depending on inter_intra_comp_ctrls.use_rd_model).
//   3. Optionally run the wedge search and decide whether the candidate uses wedge
//      inter-intra.
// Outputs are written into cand->block_mi (interintra_mode, interintra_wedge_index,
// use_wedge_interintra); interp_filters and is_interintra_used are reset to 0.
static void inter_intra_search(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand) {
    SequenceControlSet* scs = pcs->scs;
    DECLARE_ALIGNED(16, uint8_t, tmp_buf[2 * MAX_INTERINTRA_SB_SQUARE]);
    DECLARE_ALIGNED(16, uint8_t, ii_pred_buf[2 * MAX_INTERINTRA_SB_SQUARE]);

    // Source block, addressed both as 16-bit and as 8-bit samples.
    EbPictureBufferDesc* src_pic = ctx->hbd_md ? pcs->input_frame16bit : pcs->ppcs->enhanced_pic;
    uint16_t* src_buf_hbd = (uint16_t*)src_pic->y_buffer + (ctx->blk_org_x) + (ctx->blk_org_y) * src_pic->y_stride;
    uint8_t*  src_buf     = src_pic->y_buffer + (ctx->blk_org_x) + (ctx->blk_org_y) * src_pic->y_stride;

    uint8_t  bit_depth   = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT;
    uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];

    const BlockSize bsize   = ctx->blk_geom->bsize;
    uint32_t        bwidth  = ctx->blk_geom->bwidth;
    uint32_t        bheight = ctx->blk_geom->bheight;

    // Local descriptor receiving the inter prediction (luma only, stride = block width).
    EbPictureBufferDesc pred_desc;
    pred_desc.border   = 0;
    pred_desc.y_stride = bwidth;
    pred_desc.y_buffer = tmp_buf;

    // get inter pred for ref0
    EbPictureBufferDesc* ref0_pic = svt_aom_get_ref_pic_buffer(pcs, cand->block_mi.ref_frame[0]);

    // Use scaled references if resolution of the reference is different from that of the input
    // Only have one ref
    if (ref0_pic != NULL) {
        uint8_t list_idx0  = get_list_idx(cand->block_mi.ref_frame[0]);
        int8_t  ref_idx_l0 = get_ref_frame_idx(cand->block_mi.ref_frame[0]);
        svt_aom_use_scaled_rec_refs_if_needed(
            pcs,
            pcs->ppcs->enhanced_pic,
            (EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr,
            &ref0_pic,
            ctx->hbd_md);
    }

    // we call the regular inter prediction path here (no compound)
    cand->block_mi.interp_filters     = 0;
    cand->block_mi.is_interintra_used = 0;
    svt_aom_inter_prediction(scs,
                             pcs,
                             &cand->block_mi,
                             &cand->wm_params_l0,
                             &cand->wm_params_l1,
                             ctx->blk_ptr,
                             bsize,
                             ctx->shape,
                             false, // use_precomputed_obmc
                             false, // use_precomputed_ii - ii not performed here
                             ctx,
                             NULL,
                             NULL,
                             NULL,
                             ref0_pic,
                             NULL, // no second reference
                             ctx->blk_org_x,
                             ctx->blk_org_y,
                             &pred_desc, // output
                             0, // output org_x
                             0, // output org_y
                             PICTURE_BUFFER_DESC_LUMA_MASK,
                             ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
                             0); // is_16bit_pipeline

    assert(svt_aom_is_interintra_wedge_used(bsize)); // if not, a no-wedge path would be needed

    const int bsize_group = eb_size_group_lookup[bsize];

    int64_t        best_interintra_rd   = INT64_MAX;
    InterIntraMode best_interintra_mode = INTERINTRA_MODES;
    for (int mode_i = 0; mode_i < INTERINTRA_MODES; ++mode_i) {
        const InterIntraMode interintra_mode = (InterIntraMode)mode_i;
        const int            rmode = ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][interintra_mode];

        // Blend the inter prediction with the intra prediction of this mode (no wedge).
        if (ctx->hbd_md) {
            svt_aom_combine_interintra_highbd(interintra_mode, // mode
                                              0, // use_wedge_interintra
                                              0, // interintra_wedge_index
                                              0, // wedge_sign
                                              bsize,
                                              bsize, // plane_bsize
                                              ii_pred_buf, // comppred
                                              bwidth, // compstride
                                              tmp_buf, // interpred
                                              bwidth, // interstride
                                              ctx->intrapred_buf[mode_i], // intrapred
                                              bwidth, // intrastride
                                              bit_depth);
        } else {
            svt_aom_combine_interintra(interintra_mode, // mode
                                       0, // use_wedge_interintra
                                       0, // interintra_wedge_index
                                       0, // wedge_sign
                                       bsize,
                                       bsize, // plane_bsize
                                       ii_pred_buf, // comppred
                                       bwidth, // compstride
                                       tmp_buf, // interpred
                                       bwidth, // interstride
                                       ctx->intrapred_buf[mode_i], // intrapred
                                       bwidth); // intrastride
        }

        // Cost of this mode: modelled RD (including the mode rate) or plain SSE.
        int64_t rd;
        if (ctx->inter_intra_comp_ctrls.use_rd_model) {
            int     rate_sum;
            int64_t dist_sum;
            model_rd_for_sb_with_curvfit(pcs,
                                         ctx,
                                         bsize,
                                         bwidth,
                                         bheight,
                                         ctx->hbd_md ? (uint8_t*)src_buf_hbd : src_buf,
                                         src_pic->y_stride,
                                         ii_pred_buf,
                                         bwidth,
                                         0,
                                         0,
                                         0,
                                         0,
                                         &rate_sum,
                                         &dist_sum,
                                         NULL,
                                         NULL,
                                         NULL);

            rd = RDCOST(full_lambda, rate_sum + rmode, dist_sum);
        } else {
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
            if (ctx->hbd_md) {
                rd = svt_aom_highbd_sse((uint8_t*)src_buf_hbd, src_pic->y_stride, ii_pred_buf, bwidth, bwidth, bheight);
            } else
#endif
            {
                rd = svt_aom_sse(src_buf, src_pic->y_stride, ii_pred_buf, bwidth, bwidth, bheight);
            }
        }

        if (rd < best_interintra_rd) {
            best_interintra_rd             = rd;
            best_interintra_mode           = interintra_mode;
            cand->block_mi.interintra_mode = interintra_mode;
        }
    }

    // To test: Enable wedge search if source variance and edge strength are above the thresholds.
    // CHKN need to re-do intra pred using the winner, or have a separate intra search for wedge
    int64_t       best_interintra_rd_wedge = INT64_MAX;
    const uint8_t ii_wedge_mode            = ctx->shape == PART_N ? ctx->inter_intra_comp_ctrls.wedge_mode_sq
                                                                  : ctx->inter_intra_comp_ctrls.wedge_mode_nsq;
    if (ii_wedge_mode) {
        best_interintra_rd_wedge = pick_interintra_wedge(pcs,
                                                         ctx,
                                                         bsize,
                                                         ctx->intrapred_buf[best_interintra_mode],
                                                         tmp_buf,
                                                         ctx->hbd_md ? (uint8_t*)src_buf_hbd : src_buf,
                                                         src_pic->y_stride,
                                                         &cand->block_mi.interintra_wedge_index);
    }

    // for ii_wedge_mode 1, always inject wedge as a separate candidate; for wedge mode 2 only inject
    // if wedge is better than non-wedge
    const bool use_wedge                = (ii_wedge_mode == 1 || best_interintra_rd_wedge < best_interintra_rd);
    cand->block_mi.use_wedge_interintra = use_wedge ? 1 : 0;
}
493
494
static COMPOUND_TYPE to_av1_compound_lut[] = {COMPOUND_AVERAGE, COMPOUND_DISTWTD, COMPOUND_DIFFWTD, COMPOUND_WEDGE};
495
496
static void determine_compound_mode(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
497
0
                                    MD_COMP_TYPE cur_type) {
498
0
    BlockModeInfo* block_mi        = &cand->block_mi;
499
0
    block_mi->interinter_comp.type = to_av1_compound_lut[cur_type];
500
0
    switch (cur_type) {
501
0
    case MD_COMP_AVG:
502
0
        block_mi->comp_group_idx = 0;
503
0
        block_mi->compound_idx   = 1;
504
0
        break;
505
0
    case MD_COMP_DIST:
506
0
        block_mi->comp_group_idx = 0;
507
0
        block_mi->compound_idx   = 0;
508
0
        break;
509
0
    case MD_COMP_DIFF0:
510
0
        block_mi->comp_group_idx            = 1;
511
0
        block_mi->compound_idx              = 1;
512
0
        block_mi->interinter_comp.mask_type = 55;
513
0
        svt_aom_search_compound_diff_wedge(pcs, ctx, cand);
514
0
        break;
515
0
    case MD_COMP_WEDGE:
516
0
        block_mi->comp_group_idx = 1;
517
0
        block_mi->compound_idx   = 1;
518
0
        svt_aom_search_compound_diff_wedge(pcs, ctx, cand);
519
0
        break;
520
0
    default:
521
0
        SVT_ERROR("not used comp type\n");
522
0
        assert(0);
523
0
        break;
524
0
    }
525
0
}
526
527
void svt_aom_choose_best_av1_mv_pred(ModeDecisionContext* ctx, MvReferenceFrame ref_frame,
528
                                     PredictionMode mode, // NEW or NEW_NEW
529
                                     Mv mv0, Mv mv1,
530
                                     uint8_t* bestDrlIndex, // output
531
                                     Mv       best_pred_mv[2] // output
532
0
) {
533
0
    if (ctx->shut_fast_rate) {
534
0
        return;
535
0
    }
536
0
    if (ctx->approx_inter_rate > 1) {
537
0
        *bestDrlIndex   = 0;
538
0
        best_pred_mv[0] = ctx->ref_mv_stack[ref_frame][0].this_mv;
539
0
        best_pred_mv[1] = ctx->ref_mv_stack[ref_frame][0].comp_mv;
540
0
        return;
541
0
    }
542
0
    int16_t mv0x = mv0.x;
543
0
    int16_t mv0y = mv0.y;
544
0
    int16_t mv1x = mv1.x;
545
0
    int16_t mv1y = mv1.y;
546
547
0
    uint8_t is_compound = is_inter_compound_mode(mode);
548
549
0
    struct MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx;
550
0
    BlkStruct*                      blk_ptr         = ctx->blk_ptr;
551
0
    uint8_t                         max_drl_index;
552
0
    Mv                              nearestmv[2] = {{{0}}, {{0}}};
553
0
    Mv                              nearmv[2];
554
0
    Mv                              ref_mv[2];
555
0
    Mv                              mv;
556
557
0
    max_drl_index = svt_aom_get_max_drl_index(blk_ptr->av1xd->ref_mv_count[ref_frame], mode);
558
    // max_drl_index = 1;
559
560
0
    if (max_drl_index == 1) {
561
0
        *bestDrlIndex = 0;
562
563
0
        best_pred_mv[0] = ctx->ref_mv_stack[ref_frame][0].this_mv;
564
0
        best_pred_mv[1] = ctx->ref_mv_stack[ref_frame][0].comp_mv;
565
0
    } else {
566
0
        uint8_t  drli;
567
0
        uint32_t best_mv_cost = 0xFFFFFFFF;
568
0
        for (drli = 0; drli < max_drl_index; drli++) {
569
0
            svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, ref_frame, is_compound, mode, drli, nearestmv, nearmv, ref_mv);
570
571
            //compute the rate for this drli Cand
572
0
            mv.y             = mv0y;
573
0
            mv.x             = mv0x;
574
0
            uint32_t mv_rate = 0;
575
0
            if (ctx->approx_inter_rate) {
576
0
                mv_rate = (uint32_t)svt_av1_mv_bit_cost_light(&mv, &(ref_mv[0]));
577
0
            } else {
578
0
                mv_rate = (uint32_t)svt_av1_mv_bit_cost(
579
0
                    &mv, &(ref_mv[0]), md_rate_est_ctx->nmv_vec_cost, md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT);
580
0
            }
581
582
0
            if (is_compound) {
583
0
                mv.y = mv1y;
584
0
                mv.x = mv1x;
585
0
                if (ctx->approx_inter_rate) {
586
0
                    mv_rate += (uint32_t)svt_av1_mv_bit_cost_light(&mv, &(ref_mv[1]));
587
0
                } else {
588
0
                    mv_rate += (uint32_t)svt_av1_mv_bit_cost(&mv,
589
0
                                                             &(ref_mv[1]),
590
0
                                                             md_rate_est_ctx->nmv_vec_cost,
591
0
                                                             md_rate_est_ctx->nmvcoststack,
592
0
                                                             MV_COST_WEIGHT);
593
0
                }
594
0
            }
595
596
0
            const int32_t new_mv = (mode == NEWMV || mode == NEW_NEWMV);
597
0
            if (new_mv) {
598
0
                int32_t idx;
599
0
                for (idx = 0; idx < 2; ++idx) {
600
0
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame] > idx + 1) {
601
0
                        uint8_t drl_1_ctx = av1_drl_ctx(&(ctx->ref_mv_stack[ref_frame][0]), idx);
602
0
                        mv_rate += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][drli != idx];
603
0
                        if (drli == idx) {
604
0
                            break;
605
0
                        }
606
0
                    }
607
0
                }
608
0
            }
609
610
0
            if (mv_rate < best_mv_cost) {
611
0
                best_mv_cost    = mv_rate;
612
0
                *bestDrlIndex   = drli;
613
0
                best_pred_mv[0] = ref_mv[0];
614
0
                best_pred_mv[1] = ref_mv[1];
615
0
            }
616
0
        }
617
0
    }
618
0
}
619
620
13.7k
// Destructor for a ModeDecisionCandidateBuffer: releases the pred, rec_coeff and
// quant members through EB_DELETE.
static void mode_decision_cand_bf_dctor(EbPtr p) {
    ModeDecisionCandidateBuffer* cand_bf = (ModeDecisionCandidateBuffer*)p;

    EB_DELETE(cand_bf->pred);
    EB_DELETE(cand_bf->rec_coeff);
    EB_DELETE(cand_bf->quant);
}
626
627
2.74k
// Destructor installed by svt_aom_mode_decision_scratch_cand_bf_ctor().
// Releases all five picture buffers of the scratch candidate buffer: pred, residual, rec_coeff, recon, quant.
static void mode_decision_scratch_cand_bf_dctor(EbPtr p) {
    ModeDecisionCandidateBuffer* scratch_bf = (ModeDecisionCandidateBuffer*)p;

    EB_DELETE(scratch_bf->pred);
    EB_DELETE(scratch_bf->residual);
    EB_DELETE(scratch_bf->rec_coeff);
    EB_DELETE(scratch_bf->recon);
    EB_DELETE(scratch_bf->quant);
}
635
636
/***************************************
637
* Mode Decision Candidate Ctor
638
***************************************/
639
// Constructs a mode-decision candidate buffer.
//
//   buffer_ptr        object to initialise; its dctor is set to mode_decision_cand_bf_dctor
//   max_bitdepth      bit depth of the prediction buffer (16-bit pipeline when above EB_EIGHT_BIT)
//   sb_size           width and height of every picture buffer created here
//   buffer_desc_mask  buffer_enable_mask forwarded to every picture buffer created here
//   temp_residual     residual buffer stored by pointer, not allocated here
//   temp_recon_ptr    recon buffer stored by pointer, not allocated here
//   fast_cost / full_cost / full_cost_ssim   cost slots stored by pointer
//
// Returns EB_ErrorNone on success. EB_NEW is a project macro; how it reports an allocation
// failure is not visible in this block.
EbErrorType svt_aom_mode_decision_cand_bf_ctor(ModeDecisionCandidateBuffer* buffer_ptr, EbBitDepth max_bitdepth,
                                               uint8_t sb_size, uint32_t buffer_desc_mask,
                                               EbPictureBufferDesc* temp_residual, EbPictureBufferDesc* temp_recon_ptr,
                                               uint64_t* fast_cost, uint64_t* full_cost, uint64_t* full_cost_ssim) {
    // Designated initialisers are used so that every member not listed below is zero
    // instead of indeterminate when the descriptor is handed to the picture-buffer ctor.
    EbPictureBufferDescInitData picture_buffer_desc_init_data = {
        .max_width          = sb_size,
        .max_height         = sb_size,
        .bit_depth          = max_bitdepth,
        .color_format       = EB_YUV420,
        .buffer_enable_mask = buffer_desc_mask,
        .border             = 0,
        .split_mode         = false,
        .is_16bit_pipeline  = max_bitdepth > EB_EIGHT_BIT,
    };

    // Same geometry, but 32-bit samples (used for rec_coeff and quant)
    EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data = picture_buffer_desc_init_data;
    thirty_two_width_picture_buffer_desc_init_data.bit_depth                   = EB_THIRTYTWO_BIT;
    thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline           = true;

    buffer_ptr->dctor = mode_decision_cand_bf_dctor;

    // Candidate Ptr
    buffer_ptr->cand = NULL;

    // Video Buffers
    EB_NEW(buffer_ptr->pred, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
    // Reuse the residual_ptr memory in MD context
    buffer_ptr->residual = temp_residual;
    EB_NEW(buffer_ptr->rec_coeff, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
    EB_NEW(buffer_ptr->quant, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
    // Reuse the recon_ptr memory in MD context
    buffer_ptr->recon = temp_recon_ptr;

    // Costs
    buffer_ptr->fast_cost      = fast_cost;
    buffer_ptr->full_cost      = full_cost;
    buffer_ptr->full_cost_ssim = full_cost_ssim;
    return EB_ErrorNone;
}
686
687
EbErrorType svt_aom_mode_decision_scratch_cand_bf_ctor(ModeDecisionCandidateBuffer* buffer_ptr, uint8_t sb_size,
688
2.74k
                                                       EbBitDepth max_bitdepth) {
689
2.74k
    EbPictureBufferDescInitData picture_buffer_desc_init_data;
690
2.74k
    EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data;
691
2.74k
    EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data;
692
693
2.74k
    buffer_ptr->dctor = mode_decision_scratch_cand_bf_dctor;
694
695
    // Init Picture Data
696
2.74k
    picture_buffer_desc_init_data.max_width                           = sb_size;
697
2.74k
    picture_buffer_desc_init_data.max_height                          = sb_size;
698
2.74k
    picture_buffer_desc_init_data.bit_depth                           = max_bitdepth;
699
2.74k
    picture_buffer_desc_init_data.color_format                        = EB_YUV420;
700
2.74k
    picture_buffer_desc_init_data.buffer_enable_mask                  = PICTURE_BUFFER_DESC_FULL_MASK;
701
2.74k
    picture_buffer_desc_init_data.border                              = 0;
702
2.74k
    picture_buffer_desc_init_data.split_mode                          = false;
703
2.74k
    picture_buffer_desc_init_data.is_16bit_pipeline                   = max_bitdepth > EB_EIGHT_BIT;
704
2.74k
    double_width_picture_buffer_desc_init_data.max_width              = sb_size;
705
2.74k
    double_width_picture_buffer_desc_init_data.max_height             = sb_size;
706
2.74k
    double_width_picture_buffer_desc_init_data.bit_depth              = EB_SIXTEEN_BIT;
707
2.74k
    double_width_picture_buffer_desc_init_data.color_format           = EB_YUV420;
708
2.74k
    double_width_picture_buffer_desc_init_data.buffer_enable_mask     = PICTURE_BUFFER_DESC_FULL_MASK;
709
2.74k
    double_width_picture_buffer_desc_init_data.border                 = 0;
710
2.74k
    double_width_picture_buffer_desc_init_data.split_mode             = false;
711
2.74k
    double_width_picture_buffer_desc_init_data.is_16bit_pipeline      = true;
712
2.74k
    thirty_two_width_picture_buffer_desc_init_data.max_width          = sb_size;
713
2.74k
    thirty_two_width_picture_buffer_desc_init_data.max_height         = sb_size;
714
2.74k
    thirty_two_width_picture_buffer_desc_init_data.bit_depth          = EB_THIRTYTWO_BIT;
715
2.74k
    thirty_two_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
716
2.74k
    thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
717
2.74k
    thirty_two_width_picture_buffer_desc_init_data.border             = 0;
718
2.74k
    thirty_two_width_picture_buffer_desc_init_data.split_mode         = false;
719
2.74k
    thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline  = true;
720
721
    // Candidate Ptr
722
2.74k
    buffer_ptr->cand = NULL;
723
724
    // Video Buffers
725
2.74k
    EB_NEW(buffer_ptr->pred, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
726
2.74k
    EB_NEW(buffer_ptr->residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data);
727
2.74k
    EB_NEW(buffer_ptr->rec_coeff, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
728
2.74k
    EB_NEW(buffer_ptr->quant, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
729
730
2.74k
    EB_NEW(buffer_ptr->recon, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
731
2.74k
    return EB_ErrorNone;
732
2.74k
}
733
734
/***************************************
735
* return true if the MV candidate is already injected
736
***************************************/
737
0
// Returns true when the candidate (mv0[, mv1], ref_type) must not be injected, either because
// an MV fails check_mv_validity() (only tested when ctx->corrupted_mv_check is set) or because
// a matching entry is already recorded in ctx->injected_mvs / ctx->injected_ref_types.
//
// Unipred (second ref of ref_type is <= INTRA_FRAME): a match means same ref type and same mv0.
// Bipred: with redundant-candidate score_th == 0 a match means same ref type and both MVs equal;
// otherwise a match means same ref type and either a zero summed absolute MV difference, or a
// difference below score_th while all four components of the new MVs exceed mag_th in magnitude.
static bool mv_is_already_injected(ModeDecisionContext* ctx, Mv mv0, Mv mv1, uint8_t ref_type) {
    MvReferenceFrame rf[2];
    av1_set_ref_frame(rf, ref_type);
    const bool is_unipred = rf[1] <= INTRA_FRAME;

    // Validity screening: mv0 always, mv1 only for bipred (and only once mv0 passed)
    if (ctx->corrupted_mv_check) {
        if (!check_mv_validity(mv0.x, mv0.y, 0)) {
            return true;
        }
        if (!is_unipred && !check_mv_validity(mv1.x, mv1.y, 0)) {
            return true;
        }
    }

    if (is_unipred) {
        for (int i = 0; i < ctx->injected_mv_count; i++) {
            if (ctx->injected_ref_types[i] != ref_type) {
                continue;
            }
            if (ctx->injected_mvs[i][0].as_int == mv0.as_int) {
                return true;
            }
        }
        return false;
    }

    RedundantCandCtrls* redund_ctrls = &ctx->cand_reduction_ctrls.redundant_cand_ctrls;

    // Exact-match search when similarity-based pruning is off
    if (!redund_ctrls->score_th) {
        for (int i = 0; i < ctx->injected_mv_count; i++) {
            if (ctx->injected_ref_types[i] != ref_type) {
                continue;
            }
            if (ctx->injected_mvs[i][0].as_int == mv0.as_int && ctx->injected_mvs[i][1].as_int == mv1.as_int) {
                return true;
            }
        }
        return false;
    }

    // Similarity-based search: near-duplicates only count when every component is large
    const bool all_high_mag = (ABS(mv0.x) > redund_ctrls->mag_th) && (ABS(mv0.y) > redund_ctrls->mag_th) &&
        (ABS(mv1.x) > redund_ctrls->mag_th) && (ABS(mv1.y) > redund_ctrls->mag_th);
    for (int i = 0; i < ctx->injected_mv_count; i++) {
        if (ctx->injected_ref_types[i] != ref_type) {
            continue;
        }
        int score = ABS(ctx->injected_mvs[i][0].x - mv0.x) + ABS(ctx->injected_mvs[i][0].y - mv0.y) +
            ABS(ctx->injected_mvs[i][1].x - mv1.x) + ABS(ctx->injected_mvs[i][1].y - mv1.y);

        if (score == 0 || (score < redund_ctrls->score_th && all_high_mag)) {
            return true;
        }
    }
    return false;
}
786
787
// Tells whether the unipred reference (list_idx, ref_idx) may be used for inter_cand_group.
// Always true when reference pruning is disabled. Otherwise the reference is valid if its
// filtering result has do_ref set, or if it is ref_idx 0 and closest_refs is set for the group.
bool svt_aom_is_valid_unipred_ref(ModeDecisionContext* ctx, uint8_t inter_cand_group, uint8_t list_idx,
                                  uint8_t ref_idx) {
    if (!ctx->ref_pruning_ctrls.enabled) {
        return true;
    }

    return ctx->ref_filtering_res[inter_cand_group][list_idx][ref_idx].do_ref ||
        (ref_idx == 0 && ctx->ref_pruning_ctrls.closest_refs[inter_cand_group]);
}
799
800
// Determine if the MV-to-MVP difference satisfies the mv_diff restriction
801
0
// Returns true when each tested MV lies within (1 << MV_IN_USE_BITS) of its predictor in both
// x and y. Only mv0/best_pred_mv[0] is tested unless is_compound is non-zero, in which case
// mv1/best_pred_mv[1] is tested as well.
static bool is_valid_mv_diff(Mv best_pred_mv[2], Mv mv0, Mv mv1, uint8_t is_compound) {
    const uint8_t diff_bits = MV_IN_USE_BITS;
    const Mv      mvs[2]    = {mv0, mv1};
    const int     num_mvs   = is_compound ? 2 : 1;

    for (int i = 0; i < num_mvs; i++) {
        if (abs(mvs[i].x - best_pred_mv[i].x) > (1 << diff_bits) ||
            abs(mvs[i].y - best_pred_mv[i].y) > (1 << diff_bits)) {
            return false;
        }
    }
    return true;
}
817
818
// Tells whether the bipred reference pair may be used for inter_cand_group.
// Always true when reference pruning is disabled. Otherwise the pair is valid when both
// references have do_ref set; if either does not, the pair is still valid only when
// closest_refs is set for the group and both ref indices are 0.
static bool is_valid_bipred_ref(ModeDecisionContext* ctx, uint8_t inter_cand_group, uint8_t list_idx_0,
                                uint8_t ref_idx_0, uint8_t list_idx_1, uint8_t ref_idx_1) {
    if (!ctx->ref_pruning_ctrls.enabled) {
        return true;
    }

    if (ctx->ref_filtering_res[inter_cand_group][list_idx_0][ref_idx_0].do_ref &&
        ctx->ref_filtering_res[inter_cand_group][list_idx_1][ref_idx_1].do_ref) {
        return true;
    }

    // At least one reference was pruned: only the (ref_idx 0, ref_idx 0) pair can survive,
    // and only if the group keeps its closest references
    return ctx->ref_pruning_ctrls.closest_refs[inter_cand_group] && ref_idx_0 == 0 && ref_idx_1 == 0;
}
838
839
0
// Number of entries in the 3x3 refinement pattern: the 8 positions surrounding a centre MV.
#define BIPRED_3x3_REFINMENT_POSITIONS 8
840
841
// Per-position flag. unipred_3x3_candidates_injection() skips positions whose flag is 0 when
// ctx->unipred3x3_injection >= 2; the entries set to 1 are the four axis-aligned offsets below.
static int8_t allow_refinement_flag[BIPRED_3x3_REFINMENT_POSITIONS] = {1, 0, 1, 0, 1, 0, 1, 0};
842
// Horizontal offset of each refinement position relative to the centre MV.
static int8_t bipred_3x3_x_pos[BIPRED_3x3_REFINMENT_POSITIONS]      = {-1, -1, 0, 1, 1, 1, 0, -1};
843
// Vertical offset of each refinement position relative to the centre MV.
static int8_t bipred_3x3_y_pos[BIPRED_3x3_REFINMENT_POSITIONS]      = {0, 1, 1, 1, 0, -1, -1, -1};
844
845
117k
// Variance-based test on the current square block. Returns 1 when the block variance is below
// 2000 and, for blocks larger than 8x8, the spread (max - min) of its four sub-block variances
// is below 4000; returns 0 otherwise.
// Also returns 0 without looking at variance when edge-info pruning is off, the SB size is 128,
// the shape is not PART_N, or the block is 4x4.
static INLINE uint8_t is_dc_only_safe(PictureControlSet* pcs, ModeDecisionContext* ctx) {
    if (!ctx->intra_ctrls.prune_using_edge_info) {
        return 0;
    }
    if (pcs->scs->super_block_size == 128) {
        return 0;
    }
    if (ctx->shape != PART_N) {
        return 0;
    }
    if (ctx->blk_geom->sq_size == 4) {
        return 0;
    }

    // Map the block position inside the SB to indices into the per-SB variance array
    int            var_idx;
    int            sub_var_idx[4];
    const Position blk_org = {.x = ctx->blk_org_x - ctx->sb_origin_x, .y = ctx->blk_org_y - ctx->sb_origin_y};
    svt_aom_get_blk_var_map(ctx->blk_geom->sq_size, blk_org.x, blk_org.y, &var_idx, sub_var_idx);

    uint16_t*      sb_var      = pcs->ppcs->variance[ctx->sb_index];
    const uint32_t blk_var     = sb_var[var_idx];
    const bool     is_low_var  = blk_var < 2000;

    // 8x8 has no sub-block variances, so the spread test does not apply
    if (ctx->blk_geom->sq_size == 8) {
        return is_low_var;
    }

    // 16x16 and above: spread between the smallest and largest sub-block variance
    uint32_t lowest  = UINT32_MAX;
    uint32_t highest = 0;
    for (int k = 0; k < 4; k++) {
        const uint32_t sub_var = sb_var[sub_var_idx[k]];
        if (sub_var < lowest) {
            lowest = sub_var;
        }
        if (sub_var > highest) {
            highest = sub_var;
        }
    }
    const uint32_t spread = highest - lowest;

    return (is_low_var && spread < 4000);
}
880
881
// Inject inter-intra, WM, OBMC for unipred simple-trans candidate
882
//
883
// total_cand_count is the index to ctx->fast_cand_array for the next candidate injected (which is the
884
// same as the number of candidates injected so far).  It is assumed the simple-trans candidate to base
885
// the other candidates on is the previously injected candidate (at index total_cand_count - 1).
886
//
887
// enable_ii, enable_wm, and enable_obmc allow the caller to disable some modes explicitly; if enabled, the
888
// mode will be injected if the block size/candidate type supports the mode. The enable signals are left as
889
// arguments because some candidates do not inject all modes (e.g. unipred does not inject WM/OBMC).
890
static void inj_non_simple_modes(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* total_cand_count,
                                 const bool enable_ii, const bool enable_wm, const bool enable_obmc) {
    // Template for every candidate derived here: the simple-translation candidate that was
    // injected immediately before this call (slot total_cand_count - 1)
    const ModeDecisionCandidate* const base_cand = &ctx->fast_cand_array[*total_cand_count - 1];

    // Running candidate count; also the fast_cand_array slot the next candidate is written to
    uint32_t cand_count = *total_cand_count;

    assert(base_cand->block_mi.ref_frame[1] == NONE_FRAME);
    const uint8_t list_idx = get_list_idx(base_cand->block_mi.ref_frame[0]);
    const uint8_t ref_idx  = get_ref_frame_idx(base_cand->block_mi.ref_frame[0]);

    // ---------------- Inter-intra ----------------
    const uint8_t ii_permitted = svt_aom_is_valid_unipred_ref(ctx, INTER_INTRA_GROUP, list_idx, ref_idx) &&
        svt_is_interintra_allowed(ctx->inter_intra_comp_ctrls.enabled,
                                  ctx->blk_geom->bsize,
                                  base_cand->block_mi.mode,
                                  base_cand->block_mi.ref_frame);
    if (enable_ii && ii_permitted) {
        ModeDecisionCandidate* ii_cand = &ctx->fast_cand_array[cand_count];
        svt_memcpy(ii_cand, base_cand, sizeof(ModeDecisionCandidate));

        inter_intra_search(pcs, ctx, ii_cand);
        ii_cand->block_mi.is_interintra_used = 1;
        ii_cand->block_mi.ref_frame[1]       = INTRA_FRAME;
        // Remember the searched mode before the slot index advances
        const InterIntraMode searched_ii_mode = ii_cand->block_mi.interintra_mode;
        INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);

        // Wedge mode 1: also inject a second inter-intra candidate with use_wedge_interintra
        // forced to 0, reusing the searched inter-intra mode; any other mode keeps only the
        // candidate produced by the search above
        const uint8_t wedge_mode = (ctx->shape == PART_N) ? ctx->inter_intra_comp_ctrls.wedge_mode_sq
                                                          : ctx->inter_intra_comp_ctrls.wedge_mode_nsq;
        if (wedge_mode == 1) {
            ModeDecisionCandidate* no_wedge_cand = &ctx->fast_cand_array[cand_count];
            svt_memcpy(no_wedge_cand, base_cand, sizeof(ModeDecisionCandidate));

            no_wedge_cand->block_mi.is_interintra_used   = 1;
            no_wedge_cand->block_mi.ref_frame[1]         = INTRA_FRAME;
            no_wedge_cand->block_mi.interintra_mode      = searched_ii_mode;
            no_wedge_cand->block_mi.use_wedge_interintra = 0;
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
        }
    }

#if CONFIG_ENABLE_OBMC
    // ---------------- Warped motion ----------------
    const uint8_t warp_permitted = warped_motion_mode_allowed(pcs, ctx) &&
        svt_aom_is_valid_unipred_ref(ctx, WARP_GROUP, list_idx, ref_idx);
    if (enable_wm && warp_permitted) {
        ModeDecisionCandidate* wm_cand = &ctx->fast_cand_array[cand_count];
        svt_memcpy(wm_cand, base_cand, sizeof(ModeDecisionCandidate));

        wm_cand->block_mi.is_interintra_used = 0;
        wm_cand->block_mi.motion_mode        = WARPED_CAUSAL;
        wm_cand->wm_params_l0.wmtype         = AFFINE;

        // Refinement runs only for NEWMV with refinement enabled at refine level 0;
        // when it is skipped the candidate is treated as valid so far
        uint8_t wm_valid = 1;
        if (wm_cand->block_mi.mode == NEWMV && ctx->wm_ctrls.refinement_iterations &&
            ctx->wm_ctrls.refine_level == 0) {
            wm_valid = svt_aom_wm_motion_refinement(pcs, ctx, wm_cand, 0);
        }

        if (wm_valid) {
            wm_valid = svt_aom_warped_motion_parameters(ctx,
                                                        wm_cand->block_mi.mv[0],
                                                        ctx->blk_geom,
                                                        wm_cand->block_mi.ref_frame[0],
                                                        &wm_cand->wm_params_l0,
                                                        &wm_cand->block_mi.num_proj_ref,
                                                        ctx->wm_ctrls.lower_band_th,
                                                        ctx->wm_ctrls.upper_band_th,
                                                        0);
        }

        // The slot is only committed when both steps succeeded
        if (wm_valid) {
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
        }
    }

    // ---------------- OBMC ----------------
    const uint8_t obmc_permitted = svt_aom_is_valid_unipred_ref(ctx, OBMC_GROUP, list_idx, ref_idx) &&
        (svt_aom_obmc_motion_mode_allowed(pcs,
                                          ctx,
                                          ctx->blk_geom->bsize,
                                          0,
                                          base_cand->block_mi.ref_frame[0],
                                          base_cand->block_mi.ref_frame[1],
                                          base_cand->block_mi.mode) == OBMC_CAUSAL);
    if (enable_obmc && obmc_permitted) {
        ModeDecisionCandidate* obmc_cand = &ctx->fast_cand_array[cand_count];
        svt_memcpy(obmc_cand, base_cand, sizeof(ModeDecisionCandidate));

        obmc_cand->block_mi.is_interintra_used = 0;
        obmc_cand->block_mi.motion_mode        = OBMC_CAUSAL;

        // Refinement runs only for NEWMV at refine level 0
        uint8_t obmc_valid = 1;
        if (obmc_cand->block_mi.mode == NEWMV && ctx->obmc_ctrls.refine_level == 0) {
            assert(obmc_cand->block_mi.ref_frame[1] == NONE_FRAME);
            obmc_valid = svt_aom_obmc_motion_refinement(pcs, ctx, obmc_cand, ctx->obmc_ctrls.refine_level);
        }

        if (obmc_valid) {
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
        }
    }
#else
    UNUSED(enable_wm);
    UNUSED(enable_obmc);
#endif // CONFIG_ENABLE_OBMC

    // Publish the updated candidate count to the caller
    *total_cand_count = cand_count;
}
1003
1004
// Determines if inter MVP compound modes should be skipped based on info from neighbouring blocks/ref frame types.
1005
0
static bool skip_compound_on_ref_types(ModeDecisionContext* ctx, MvReferenceFrame rf[2]) {
1006
0
    if (!ctx->inter_comp_ctrls.skip_on_ref_info) {
1007
0
        return false;
1008
0
    }
1009
1010
0
    MacroBlockD* xd = ctx->blk_ptr->av1xd;
1011
1012
    // If both references are from the same list, skip compound
1013
0
    const uint8_t list_idx_0 = get_list_idx(rf[0]);
1014
0
    const uint8_t list_idx_1 = get_list_idx(rf[1]);
1015
0
    if (list_idx_0 == list_idx_1) {
1016
0
        return true;
1017
0
    }
1018
1019
    // Skip compound unless neighbours selected the ref frames
1020
0
    bool skip_comp = true;
1021
0
    if (!xd->left_available && !xd->up_available) {
1022
0
        return false;
1023
0
    }
1024
1025
0
    if (xd->left_available) {
1026
0
        const BlockModeInfo* const left_mi = &xd->left_mbmi->block_mi;
1027
0
        if ((is_inter_singleref_mode(left_mi->mode) &&
1028
0
             (left_mi->ref_frame[0] == rf[0] || left_mi->ref_frame[0] == rf[1])) ||
1029
0
            (is_inter_compound_mode(left_mi->mode) &&
1030
0
             (left_mi->ref_frame[0] == rf[0] && left_mi->ref_frame[1] == rf[1]))) {
1031
0
            return false;
1032
0
        }
1033
0
    }
1034
0
    if (xd->up_available) {
1035
0
        const BlockModeInfo* const above_mi = &xd->above_mbmi->block_mi;
1036
0
        if ((is_inter_singleref_mode(above_mi->mode) &&
1037
0
             (above_mi->ref_frame[0] == rf[0] || above_mi->ref_frame[0] == rf[1])) ||
1038
0
            (is_inter_compound_mode(above_mi->mode) &&
1039
0
             (above_mi->ref_frame[0] == rf[0] && above_mi->ref_frame[1] == rf[1]))) {
1040
0
            return false;
1041
0
        }
1042
0
    }
1043
1044
0
    return skip_comp;
1045
0
}
1046
1047
// Inject inter-inter compound types (DIST, DIFF, WEDGE) for a bipred AVG candidate
1048
//
1049
// total_cand_count is the index to ctx->fast_cand_array for the next candidate injected (which is the
1050
// same as the number of candidates injected so far).  It is assumed the AVG candidate to base
1051
// the other candidates on is the previously injected candidate (at index total_cand_count - 1).
1052
0
static void inj_comp_modes(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* total_cand_count) {
    // The MD_COMP_AVG candidate is assumed to be the one injected just before this call;
    // it is the template copied for every other compound type
    ModeDecisionCandidate* const avg_cand = &ctx->fast_cand_array[*total_cand_count - 1];

    // Compound types permitted by the settings for this block size; nothing to add if the
    // limit is MD_COMP_DIST
    MD_COMP_TYPE tot_comp_types = get_tot_comp_types_bsize(ctx->inter_comp_ctrls.tot_comp_types, ctx->blk_geom->bsize);
    if (tot_comp_types == MD_COMP_DIST) {
        return;
    }

    // Reference pruning for the compound group
    const uint8_t ref_idx_0  = get_ref_frame_idx(avg_cand->block_mi.ref_frame[0]);
    const uint8_t ref_idx_1  = get_ref_frame_idx(avg_cand->block_mi.ref_frame[1]);
    const uint8_t list_idx_0 = get_list_idx(avg_cand->block_mi.ref_frame[0]);
    const uint8_t list_idx_1 = get_list_idx(avg_cand->block_mi.ref_frame[1]);
    if (!is_valid_bipred_ref(ctx, INTER_COMP_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) {
        return;
    }

    // Neighbour-based skip
    if (skip_compound_on_ref_types(ctx, avg_cand->block_mi.ref_frame)) {
        return;
    }

    // MV-length-based skip: any component of either MV beyond the limit disables compound
    if (ctx->inter_comp_ctrls.max_mv_length) {
        const uint16_t mv_limit = ctx->inter_comp_ctrls.max_mv_length;
        for (int i = 0; i < 2; i++) {
            if (abs(avg_cand->block_mi.mv[i].x) > mv_limit || abs(avg_cand->block_mi.mv[i].y) > mv_limit) {
                return;
            }
        }
    }

    // Prepare the buffers used by the DIFF/WEDGE search. The call is made even if DIFF/WEDGE
    // end up unused because pred0_to_pred1_mult depends on it. A non-zero return aborts injection.
    if (tot_comp_types > MD_COMP_DIST && svt_aom_calc_pred_masked_compound(pcs, ctx, avg_cand)) {
        return;
    }

    // Running candidate count; also the fast_cand_array slot the next candidate is written to
    uint32_t cand_count = *total_cand_count;
    for (MD_COMP_TYPE cur_type = MD_COMP_DIST; cur_type < tot_comp_types; cur_type++) {
        // With no_sym_dist set, DIST is not injected when both ref indices are 0
        const bool skip_sym_dist = ctx->inter_comp_ctrls.no_sym_dist && cur_type == MD_COMP_DIST && ref_idx_0 == 0 &&
            ref_idx_1 == 0;
        if (!skip_sym_dist) {
            ModeDecisionCandidate* comp_cand = &ctx->fast_cand_array[cand_count];
            svt_memcpy(comp_cand, avg_cand, sizeof(ModeDecisionCandidate));
            comp_cand->skip_mode_allowed = false;
            determine_compound_mode(pcs, ctx, comp_cand, cur_type);
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
        }
    }
    *total_cand_count = cand_count;
}
1108
1109
// Injects NEWMV unipred candidates at the 3x3 neighbourhood positions around the MV stored in
// ctx->sb_me_mv for every unipred ME result of the current block.
//
//   pcs                  picture control set (frame header, ME results, candidate limit)
//   ctx                  mode-decision context; fast_cand_array and the injected-MV list are updated
//   candidate_total_cnt  in: number of candidates injected so far; out: updated count
//
// For each position the offset is added in MV units of 1 when high-precision MVs are allowed and
// 2 otherwise. A position is dropped when the MV is already injected / rejected by
// mv_is_already_injected(), or when ctx->corrupted_mv_check is set and the MV-to-predictor
// difference fails is_valid_mv_diff(). Inter-intra variants are injected for each accepted
// candidate; OBMC and warped-motion variants are not.
static void unipred_3x3_candidates_injection(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                             uint32_t* candidate_total_cnt) {
    uint32_t               cand_total_cnt   = (*candidate_total_cnt);
    const MeSbResults*     me_results       = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const uint8_t          total_me_cnt     = me_results->total_me_candidate_index[ctx->me_block_offset];
    const MeCandidate*     me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset];
    ModeDecisionCandidate* cand_array       = ctx->fast_cand_array;

    // Offset scale. The original code left-shifted the (possibly negative) table offsets, which
    // is undefined behaviour in C for negative operands; multiplying yields the intended value.
    const int mv_step = pcs->ppcs->frm_hdr.allow_high_precision_mv ? 1 : 2;

    for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) {
        const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index];
        const uint8_t      inter_direction      = me_block_results_ptr->direction;
        if (inter_direction == BI_PRED) {
            continue;
        }
        assert(inter_direction == 0 || inter_direction == 1);

        const uint8_t list_idx = inter_direction;
        const uint8_t ref_idx  = list_idx == REF_LIST_0 ? me_block_results_ptr->ref_idx_l0
                                                        : me_block_results_ptr->ref_idx_l1;
        if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, UNI_3x3_GROUP), list_idx, ref_idx)) {
            continue;
        }

        // Same reference for every position of this ME result
        const uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx);

        for (int unipred_index = 0; unipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++unipred_index) {
            // Reduced pattern: only the positions flagged in allow_refinement_flag
            if (ctx->unipred3x3_injection >= 2 && allow_refinement_flag[unipred_index] == 0) {
                continue;
            }

            Mv to_inj_mv = ctx->sb_me_mv[list_idx][ref_idx];
            to_inj_mv.x += bipred_3x3_x_pos[unipred_index] * mv_step;
            to_inj_mv.y += bipred_3x3_y_pos[unipred_index] * mv_step;

            // The duplicate/validity check is bypassed while the injected list is still empty
            if (ctx->injected_mv_count != 0 &&
                mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, to_inject_ref_type)) {
                continue;
            }

            uint8_t drl_index       = 0;
            Mv      best_pred_mv[2] = {{{0}}, {{0}}};
            svt_aom_choose_best_av1_mv_pred(
                ctx, to_inject_ref_type, NEWMV, to_inj_mv, (Mv){{0}}, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, to_inj_mv, to_inj_mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.use_intrabc        = 0;
            cand->skip_mode_allowed           = false;
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            cand->drl_index                   = drl_index;
            cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
            cand->block_mi.ref_frame[0]       = to_inject_ref_type;
            cand->block_mi.ref_frame[1]       = NONE_FRAME;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[to_inject_ref_type].num;

            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            const bool enable_ii = true;
            // OBMC and WM perform a refinement search around the ME MV, so they are not injected as
            // unipred3x3 candidates, since this is effectively a refinement search
            const bool enable_obmc = false;
            const bool enable_warp = false;
            inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc);

            // Record the MV so later injections can detect duplicates
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
            ++ctx->injected_mv_count;
        }
    }

    // update the total number of candidates injected
    (*candidate_total_cnt) = cand_total_cnt;
}
1189
1190
/*
 * Inject NEW_NEWMV candidates derived from the bi-pred ME results of the current block.
 *
 * For every bi-pred ME candidate whose reference pair passes is_valid_bipred_ref(), one
 * of the two MVs read from ctx->sb_me_mv is kept as-is while the other is displaced by
 * each of the BIPRED_3x3_REFINMENT_POSITIONS offsets (bipred_3x3_x_pos / bipred_3x3_y_pos).
 * Every resulting MV pair that has not been injected yet is appended to
 * ctx->fast_cand_array and recorded in ctx->injected_mvs / ctx->injected_ref_types.
 *
 *   pcs                  picture-level state (frame header, ME results, max candidate count)
 *   ctx                  mode-decision context of the current block
 *   candidate_total_cnt  in/out: number of candidates in ctx->fast_cand_array
 */
static void bipred_3x3_candidates_injection(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                            uint32_t* candidate_total_cnt) {
    uint32_t               cand_total_cnt          = (*candidate_total_cnt);
    const uint8_t          allow_high_precision_mv = pcs->ppcs->frm_hdr.allow_high_precision_mv;
    const MeSbResults*     me_results              = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const uint8_t          total_me_cnt            = me_results->total_me_candidate_index[ctx->me_block_offset];
    const MeCandidate*     me_block_results        = &me_results->me_candidate_array[ctx->me_cand_offset];
    ModeDecisionCandidate* cand_array              = ctx->fast_cand_array;
    Mv                     best_pred_mv[2]         = {{{0}}, {{0}}};

    // Displacement applied per 3x3 offset unit: 1 when high-precision MVs are allowed, 2 otherwise.
    // A multiply is used instead of "offset << !allow_high_precision_mv" because the offsets
    // can be negative and left-shifting a negative value is undefined behaviour in C.
    const int mv_step = 1 << !allow_high_precision_mv;

    /**************
    NEW_NEWMV
    ************* */
    for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) {
        const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index];
        const uint8_t      inter_direction      = me_block_results_ptr->direction;
        const uint8_t      list0_ref_index      = me_block_results_ptr->ref_idx_l0;
        const uint8_t      list1_ref_index      = me_block_results_ptr->ref_idx_l1;

        // Only bi-pred ME candidates are refined here
        if (inter_direction < BI_PRED) {
            continue;
        }
        assert(inter_direction == BI_PRED);

        const uint8_t ref0_list = me_block_results_ptr->ref0_list;
        const uint8_t ref1_list = me_block_results_ptr->ref1_list;
        if (!is_valid_bipred_ref(ctx, BI_3x3_GROUP, ref0_list, list0_ref_index, ref1_list, list1_ref_index)) {
            continue;
        }

        const int cost0 = (int)ctx->post_subpel_me_mv_cost[ref0_list][list0_ref_index];
        const int cost1 = (int)ctx->post_subpel_me_mv_cost[ref1_list][list1_ref_index];
        const int diff  = (cost0 - cost1) * 100;

        // Reject this reference pair when the two costs deviate by more than use_l0_l1_dev
        // percent of the first one ((uint8_t)~0 disables the check).
        // This must skip only the current pair: leaving the function here would bypass the
        // write-back of cand_total_cnt below and lose the candidates already injected (and
        // already recorded in ctx->injected_mvs) by previous iterations.
        if (ctx->bipred3x3_ctrls.use_l0_l1_dev != (uint8_t)~0) {
            if (abs(diff) > (ctx->bipred3x3_ctrls.use_l0_l1_dev * cost0)) {
                continue;
            }
        }

        // Best list in terms of distortion reduction; -1 means "refine around both"
        int8_t best_list = -1;
        if (ctx->bipred3x3_ctrls.use_best_list) {
            best_list = (diff > 0) ? ref1_list : ref0_list;
        }

        MvReferenceFrame rf[2]              = {svt_get_ref_frame_type(ref0_list, list0_ref_index),
                                               svt_get_ref_frame_type(ref1_list, list1_ref_index)};
        const uint8_t    to_inject_ref_type = av1_ref_frame_type(rf);

        // Pass 0: (Best_L0, 8 Best_L1 neighbors) - MV0 fixed, MV1 displaced
        // Pass 1: (8 Best_L0 neighbors, Best_L1) - MV1 fixed, MV0 displaced
        for (int pass = 0; pass < 2; ++pass) {
            const bool    displace_mv1 = (pass == 0);
            const uint8_t fixed_list   = displace_mv1 ? ref0_list : ref1_list;
            if (best_list != -1 && best_list != fixed_list) {
                continue;
            }

            for (uint32_t bipred_index = 0; bipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++bipred_index) {
                // Without diagonal search only the positions flagged in allow_refinement_flag are tested
                if (!ctx->bipred3x3_ctrls.search_diag && allow_refinement_flag[bipred_index] == 0) {
                    continue;
                }

                Mv  to_inj_mv0   = ctx->sb_me_mv[ref0_list][list0_ref_index];
                Mv  to_inj_mv1   = ctx->sb_me_mv[ref1_list][list1_ref_index];
                Mv* displaced_mv = displace_mv1 ? &to_inj_mv1 : &to_inj_mv0;
                displaced_mv->x += bipred_3x3_x_pos[bipred_index] * mv_step;
                displaced_mv->y += bipred_3x3_y_pos[bipred_index] * mv_step;

                if (ctx->injected_mv_count != 0 &&
                    mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type)) {
                    continue;
                }

                uint8_t drl_index = 0;
                svt_aom_choose_best_av1_mv_pred(
                    ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv);
                if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) {
                    continue;
                }

                ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
                cand->block_mi.use_intrabc        = 0;
                cand->skip_mode_allowed           = false;
                cand->drl_index                   = drl_index;
                cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
                cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
                cand->block_mi.mode               = NEW_NEWMV;
                cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
                cand->block_mi.is_interintra_used = 0;
                cand->block_mi.ref_frame[0]       = rf[0];
                cand->block_mi.ref_frame[1]       = rf[1];
                cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
                cand->pred_mv[1].as_int           = best_pred_mv[1].as_int;
                determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
                INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

                if (ctx->inter_comp_ctrls.do_3x3_bi) {
                    ctx->cmp_store.pred0_cnt = 0;
                    ctx->cmp_store.pred1_cnt = 0;
                    inj_comp_modes(pcs, ctx, &cand_total_cnt);
                }

                // Remember the pair so that later injection stages do not duplicate it
                ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
                ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
                ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
                ++ctx->injected_mv_count;
            }
        }
    }

    // update the total number of candidates injected
    (*candidate_total_cnt) = cand_total_cnt;
}
1343
1344
/*********************************************************************
1345
**********************************************************************
1346
        Upto 12 inter Candidated injected
1347
        Min 6 inter Candidated injected
1348
UniPred L0 : NEARST         + upto 3x NEAR
1349
UniPred L1 : NEARST         + upto 3x NEAR
1350
BIPred     : NEARST_NEARST  + upto 3x NEAR_NEAR
1351
**********************************************************************
1352
**********************************************************************/
1353
static void inject_mvp_candidates_ii_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candTotCnt,
1354
0
                                               const bool allow_bipred) {
1355
0
    FrameHeader*           frm_hdr    = &pcs->ppcs->frm_hdr;
1356
0
    uint32_t               cand_idx   = *candTotCnt;
1357
0
    ModeDecisionCandidate* cand_array = ctx->fast_cand_array;
1358
0
    MacroBlockD*           xd         = ctx->blk_ptr->av1xd;
1359
1360
    //all of ref pairs: (1)single-ref List0  (2)single-ref List1  (3)compound Bi-Dir List0-List1
1361
0
    for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) {
1362
0
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it];
1363
0
        MvReferenceFrame rf[2];
1364
0
        av1_set_ref_frame(rf, ref_pair);
1365
1366
        //single ref/list
1367
0
        if (rf[1] == NONE_FRAME) {
1368
0
            MvReferenceFrame frame_type = rf[0];
1369
0
            uint8_t          list_idx   = get_list_idx(rf[0]);
1370
0
            if (ctx->cand_reduction_ctrls.lpd1_mvp_best_me_list) {
1371
0
                const MeSbResults* me_results           = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1372
0
                const uint8_t      total_me_cnt         = me_results->total_me_candidate_index[ctx->me_block_offset];
1373
0
                const MeCandidate* me_block_results     = &me_results->me_candidate_array[ctx->me_cand_offset];
1374
0
                const MeCandidate* me_block_results_ptr = &me_block_results[0];
1375
0
                const uint8_t      inter_direction      = me_block_results_ptr->direction;
1376
0
                if (total_me_cnt && list_idx != inter_direction) {
1377
0
                    continue;
1378
0
                }
1379
0
            }
1380
            //NEAREST
1381
            // Don't check if MV is already injected b/c NEAREST is the first INTER MV injected
1382
0
            Mv to_inj_mv = {.as_int = ctx->ref_mv_stack[frame_type][0].this_mv.as_int};
1383
1384
0
            ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1385
0
            cand->block_mi.mode               = NEARESTMV;
1386
0
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1387
0
            cand->skip_mode_allowed           = false;
1388
0
            cand->drl_index                   = 0;
1389
0
            cand->block_mi.ref_frame[0]       = rf[0];
1390
0
            cand->block_mi.ref_frame[1]       = rf[1];
1391
0
            cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
1392
0
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
1393
0
            cand->block_mi.use_intrabc        = 0;
1394
0
            cand->block_mi.is_interintra_used = 0;
1395
0
            INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1396
1397
0
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
1398
0
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
1399
0
            ++ctx->injected_mv_count;
1400
            //NEAR
1401
0
            const uint8_t max_drl_index     = svt_aom_get_max_drl_index(xd->ref_mv_count[frame_type], NEARMV);
1402
0
            uint8_t       cap_max_drl_index = 0;
1403
0
            if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
1404
0
                cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_count, max_drl_index);
1405
0
            }
1406
0
            for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) {
1407
0
                to_inj_mv.as_int = ctx->ref_mv_stack[frame_type][1 + drli].this_mv.as_int;
1408
1409
0
                if ((ctx->injected_mv_count == 0 ||
1410
0
                     mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) {
1411
0
                    cand                              = &cand_array[cand_idx];
1412
0
                    cand->block_mi.mode               = NEARMV;
1413
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1414
0
                    cand->skip_mode_allowed           = false;
1415
0
                    cand->drl_index                   = drli;
1416
0
                    cand->block_mi.use_intrabc        = 0;
1417
0
                    cand->block_mi.is_interintra_used = 0;
1418
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1419
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1420
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
1421
0
                    cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
1422
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1423
1424
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
1425
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
1426
0
                    ++ctx->injected_mv_count;
1427
0
                }
1428
0
            }
1429
0
        } else if (allow_bipred) {
1430
            //NEAREST_NEAREST
1431
            // Don't check if MV is already injected b/c NEAREST_NEAREST is the first bipred INTER candidate injected
1432
0
            Mv         to_inj_mv0   = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int};
1433
0
            Mv         to_inj_mv1   = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int};
1434
0
            const bool is_skip_mode = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) &&
1435
0
                frm_hdr->skip_mode_params.skip_mode_flag && (rf[0] == frm_hdr->skip_mode_params.ref_frame_idx_0) &&
1436
0
                (rf[1] == frm_hdr->skip_mode_params.ref_frame_idx_1);
1437
0
            ModeDecisionCandidate* cand         = &cand_array[cand_idx];
1438
0
            cand->block_mi.mode                 = NEAREST_NEARESTMV;
1439
0
            cand->block_mi.motion_mode          = SIMPLE_TRANSLATION;
1440
0
            cand->skip_mode_allowed             = is_skip_mode;
1441
0
            cand->block_mi.mv[0].as_int         = to_inj_mv0.as_int;
1442
0
            cand->block_mi.mv[1].as_int         = to_inj_mv1.as_int;
1443
0
            cand->drl_index                     = 0;
1444
0
            cand->block_mi.use_intrabc          = 0;
1445
0
            cand->block_mi.is_interintra_used   = 0;
1446
0
            cand->block_mi.ref_frame[0]         = rf[0];
1447
0
            cand->block_mi.ref_frame[1]         = rf[1];
1448
0
            cand->block_mi.comp_group_idx       = 0;
1449
0
            cand->block_mi.compound_idx         = 1;
1450
0
            cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE;
1451
1452
0
            INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1453
1454
0
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1455
0
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1456
0
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1457
0
            ++ctx->injected_mv_count;
1458
1459
            //NEAR_NEAR
1460
0
            const uint8_t max_drl_index     = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEARMV);
1461
0
            uint8_t       cap_max_drl_index = 0;
1462
0
            if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
1463
0
                cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_near_count, max_drl_index);
1464
0
            }
1465
0
            for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) {
1466
0
                to_inj_mv0.as_int = ctx->ref_mv_stack[ref_pair][1 + drli].this_mv.as_int;
1467
0
                to_inj_mv1.as_int = ctx->ref_mv_stack[ref_pair][1 + drli].comp_mv.as_int;
1468
0
                if ((ctx->injected_mv_count == 0 ||
1469
0
                     mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair) == false)) {
1470
0
                    cand                                = &cand_array[cand_idx];
1471
0
                    cand->block_mi.mode                 = NEAR_NEARMV;
1472
0
                    cand->block_mi.motion_mode          = SIMPLE_TRANSLATION;
1473
0
                    cand->skip_mode_allowed             = false;
1474
0
                    cand->block_mi.use_intrabc          = 0;
1475
0
                    cand->block_mi.is_interintra_used   = 0;
1476
0
                    cand->block_mi.mv[0].as_int         = to_inj_mv0.as_int;
1477
0
                    cand->block_mi.mv[1].as_int         = to_inj_mv1.as_int;
1478
0
                    cand->drl_index                     = drli;
1479
0
                    cand->block_mi.ref_frame[0]         = rf[0];
1480
0
                    cand->block_mi.ref_frame[1]         = rf[1];
1481
0
                    cand->block_mi.comp_group_idx       = 0;
1482
0
                    cand->block_mi.compound_idx         = 1;
1483
0
                    cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE;
1484
1485
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1486
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1487
0
                    ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1488
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1489
0
                    ++ctx->injected_mv_count;
1490
0
                }
1491
0
            }
1492
0
        }
1493
0
    }
1494
    //update tot Candidate count
1495
0
    *candTotCnt = cand_idx;
1496
0
}
1497
1498
/*********************************************************************
1499
**********************************************************************
1500
        Upto 12 inter Candidated injected
1501
        Min 6 inter Candidated injected
1502
UniPred L0 : NEARST         + upto 3x NEAR
1503
UniPred L1 : NEARST         + upto 3x NEAR
1504
BIPred     : NEARST_NEARST  + upto 3x NEAR_NEAR
1505
**********************************************************************
1506
**********************************************************************/
1507
static void inject_mvp_candidates_ii(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* cand_total_cnt,
1508
0
                                     const bool allow_bipred) {
1509
0
    BlkStruct*             blk_ptr    = ctx->blk_ptr;
1510
0
    FrameHeader*           frm_hdr    = &pcs->ppcs->frm_hdr;
1511
0
    uint32_t               cand_idx   = *cand_total_cnt;
1512
0
    ModeDecisionCandidate* cand_array = ctx->fast_cand_array;
1513
0
    MacroBlockD*           xd         = blk_ptr->av1xd;
1514
0
    Mv                     nearestmv[2], nearmv[2], ref_mv[2];
1515
1516
    //all of ref pairs: (1)single-ref List0  (2)single-ref List1  (3)compound Bi-Dir List0-List1  (4)compound Uni-Dir List0-List0  (5)compound Uni-Dir List1-List1
1517
0
    for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) {
1518
0
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it];
1519
0
        MvReferenceFrame rf[2];
1520
0
        av1_set_ref_frame(rf, ref_pair);
1521
        //single ref/list
1522
0
        if (rf[1] == NONE_FRAME) {
1523
0
            MvReferenceFrame frame_type = rf[0];
1524
0
            uint8_t          list_idx   = get_list_idx(rf[0]);
1525
0
            uint8_t          ref_idx    = get_ref_frame_idx(rf[0]);
1526
            // Always consider the 2 closet ref frames (i.e. ref_idx=0) @ MVP cand generation
1527
0
            if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEAR_GROUP), list_idx, ref_idx)) {
1528
0
                continue;
1529
0
            }
1530
            //NEAREST
1531
0
            Mv to_inj_mv = {.as_int = ctx->ref_mv_stack[frame_type][0].this_mv.as_int};
1532
0
            if ((ctx->injected_mv_count == 0 ||
1533
0
                 mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) {
1534
0
                assert(list_idx == 0 || list_idx == 1);
1535
0
                ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1536
0
                cand->block_mi.mode               = NEARESTMV;
1537
0
                cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1538
0
                cand->block_mi.use_intrabc        = 0;
1539
0
                cand->skip_mode_allowed           = false;
1540
0
                cand->drl_index                   = 0;
1541
0
                cand->block_mi.ref_frame[0]       = rf[0];
1542
0
                cand->block_mi.ref_frame[1]       = rf[1];
1543
0
                cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
1544
0
                cand->block_mi.is_interintra_used = 0;
1545
0
                cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
1546
0
                INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1547
1548
0
                const bool enable_ii   = true;
1549
0
                const bool enable_obmc = true;
1550
0
                const bool enable_warp = ctx->wm_ctrls.use_wm_for_mvp ? true : false;
1551
0
                inj_non_simple_modes(pcs, ctx, &cand_idx, enable_ii, enable_warp, enable_obmc);
1552
0
                ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
1553
0
                ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
1554
0
                ++ctx->injected_mv_count;
1555
0
            }
1556
1557
            //NEAR
1558
0
            const uint8_t max_drl_index     = svt_aom_get_max_drl_index(xd->ref_mv_count[frame_type], NEARMV);
1559
0
            uint8_t       cap_max_drl_index = 0;
1560
0
            if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
1561
0
                cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_count, max_drl_index);
1562
0
            }
1563
0
            for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) {
1564
0
                svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, frame_type, 0, NEARMV, drli, nearestmv, nearmv, ref_mv);
1565
1566
0
                to_inj_mv.as_int = nearmv[0].as_int;
1567
0
                if ((ctx->injected_mv_count == 0 ||
1568
0
                     mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) {
1569
0
                    assert(list_idx == 0 || list_idx == 1);
1570
0
                    ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1571
0
                    cand->block_mi.mode               = NEARMV;
1572
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1573
0
                    cand->block_mi.use_intrabc        = 0;
1574
0
                    cand->skip_mode_allowed           = false;
1575
0
                    cand->drl_index                   = drli;
1576
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1577
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1578
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
1579
0
                    cand->block_mi.is_interintra_used = 0;
1580
0
                    cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
1581
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1582
1583
0
                    const bool enable_ii   = true;
1584
0
                    const bool enable_obmc = true;
1585
0
                    const bool enable_warp = ctx->wm_ctrls.use_wm_for_mvp ? true : false;
1586
0
                    inj_non_simple_modes(pcs, ctx, &cand_idx, enable_ii, enable_warp, enable_obmc);
1587
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
1588
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
1589
0
                    ++ctx->injected_mv_count;
1590
0
                }
1591
0
            }
1592
0
        } else if (allow_bipred) {
1593
0
            const uint8_t ref_idx_0 = get_ref_frame_idx(rf[0]);
1594
0
            const uint8_t ref_idx_1 = get_ref_frame_idx(rf[1]);
1595
1596
0
            const uint8_t list_idx_0 = get_list_idx(rf[0]);
1597
0
            const uint8_t list_idx_1 = get_list_idx(rf[1]);
1598
1599
0
            ctx->cmp_store.pred0_cnt = 0;
1600
0
            ctx->cmp_store.pred1_cnt = 0;
1601
1602
            // Always consider the 2 closet ref frames (i.e. ref_idx=0) @ MVP cand generation
1603
0
            if (!is_valid_bipred_ref(ctx, NRST_NEAR_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) {
1604
0
                continue;
1605
0
            }
1606
1607
            //NEAREST_NEAREST
1608
0
            Mv to_inj_mv0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int};
1609
0
            Mv to_inj_mv1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int};
1610
0
            if ((ctx->injected_mv_count == 0 ||
1611
0
                 mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair) == false)) {
1612
0
                const bool is_skip_mode = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) &&
1613
0
                    frm_hdr->skip_mode_params.skip_mode_flag && (rf[0] == frm_hdr->skip_mode_params.ref_frame_idx_0) &&
1614
0
                    (rf[1] == frm_hdr->skip_mode_params.ref_frame_idx_1);
1615
0
                ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1616
0
                cand->block_mi.mode               = NEAREST_NEARESTMV;
1617
0
                cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1618
0
                cand->block_mi.is_interintra_used = 0;
1619
0
                cand->block_mi.use_intrabc        = 0;
1620
0
                cand->skip_mode_allowed           = /*cur_type == MD_COMP_AVG &&*/ is_skip_mode ? true : false;
1621
0
                cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1622
0
                cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1623
0
                cand->drl_index                   = 0;
1624
0
                cand->block_mi.ref_frame[0]       = rf[0];
1625
0
                cand->block_mi.ref_frame[1]       = rf[1];
1626
0
                determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1627
0
                INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1628
1629
0
                if (ctx->inter_comp_ctrls.do_nearest_nearest) {
1630
                    // Don't reset ctx->cmp_store.pred0_cnt for MVP
1631
0
                    inj_comp_modes(pcs, ctx, &cand_idx);
1632
0
                }
1633
0
                ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1634
0
                ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1635
0
                ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1636
0
                ++ctx->injected_mv_count;
1637
0
            }
1638
1639
            //NEAR_NEAR
1640
0
            const uint8_t max_drl_index     = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEARMV);
1641
0
            uint8_t       cap_max_drl_index = 0;
1642
0
            if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
1643
0
                cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_near_count, max_drl_index);
1644
0
            }
1645
0
            for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) {
1646
0
                svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, ref_pair, 1, NEAR_NEARMV, drli, nearestmv, nearmv, ref_mv);
1647
1648
0
                to_inj_mv0.as_int = nearmv[0].as_int;
1649
0
                to_inj_mv1.as_int = nearmv[1].as_int;
1650
0
                if ((ctx->injected_mv_count == 0 ||
1651
0
                     mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair) == false)) {
1652
0
                    ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1653
0
                    cand->block_mi.mode               = NEAR_NEARMV;
1654
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1655
0
                    cand->block_mi.is_interintra_used = 0;
1656
0
                    cand->block_mi.use_intrabc        = 0;
1657
0
                    cand->skip_mode_allowed           = false;
1658
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1659
0
                    cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1660
0
                    cand->drl_index                   = drli;
1661
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1662
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1663
0
                    determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1664
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1665
1666
0
                    if (ctx->inter_comp_ctrls.do_near_near) {
1667
                        // Don't reset ctx->cmp_store.pred0_cnt for MVP
1668
0
                        inj_comp_modes(pcs, ctx, &cand_idx);
1669
0
                    }
1670
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1671
0
                    ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1672
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1673
0
                    ++ctx->injected_mv_count;
1674
0
                }
1675
0
            }
1676
0
        }
1677
0
    }
1678
    //update tot Candidate count
1679
0
    *cand_total_cnt = cand_idx;
1680
0
}
1681
1682
static void inject_new_nearest_new_comb_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx,
1683
0
                                                   uint32_t* cand_tot_cnt) {
1684
0
    uint32_t               cand_idx   = *cand_tot_cnt;
1685
0
    ModeDecisionCandidate* cand_array = ctx->fast_cand_array;
1686
0
    MacroBlockD*           xd         = ctx->blk_ptr->av1xd;
1687
0
    Mv                     nearestmv[2], nearmv[2], ref_mv[2];
1688
1689
    //all of ref pairs: (1)single-ref List0  (2)single-ref List1  (3)compound Bi-Dir List0-List1  (4)compound Uni-Dir List0-List0  (5)compound Uni-Dir List1-List1
1690
0
    for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) {
1691
0
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it];
1692
0
        MvReferenceFrame rf[2];
1693
0
        av1_set_ref_frame(rf, ref_pair);
1694
0
        if (rf[1] != NONE_FRAME) {
1695
0
            const uint8_t ref_idx_0  = get_ref_frame_idx(rf[0]);
1696
0
            const uint8_t ref_idx_1  = get_ref_frame_idx(rf[1]);
1697
0
            const uint8_t list_idx_0 = get_list_idx(rf[0]);
1698
0
            const uint8_t list_idx_1 = get_list_idx(rf[1]);
1699
0
            if (!svt_aom_is_valid_unipred_ref(
1700
0
                    ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEW_NEAR_GROUP), list_idx_0, ref_idx_0) ||
1701
0
                !svt_aom_is_valid_unipred_ref(
1702
0
                    ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEW_NEAR_GROUP), list_idx_1, ref_idx_1)) {
1703
0
                continue;
1704
0
            }
1705
1706
0
            {
1707
                //NEAREST_NEWMV
1708
0
                const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1709
0
                Mv                 to_inj_mv0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int};
1710
0
                Mv                 to_inj_mv1 = ctx->sb_me_mv[list_idx_1][ref_idx_1];
1711
0
                bool               inj_mv =
1712
0
                    (ctx->injected_mv_count == 0 || !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1713
0
                    svt_aom_is_me_data_present(
1714
0
                        ctx->me_block_offset, ctx->me_cand_offset, me_results, get_list_idx(rf[1]), ref_idx_1);
1715
0
                if (inj_mv) {
1716
0
                    svt_aom_get_av1_mv_pred_drl(ctx,
1717
0
                                                ctx->blk_ptr,
1718
0
                                                ref_pair,
1719
0
                                                1, // is_compound
1720
0
                                                NEAREST_NEWMV,
1721
0
                                                0, //not needed drli,
1722
0
                                                nearestmv,
1723
0
                                                nearmv,
1724
0
                                                ref_mv);
1725
1726
0
                    ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1727
0
                    cand->block_mi.mode               = NEAREST_NEWMV;
1728
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1729
0
                    cand->block_mi.is_interintra_used = 0;
1730
0
                    cand->block_mi.use_intrabc        = 0;
1731
0
                    cand->skip_mode_allowed           = false;
1732
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1733
0
                    cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1734
0
                    cand->drl_index                   = 0;
1735
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1736
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1737
0
                    cand->pred_mv[1].as_int           = ref_mv[1].as_int;
1738
0
                    determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1739
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1740
1741
0
                    if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1742
0
                        ctx->cmp_store.pred0_cnt = 0;
1743
0
                        ctx->cmp_store.pred1_cnt = 0;
1744
0
                        inj_comp_modes(pcs, ctx, &cand_idx);
1745
0
                    }
1746
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1747
0
                    ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1748
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1749
0
                    ++ctx->injected_mv_count;
1750
0
                }
1751
0
            }
1752
1753
0
            {
1754
                //NEW_NEARESTMV
1755
0
                const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1756
0
                Mv                 to_inj_mv0 = ctx->sb_me_mv[list_idx_0][ref_idx_0];
1757
0
                Mv                 to_inj_mv1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int};
1758
0
                bool               inj_mv     = (ctx->injected_mv_count == 0 ||
1759
0
                               !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1760
0
                    svt_aom_is_me_data_present(ctx->me_block_offset, ctx->me_cand_offset, me_results, 0, ref_idx_0);
1761
0
                if (inj_mv) {
1762
0
                    svt_aom_get_av1_mv_pred_drl(ctx,
1763
0
                                                ctx->blk_ptr,
1764
0
                                                ref_pair,
1765
0
                                                1, // is_compound
1766
0
                                                NEW_NEARESTMV,
1767
0
                                                0, //not needed drli,
1768
0
                                                nearestmv,
1769
0
                                                nearmv,
1770
0
                                                ref_mv);
1771
1772
0
                    ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1773
0
                    cand->block_mi.mode               = NEW_NEARESTMV;
1774
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1775
0
                    cand->block_mi.is_interintra_used = 0;
1776
0
                    cand->block_mi.use_intrabc        = 0;
1777
0
                    cand->skip_mode_allowed           = false;
1778
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1779
0
                    cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1780
0
                    cand->drl_index                   = 0;
1781
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1782
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1783
0
                    cand->pred_mv[0].as_int           = ref_mv[0].as_int;
1784
0
                    determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1785
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1786
1787
0
                    if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1788
0
                        ctx->cmp_store.pred0_cnt = 0;
1789
0
                        ctx->cmp_store.pred1_cnt = 0;
1790
0
                        inj_comp_modes(pcs, ctx, &cand_idx);
1791
0
                    }
1792
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1793
0
                    ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1794
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1795
0
                    ++ctx->injected_mv_count;
1796
0
                }
1797
0
            }
1798
            // For level 2, only inject NEAREST_NEW/NEW_NEAREST candidates
1799
0
            if (ctx->new_nearest_near_comb_injection >= 2) {
1800
0
                continue;
1801
0
            }
1802
1803
            //NEW_NEARMV
1804
0
            {
1805
0
                const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEW_NEARMV);
1806
1807
0
                for (uint8_t drli = 0; drli < max_drl_index; drli++) {
1808
0
                    svt_aom_get_av1_mv_pred_drl(
1809
0
                        ctx, ctx->blk_ptr, ref_pair, 1, NEW_NEARMV, drli, nearestmv, nearmv, ref_mv);
1810
1811
                    //NEW_NEARMV
1812
0
                    const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1813
0
                    Mv                 to_inj_mv0 = ctx->sb_me_mv[list_idx_0][ref_idx_0];
1814
0
                    Mv                 to_inj_mv1 = {.as_int = nearmv[1].as_int};
1815
0
                    bool               inj_mv     = (ctx->injected_mv_count == 0 ||
1816
0
                                   !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1817
0
                        svt_aom_is_me_data_present(ctx->me_block_offset, ctx->me_cand_offset, me_results, 0, ref_idx_0);
1818
0
                    if (inj_mv) {
1819
0
                        ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1820
0
                        cand->block_mi.mode               = NEW_NEARMV;
1821
0
                        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1822
0
                        cand->block_mi.is_interintra_used = 0;
1823
0
                        cand->block_mi.use_intrabc        = 0;
1824
0
                        cand->skip_mode_allowed           = false;
1825
0
                        cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1826
0
                        cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1827
0
                        cand->drl_index                   = drli;
1828
0
                        cand->block_mi.ref_frame[0]       = rf[0];
1829
0
                        cand->block_mi.ref_frame[1]       = rf[1];
1830
0
                        cand->pred_mv[0].as_int           = ref_mv[0].as_int;
1831
0
                        determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1832
0
                        INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1833
1834
0
                        if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1835
0
                            ctx->cmp_store.pred0_cnt = 0;
1836
0
                            ctx->cmp_store.pred1_cnt = 0;
1837
0
                            inj_comp_modes(pcs, ctx, &cand_idx);
1838
0
                        }
1839
0
                        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1840
0
                        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1841
0
                        ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1842
0
                        ++ctx->injected_mv_count;
1843
0
                    }
1844
0
                }
1845
0
            }
1846
            //NEAR_NEWMV
1847
0
            {
1848
0
                uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEWMV);
1849
1850
0
                for (uint8_t drli = 0; drli < max_drl_index; drli++) {
1851
0
                    svt_aom_get_av1_mv_pred_drl(
1852
0
                        ctx, ctx->blk_ptr, ref_pair, 1, NEAR_NEWMV, drli, nearestmv, nearmv, ref_mv);
1853
1854
                    //NEAR_NEWMV
1855
0
                    const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1856
0
                    Mv                 to_inj_mv0 = {.as_int = nearmv[0].as_int};
1857
0
                    Mv                 to_inj_mv1 = ctx->sb_me_mv[list_idx_1][ref_idx_1];
1858
0
                    bool               inj_mv     = (ctx->injected_mv_count == 0 ||
1859
0
                                   !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1860
0
                        svt_aom_is_me_data_present(
1861
0
                                      ctx->me_block_offset, ctx->me_cand_offset, me_results, list_idx_1, ref_idx_1);
1862
1863
0
                    if (inj_mv) {
1864
0
                        ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1865
0
                        cand->block_mi.mode               = NEAR_NEWMV;
1866
0
                        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1867
0
                        cand->block_mi.is_interintra_used = 0;
1868
0
                        cand->block_mi.use_intrabc        = 0;
1869
0
                        cand->skip_mode_allowed           = false;
1870
0
                        cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1871
0
                        cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1872
0
                        cand->drl_index                   = drli;
1873
0
                        cand->block_mi.ref_frame[0]       = rf[0];
1874
0
                        cand->block_mi.ref_frame[1]       = rf[1];
1875
0
                        cand->pred_mv[1].as_int           = ref_mv[1].as_int;
1876
0
                        determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1877
0
                        INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1878
1879
0
                        if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1880
0
                            ctx->cmp_store.pred0_cnt = 0;
1881
0
                            ctx->cmp_store.pred1_cnt = 0;
1882
0
                            inj_comp_modes(pcs, ctx, &cand_idx);
1883
0
                        }
1884
0
                        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1885
0
                        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1886
0
                        ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1887
0
                        ++ctx->injected_mv_count;
1888
0
                    }
1889
0
                }
1890
0
            }
1891
0
        }
1892
0
    }
1893
    //update tot Candidate count
1894
0
    *cand_tot_cnt = cand_idx;
1895
0
}
1896
1897
// Refine the WM MV (8 bit search).  Return true if search found a valid MV; false otherwise
1898
uint8_t svt_aom_wm_motion_refinement(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
1899
0
                                     const bool shut_approx) {
1900
0
    PictureParentControlSet* ppcs         = pcs->ppcs;
1901
0
    const Mv                 neighbors[9] = {
1902
0
        {{0, 0}}, {{-1, 0}}, {{0, 1}}, {{1, 0}}, {{0, -1}}, {{1, -1}}, {{1, 1}}, {{-1, 1}}, {{-1, -1}}};
1903
1904
    // Set info used to get MV cost
1905
0
    int*        mvjcost       = ctx->md_rate_est_ctx->nmv_vec_cost;
1906
0
    const int** mvcost        = ctx->md_rate_est_ctx->nmvcoststack;
1907
0
    uint32_t    full_lambda   = ctx->full_lambda_md[EB_8_BIT_MD]; // 8bit only
1908
0
    int         error_per_bit = full_lambda >> RD_EPB_SHIFT;
1909
0
    error_per_bit += (error_per_bit == 0);
1910
0
    EbPictureBufferDesc*    input_pic          = ppcs->enhanced_pic; // 10BIT not supported
1911
0
    uint32_t                input_origin_index = (ctx->blk_org_y) * input_pic->y_stride + (ctx->blk_org_x);
1912
0
    const AomVarianceFnPtr* fn_ptr             = &svt_aom_mefn_ptr[ctx->blk_geom->bsize];
1913
0
    unsigned int            sse;
1914
0
    uint8_t*                src_y = input_pic->y_buffer + input_origin_index;
1915
1916
0
    int mv_prec_shift = ppcs->frm_hdr.allow_high_precision_mv ? 0 : 1;
1917
0
    int best_cost     = INT_MAX;
1918
    // local WM always uses one ref - MV for ref0 stored in idx0
1919
0
    assert(cand->block_mi.ref_frame[1] == NONE_FRAME);
1920
0
    Mv       search_centre_mv = {.as_int = cand->block_mi.mv[0].as_int};
1921
0
    Mv       best_mv          = {.as_int = cand->block_mi.mv[0].as_int};
1922
0
    Mv       prev_mv          = {.as_int = cand->block_mi.mv[0].as_int};
1923
0
    const Mv ref_mv           = {.as_int = cand->pred_mv[0].as_int};
1924
1925
0
    int      max_iterations  = ctx->wm_ctrls.refinement_iterations;
1926
0
    int      tot_checked_pos = 0;
1927
0
    uint32_t mv_record[256];
1928
0
    for (int iter = 0; iter < max_iterations; iter++) {
1929
        // search the (0,0) offset position only for the first search iteration
1930
0
        for (int i = (iter ? 1 : 0); i < (ctx->wm_ctrls.refine_diag ? 9 : 5); i++) {
1931
0
            const Mv test_mv = (Mv){{search_centre_mv.x + (neighbors[i].x << mv_prec_shift),
1932
0
                                     search_centre_mv.y + (neighbors[i].y << mv_prec_shift)}};
1933
1934
            // Don't re-test previously tested positions
1935
0
            if (iter) {
1936
0
                if (prev_mv.as_int == test_mv.as_int) {
1937
0
                    continue;
1938
0
                }
1939
0
                int match_found = 0;
1940
0
                for (int j = 0; j < tot_checked_pos; j++) {
1941
0
                    if (test_mv.as_int == mv_record[j]) {
1942
0
                        match_found = 1;
1943
0
                    }
1944
0
                }
1945
0
                if (match_found) {
1946
0
                    continue;
1947
0
                }
1948
0
            }
1949
0
            mv_record[tot_checked_pos++] = test_mv.as_int;
1950
0
            uint8_t local_warp_valid     = svt_aom_warped_motion_parameters(ctx,
1951
0
                                                                        test_mv,
1952
0
                                                                        ctx->blk_geom,
1953
0
                                                                        cand->block_mi.ref_frame[0],
1954
0
                                                                        &cand->wm_params_l0,
1955
0
                                                                        &cand->block_mi.num_proj_ref,
1956
0
                                                                        ctx->wm_ctrls.lower_band_th,
1957
0
                                                                        ctx->wm_ctrls.upper_band_th,
1958
0
                                                                        shut_approx);
1959
0
            if (!local_warp_valid) {
1960
0
                continue;
1961
0
            }
1962
0
            assert(cand->block_mi.ref_frame[1] == NONE_FRAME);
1963
0
            EbPictureBufferDesc* ref_pic_0 = svt_aom_get_ref_pic_buffer(pcs, cand->block_mi.ref_frame[0]);
1964
0
            EbPictureBufferDesc* ref_pic_1 = NULL; // will stay NULL b/c this is unipred candidate
1965
1966
            // update MV to be testing MV before calling prediction function
1967
0
            cand->block_mi.mv[0].as_int = test_mv.as_int;
1968
0
            svt_aom_inter_prediction(pcs->scs,
1969
0
                                     pcs,
1970
0
                                     &cand->block_mi,
1971
0
                                     &cand->wm_params_l0,
1972
0
                                     &cand->wm_params_l1,
1973
0
                                     ctx->blk_ptr,
1974
0
                                     ctx->blk_geom->bsize,
1975
0
                                     ctx->shape,
1976
                                     // If using 8bit MD for HBD content, can't use pre-computed OBMC/II to
1977
                                     // generate conformant recon
1978
0
                                     true, //use_precomputed_obmc - not used here
1979
0
                                     true, //use_precomputed_ii - not used here
1980
0
                                     ctx,
1981
0
                                     ctx->recon_neigh_y,
1982
0
                                     ctx->recon_neigh_cb,
1983
0
                                     ctx->recon_neigh_cr,
1984
0
                                     ref_pic_0,
1985
0
                                     ref_pic_1, // this is NULL
1986
0
                                     ctx->blk_org_x,
1987
0
                                     ctx->blk_org_y,
1988
0
                                     ctx->scratch_prediction_ptr,
1989
0
                                     0,
1990
0
                                     0,
1991
0
                                     PICTURE_BUFFER_DESC_LUMA_MASK,
1992
0
                                     EB_EIGHT_BIT,
1993
0
                                     0); // is_16bit_pipeline
1994
1995
0
            int var = fn_ptr->vf(ctx->scratch_prediction_ptr->y_buffer,
1996
0
                                 ctx->scratch_prediction_ptr->y_stride,
1997
0
                                 src_y,
1998
0
                                 input_pic->y_stride,
1999
0
                                 &sse);
2000
0
            if (ctx->approx_inter_rate) {
2001
0
                var += svt_aom_mv_err_cost_light(&test_mv, &ref_mv);
2002
0
            } else {
2003
0
                var += svt_aom_mv_err_cost(&test_mv, &ref_mv, mvjcost, mvcost, error_per_bit);
2004
0
            }
2005
2006
0
            if (var < best_cost) {
2007
0
                best_mv.as_int = test_mv.as_int;
2008
0
                best_cost      = var;
2009
0
            }
2010
0
        }
2011
0
        prev_mv.as_int          = search_centre_mv.as_int;
2012
0
        search_centre_mv.as_int = best_mv.as_int;
2013
0
        if (prev_mv.as_int == best_mv.as_int) {
2014
0
            break;
2015
0
        }
2016
0
    }
2017
0
    cand->block_mi.mv[0].as_int = best_mv.as_int;
2018
2019
    // Derive pred MV for best WM position
2020
0
    Mv best_pred_mv[2] = {{{0}}, {{0}}};
2021
0
    svt_aom_choose_best_av1_mv_pred(ctx,
2022
0
                                    cand->block_mi.ref_frame[0], // WM only allowed for unipred cands
2023
0
                                    cand->block_mi.mode,
2024
0
                                    cand->block_mi.mv[0],
2025
0
                                    (Mv){{0}},
2026
0
                                    &cand->drl_index,
2027
0
                                    best_pred_mv);
2028
0
    cand->pred_mv[0].as_int = best_pred_mv[0].as_int;
2029
2030
    // Check that final chosen MV is valid
2031
0
    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, best_mv, best_mv, 0)) {
2032
0
        return 1;
2033
0
    }
2034
2035
0
    return 0;
2036
0
}
2037
2038
// Describe one prediction plane in *dst: buf0 is the plane base (src), buf points at the block
// located at MI position (mi_row, mi_col), and width/height/stride are copied from the arguments.
// For a subsampled direction, a block that is a single MI unit in size and sits on an odd MI
// row/column is anchored one MI unit earlier.
static INLINE void setup_pred_plane(Buf2D* dst, BlockSize bsize, uint8_t* src, int width, int height, int stride,
                                    int mi_row, int mi_col, int subsampling_x, int subsampling_y) {
    const bool step_back_row = subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1);
    const bool step_back_col = subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1);
    const int  anchor_row    = step_back_row ? mi_row - 1 : mi_row;
    const int  anchor_col    = step_back_col ? mi_col - 1 : mi_col;

    // Sample position of the block inside the (possibly subsampled) plane
    const int x = (MI_SIZE * anchor_col) >> subsampling_x;
    const int y = (MI_SIZE * anchor_row) >> subsampling_y;

    dst->buf0   = src;
    dst->buf    = src + (y * stride + x); // scaled_buffer_offset(x, y, stride, scale);
    dst->width  = width;
    dst->height = height;
    dst->stride = stride;
}
2056
2057
// Initialise the per-plane prediction buffers in dst[] from the frame buffer src.
// All three planes get their base pointer and stride; only the luma plane (dst[0]) is then
// offset to the block at (mi_row, mi_col) and given the cropped luma dimensions.
void svt_av1_setup_pred_block(BlockSize bsize, Buf2D dst[MAX_PLANES], const Yv12BufferConfig* src, int mi_row,
                              int mi_col) {
    Buf2D* const luma = &dst[0];
    Buf2D* const cb   = &dst[1];
    Buf2D* const cr   = &dst[2];

    luma->buf    = src->y_buffer;
    luma->stride = src->y_stride;

    cb->buf    = src->u_buffer;
    cr->buf    = src->v_buffer;
    cr->stride = src->uv_stride;
    cb->stride = cr->stride;

    // Luma is never subsampled, hence the two trailing zeros
    setup_pred_plane(
        luma, bsize, luma->buf, src->y_crop_width, src->y_crop_height, luma->stride, mi_row, mi_col, 0, 0);
}
2068
2069
// SAD-per-bit lookup table for 8-bit content, indexed by qindex; filled by svt_av1_init_me_luts()
static int sad_per_bit_lut_8[QINDEX_RANGE];
2070
// SAD-per-bit lookup table for 10-bit content, indexed by qindex; filled by svt_av1_init_me_luts()
static int sad_per_bit_lut_10[QINDEX_RANGE];
2071
2072
// Get the sad per bit for the relevant qindex and bit depth
2073
0
int svt_aom_get_sad_per_bit(int qidx, EbBitDepth is_hbd) {
2074
0
    return is_hbd ? sad_per_bit_lut_10[qidx] : sad_per_bit_lut_8[qidx];
2075
0
}
2076
2077
2
// Fill bit16lut[0 .. range-1] with the SAD-per-bit value of each qindex at the given bit depth.
// The values come from a linear formula applied to the quantizer step returned by
// svt_av1_convert_qindex_to_q(), truncated to int. A formula is used (rather than a fixed table)
// to make it easier to resolve the impact of experimental changes to the quantizer tables.
static void init_me_luts_bd(int* bit16lut, int range, EbBitDepth bit_depth) {
    for (int qindex = 0; qindex < range; ++qindex) {
        const double q   = svt_av1_convert_qindex_to_q(qindex, bit_depth);
        bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
    }
}
2087
2088
1
void svt_av1_init_me_luts(void) {
2089
1
    init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, EB_EIGHT_BIT);
2090
1
    init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, EB_TEN_BIT);
2091
1
}
2092
2093
#if CONFIG_ENABLE_OBMC
2094
// Refine an OBMC MV with an 8-bit search and report the rate of the resulting MV.
//   best_pred_mv : starting MV (1/8-pel units; it is shifted right by 3 to obtain the full-pel start)
//   x            : search context; its xd, cost tables, MV limits, sadperbit16 and errorperbit are
//                  set up here, and the result is returned in x->best_mv
//   ref_mv       : MV predictor used for the search range and for the MV rate
//   rate_mv      : out - rate of x->best_mv relative to ref_mv
//   refine_level : 0, 1, 3 -> full-pel search followed by sub-pel search
//                  2, 4    -> sub-pel search only, starting from the truncated best_pred_mv
//                  other   -> no search; x->best_mv is best_pred_mv truncated to full-pel
// Only cand->block_mi.motion_mode == OBMC_CAUSAL is supported; other modes hit an assert.
static void single_motion_search(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
                                 Mv best_pred_mv, IntraBcContext* x, BlockSize bsize, Mv* ref_mv, int* rate_mv,
                                 int refine_level) {
    const bool do_full_refine = refine_level == 0 || refine_level == 1 || refine_level == 3;
    const bool do_frac_refine = refine_level >= 0 && refine_level <= 4;

    const Av1Common* const cm      = pcs->ppcs->av1_cm;
    FrameHeader*           frm_hdr = &pcs->ppcs->frm_hdr;
    // single_motion_search supports 8bit path only
    uint32_t full_lambda = ctx->full_lambda_md[EB_8_BIT_MD];

    x->xd = ctx->blk_ptr->av1xd;
    // Block position in MI units, recovered from the distance to the top/left frame edges
    const int mi_row = -x->xd->mb_to_top_edge / (8 * MI_SIZE);
    const int mi_col = -x->xd->mb_to_left_edge / (8 * MI_SIZE);

    x->nmv_vec_cost  = ctx->md_rate_est_ctx->nmv_vec_cost;
    x->mv_cost_stack = ctx->md_rate_est_ctx->nmvcoststack;

    // Set up limit values for MV components.
    // Mv beyond the range do not produce new/different prediction block.
    const int mi_height  = mi_size_high[bsize];
    const int mi_width   = mi_size_wide[bsize];
    x->mv_limits.row_min = -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
    x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
    x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
    x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;

    //set search parameters
    x->sadperbit16 = svt_aom_get_sad_per_bit(frm_hdr->quantization_params.base_q_idx, 0);
    x->errorperbit = full_lambda >> RD_EPB_SHIFT;
    x->errorperbit += (x->errorperbit == 0); // never let the error-per-bit weight be zero

    if (!do_full_refine) {
        // No full-pel search: start from the predictor truncated to full-pel
        x->best_mv.x = best_pred_mv.x >> 3;
        x->best_mv.y = best_pred_mv.y >> 3;
    } else {
        const int      sadpb           = x->sadperbit16;
        const MvLimits saved_mv_limits = x->mv_limits;

        // Note: MV limits are modified here. Always restore the original values
        // after full-pixel motion search.
        svt_av1_set_mv_search_range(&x->mv_limits, ref_mv);

        Mv mvp_full = best_pred_mv; // mbmi->mv[0].as_mv;

        // TODO: should use get_fullmv_from_mv instead of shifting
        mvp_full.x >>= 3;
        mvp_full.y >>= 3;

        x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV; //D

        if (cand->block_mi.motion_mode == OBMC_CAUSAL) {
            svt_av1_obmc_full_pixel_search(
                ctx, x, &mvp_full, sadpb, &svt_aom_mefn_ptr[bsize], ref_mv, &(x->best_mv), 0);
        } else {
            assert(0 && "Invalid motion mode!\n");
        }

        x->mv_limits = saved_mv_limits;
    }

    if (!do_frac_refine) {
        // No sub-pel search: convert the full-pel MV back to 1/8-pel units
        x->best_mv.x *= 8;
        x->best_mv.y *= 8;
    } else {
        int          dis; /* TODO: use dis in distortion calculation later. */
        unsigned int sse1; //unused
        if (cand->block_mi.motion_mode == OBMC_CAUSAL) {
            svt_av1_find_best_obmc_sub_pixel_tree_up(ctx,
                                                     x,
                                                     cm,
                                                     mi_row,
                                                     mi_col,
                                                     &x->best_mv,
                                                     ref_mv,
                                                     frm_hdr->allow_high_precision_mv,
                                                     x->errorperbit,
                                                     &svt_aom_mefn_ptr[bsize],
                                                     0, // mv.subpel_force_stop
                                                     2, //  mv.subpel_iters_per_step
                                                     x->nmv_vec_cost,
                                                     x->mv_cost_stack,
                                                     &dis,
                                                     &sse1,
                                                     0,
                                                     USE_8_TAPS);
        } else {
            assert(0 && "Invalid motion mode!\n");
        }
    }

    // Rate of the final MV, using the approximate cost model when enabled
    if (ctx->approx_inter_rate) {
        *rate_mv = svt_av1_mv_bit_cost_light(&x->best_mv, ref_mv);
    } else {
        *rate_mv = svt_av1_mv_bit_cost(&x->best_mv, ref_mv, x->nmv_vec_cost, x->mv_cost_stack, MV_COST_WEIGHT);
    }
}
2206
2207
// Refine the OBMC MV (8 bit search). Return true if search found a valid MV; false otherwise
2208
uint8_t svt_aom_obmc_motion_refinement(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
2209
0
                                       int refine_level) {
2210
0
    if (block_size_wide[ctx->blk_geom->bsize] > ctx->obmc_ctrls.max_blk_size_to_refine ||
2211
0
        block_size_high[ctx->blk_geom->bsize] > ctx->obmc_ctrls.max_blk_size_to_refine) {
2212
0
        return 1;
2213
0
    }
2214
2215
0
    if (ctx->obmc_weighted_pred_ready == false) {
2216
0
        int mi_row = ctx->blk_org_y >> 2;
2217
0
        int mi_col = ctx->blk_org_x >> 2;
2218
2219
0
        DECLARE_ALIGNED(16, uint8_t, dst_buf1_8b[4 * MAX_PLANES * MAX_SB_SQUARE]);
2220
2221
0
        uint8_t* dst_buf2_8b = dst_buf1_8b + 2 * MAX_PLANES * MAX_SB_SQUARE;
2222
0
        if (ctx->obmc_is_luma_neigh_10bit) {
2223
0
            svt_aom_un_pack2d((uint16_t*)ctx->obmc_buff_0,
2224
0
                              ctx->blk_geom->bwidth,
2225
0
                              dst_buf1_8b,
2226
0
                              ctx->blk_geom->bwidth,
2227
0
                              NULL,
2228
0
                              ctx->blk_geom->bwidth,
2229
0
                              ctx->blk_geom->bwidth,
2230
0
                              ctx->blk_geom->bheight);
2231
2232
0
            svt_aom_un_pack2d((uint16_t*)ctx->obmc_buff_1,
2233
0
                              ctx->blk_geom->bwidth,
2234
0
                              dst_buf2_8b,
2235
0
                              ctx->blk_geom->bwidth,
2236
0
                              NULL,
2237
0
                              ctx->blk_geom->bwidth,
2238
0
                              ctx->blk_geom->bwidth,
2239
0
                              ctx->blk_geom->bheight);
2240
0
        }
2241
2242
0
        calc_target_weighted_pred(pcs,
2243
0
                                  ctx,
2244
0
                                  pcs->ppcs->av1_cm,
2245
0
                                  ctx->blk_ptr->av1xd,
2246
0
                                  mi_row,
2247
0
                                  mi_col,
2248
0
                                  ctx->obmc_is_luma_neigh_10bit ? dst_buf1_8b : ctx->obmc_buff_0,
2249
0
                                  ctx->blk_geom->bwidth,
2250
0
                                  ctx->obmc_is_luma_neigh_10bit ? dst_buf2_8b : ctx->obmc_buff_1,
2251
0
                                  ctx->blk_geom->bwidth);
2252
2253
0
        ctx->obmc_weighted_pred_ready = true;
2254
0
    }
2255
0
    Mv              best_pred_mv[2] = {{{0}}, {{0}}};
2256
0
    IntraBcContext  x_st;
2257
0
    IntraBcContext* x = &x_st;
2258
2259
0
    MacroBlockD* xd;
2260
0
    xd = x->xd       = ctx->blk_ptr->av1xd;
2261
0
    const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
2262
0
    const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
2263
2264
0
    {
2265
0
        assert(cand->block_mi.ref_frame[1] == NONE_FRAME); // OBMC only allowed for unipred cands
2266
0
        uint8_t ref_idx  = get_ref_frame_idx(cand->block_mi.ref_frame[0]);
2267
0
        uint8_t list_idx = get_list_idx(cand->block_mi.ref_frame[0]);
2268
2269
0
        assert(list_idx < MAX_NUM_OF_REF_PIC_LIST);
2270
0
        EbPictureBufferDesc* reference_picture =
2271
0
            ((EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr)->reference_picture;
2272
2273
0
        svt_aom_use_scaled_rec_refs_if_needed(pcs,
2274
0
                                              pcs->ppcs->enhanced_pic,
2275
0
                                              (EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr,
2276
0
                                              &reference_picture,
2277
0
                                              EB_8_BIT_MD);
2278
0
        Yv12BufferConfig ref_buf;
2279
0
        svt_aom_link_eb_to_aom_buffer_desc_8bit(reference_picture, &ref_buf);
2280
2281
0
        Buf2D yv12_mb[MAX_PLANES];
2282
0
        svt_av1_setup_pred_block(ctx->blk_geom->bsize, yv12_mb, &ref_buf, mi_row, mi_col);
2283
0
        for (int i = 0; i < 1; ++i) {
2284
0
            x->xdplane[i].pre[0] = yv12_mb[i]; //ref in ME
2285
0
        }
2286
2287
0
        x->plane[0].src.buf  = 0; // x->xdplane[0].pre[0];
2288
0
        x->plane[0].src.buf0 = 0;
2289
0
    }
2290
2291
0
    Mv  best_mv = {.as_int = cand->block_mi.mv[0].as_int};
2292
0
    int tmp_rate_mv;
2293
2294
0
    Mv ref_mv = {.as_int = cand->pred_mv[0].as_int};
2295
2296
0
    single_motion_search(pcs, ctx, cand, best_mv, x, ctx->blk_geom->bsize, &ref_mv, &tmp_rate_mv, refine_level);
2297
0
    cand->block_mi.mv[0].as_int = x->best_mv.as_int;
2298
0
    svt_aom_choose_best_av1_mv_pred(ctx,
2299
0
                                    cand->block_mi.ref_frame[0], // OBMC only allowed for unipred candidtes
2300
0
                                    cand->block_mi.mode,
2301
0
                                    cand->block_mi.mv[0],
2302
0
                                    (Mv){{0}},
2303
0
                                    &cand->drl_index,
2304
0
                                    best_pred_mv);
2305
0
    cand->pred_mv[0].as_int = best_pred_mv[0].as_int;
2306
    // Check that final chosen MV is valid
2307
0
    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, cand->block_mi.mv[0], cand->block_mi.mv[0], 0)) {
2308
0
        return 1;
2309
0
    }
2310
2311
0
    return 0;
2312
0
}
2313
#endif // CONFIG_ENABLE_OBMC
2314
2315
/*
2316
   inject ME candidates for Light PD0
2317
*/
2318
static void inject_new_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx,
2319
0
                                            uint32_t* candidate_total_cnt, const bool allow_bipred) {
2320
0
    const uint32_t         me_sb_addr       = ctx->me_sb_addr;
2321
0
    const uint32_t         me_block_offset  = ctx->me_block_offset;
2322
0
    ModeDecisionCandidate* cand_array       = ctx->fast_cand_array;
2323
0
    uint32_t               cand_total_cnt   = (*candidate_total_cnt);
2324
0
    const MeSbResults*     me_results       = pcs->ppcs->pa_me_data->me_results[me_sb_addr];
2325
0
    const uint8_t          total_me_cnt     = me_results->total_me_candidate_index[me_block_offset];
2326
0
    const MeCandidate*     me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset];
2327
2328
0
    const uint8_t max_refs = pcs->ppcs->pa_me_data->max_refs;
2329
0
    const uint8_t max_l0   = pcs->ppcs->pa_me_data->max_l0;
2330
2331
0
    for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) {
2332
0
        const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index];
2333
0
        const uint8_t      inter_direction      = me_block_results_ptr->direction;
2334
0
        const uint8_t      list0_ref_index      = me_block_results_ptr->ref_idx_l0;
2335
0
        const uint8_t      list1_ref_index      = me_block_results_ptr->ref_idx_l1;
2336
2337
0
        if (ctx->lpd0_ctrls.pd0_level == VERY_LIGHT_PD0 && inter_direction == BI_PRED) {
2338
0
            continue;
2339
0
        }
2340
2341
        /**************
2342
            NEWMV
2343
        ************* */
2344
0
        if (inter_direction < BI_PRED) {
2345
0
            const uint8_t list_idx = inter_direction;
2346
0
            const uint8_t ref_idx  = inter_direction ? list1_ref_index : list0_ref_index;
2347
0
            const int16_t to_inject_mv_x =
2348
0
                (me_results->me_mv_array[me_block_offset * max_refs + (inter_direction ? max_l0 : 0) + ref_idx].x) * 8;
2349
0
            const int16_t to_inject_mv_y =
2350
0
                (me_results->me_mv_array[me_block_offset * max_refs + (inter_direction ? max_l0 : 0) + ref_idx].y) * 8;
2351
0
            const uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx);
2352
2353
0
            ModeDecisionCandidate* cand = &cand_array[cand_total_cnt];
2354
0
            cand->block_mi.mode         = NEWMV;
2355
0
            cand->block_mi.mv[0]        = (Mv){{to_inject_mv_x, to_inject_mv_y}};
2356
0
            cand->block_mi.ref_frame[0] = to_inject_ref_type;
2357
0
            cand->block_mi.ref_frame[1] = NONE_FRAME;
2358
0
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
2359
0
            if (cand_total_cnt > 2) {
2360
0
                break;
2361
0
            }
2362
0
        } else if (allow_bipred) {
2363
0
            assert(inter_direction == BI_PRED);
2364
            /**************
2365
               NEW_NEWMV
2366
            ************* */
2367
0
            const uint32_t ref0_offset = me_block_offset * max_refs +
2368
0
                (me_block_results_ptr->ref0_list > 0 ? max_l0 : 0) + list0_ref_index;
2369
0
            const uint32_t ref1_offset = me_block_offset * max_refs +
2370
0
                (me_block_results_ptr->ref1_list > 0 ? max_l0 : 0) + list1_ref_index;
2371
0
            const int16_t to_inject_mv_x_l0 = (me_results->me_mv_array[ref0_offset].x) * 8;
2372
0
            const int16_t to_inject_mv_y_l0 = (me_results->me_mv_array[ref0_offset].y) * 8;
2373
0
            const int16_t to_inject_mv_x_l1 = (me_results->me_mv_array[ref1_offset].x) * 8;
2374
0
            const int16_t to_inject_mv_y_l1 = (me_results->me_mv_array[ref1_offset].y) * 8;
2375
2376
0
            MvReferenceFrame rf[2] = {svt_get_ref_frame_type(me_block_results_ptr->ref0_list, list0_ref_index),
2377
0
                                      svt_get_ref_frame_type(me_block_results_ptr->ref1_list, list1_ref_index)};
2378
2379
            // Inject AVG candidate only
2380
0
            ModeDecisionCandidate* cand   = &cand_array[cand_total_cnt];
2381
0
            cand->block_mi.mv[REF_LIST_0] = (Mv){{to_inject_mv_x_l0, to_inject_mv_y_l0}};
2382
0
            cand->block_mi.mv[REF_LIST_1] = (Mv){{to_inject_mv_x_l1, to_inject_mv_y_l1}};
2383
0
            cand->block_mi.mode           = NEW_NEWMV;
2384
0
            cand->block_mi.ref_frame[0]   = rf[0];
2385
0
            cand->block_mi.ref_frame[1]   = rf[1];
2386
0
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
2387
0
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
2388
0
            if (cand_total_cnt > 2) {
2389
0
                break;
2390
0
            }
2391
0
        }
2392
0
    }
2393
    // update the total number of candidates injected
2394
0
    (*candidate_total_cnt) = cand_total_cnt;
2395
0
}
2396
2397
/*
 * Inject ME candidates for Light PD1.
 *
 * For every ME result of the current block, appends a NEWMV (unipred) or, when allow_bipred is
 * set, a NEW_NEWMV (COMPOUND_AVERAGE) candidate to ctx->fast_cand_array. MVs are taken from
 * ctx->sb_me_mv. A candidate is skipped when its MV/ref-type is already in ctx->injected_mvs,
 * or when corrupted_mv_check is on and is_valid_mv_diff() rejects it. Each injected MV is
 * appended to ctx->injected_mvs / ctx->injected_ref_types.
 *
 *   candidate_total_cnt : in/out running number of candidates in the array
 */
static void inject_new_candidates_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                            uint32_t* candidate_total_cnt, const bool allow_bipred) {
    const MeSbResults*     me_results     = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const MeCandidate*     me_cands       = &me_results->me_candidate_array[ctx->me_cand_offset];
    const uint8_t          total_me_cnt   = me_results->total_me_candidate_index[ctx->me_block_offset];
    ModeDecisionCandidate* cand_array     = ctx->fast_cand_array;
    uint32_t               cand_total_cnt = *candidate_total_cnt;
    Mv                     best_pred_mv[2] = {{{0}}, {{0}}};

    for (uint8_t me_idx = 0; me_idx < total_me_cnt; ++me_idx) {
        const MeCandidate* me_cand         = &me_cands[me_idx];
        const uint8_t      inter_direction = me_cand->direction;
        const uint8_t      list0_ref_index = me_cand->ref_idx_l0;
        const uint8_t      list1_ref_index = me_cand->ref_idx_l1;

        // Candidate reduction: drop results whose direction is not 2 once the ME list is longer
        // than the level's threshold (level >= 2: more than 1 entry, other non-zero levels: more than 3)
        if (ctx->cand_reduction_ctrls.reduce_unipred_candidates && inter_direction != 2) {
            const uint8_t max_me_cnt = ctx->cand_reduction_ctrls.reduce_unipred_candidates >= 2 ? 1 : 3;
            if (total_me_cnt > max_me_cnt) {
                continue;
            }
        }

        if (inter_direction < BI_PRED) {
            // NEWMV
            const uint8_t list_idx = inter_direction;
            const uint8_t ref_idx  = inter_direction ? list1_ref_index : list0_ref_index;
            const Mv      mv       = ctx->sb_me_mv[list_idx][ref_idx];
            const uint8_t ref_type = svt_get_ref_frame_type(list_idx, ref_idx);

            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv, mv, ref_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEWMV, mv, (Mv){{0}}, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv, mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.ref_frame[0]       = ref_type;
            cand->block_mi.ref_frame[1]       = NONE_FRAME;
            cand->block_mi.mv[0].as_int       = mv.as_int;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[ref_type].num;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->drl_index                   = drl_index;
            cand->skip_mode_allowed           = false;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Add the injected MV to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred && inter_direction == 2 &&
                   (!ctx->is_intra_bordered || !ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) {
            // NEW_NEWMV
            const Mv         mv0      = ctx->sb_me_mv[me_cand->ref0_list][list0_ref_index];
            const Mv         mv1      = ctx->sb_me_mv[me_cand->ref1_list][list1_ref_index];
            MvReferenceFrame rf[2]    = {svt_get_ref_frame_type(me_cand->ref0_list, list0_ref_index),
                                         svt_get_ref_frame_type(me_cand->ref1_list, list1_ref_index)};
            const uint8_t    ref_type = av1_ref_frame_type(rf);

            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv0, mv1, ref_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEW_NEWMV, mv0, mv1, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv0, mv1, 1)) {
                continue;
            }

            ModeDecisionCandidate* cand         = &cand_array[cand_total_cnt];
            cand->block_mi.mode                 = NEW_NEWMV;
            cand->block_mi.motion_mode          = SIMPLE_TRANSLATION;
            cand->block_mi.use_intrabc          = 0;
            cand->block_mi.is_interintra_used   = 0;
            cand->block_mi.ref_frame[0]         = rf[0];
            cand->block_mi.ref_frame[1]         = rf[1];
            cand->block_mi.mv[0].as_int         = mv0.as_int;
            cand->block_mi.mv[1].as_int         = mv1.as_int;
            cand->block_mi.comp_group_idx       = 0;
            cand->block_mi.compound_idx         = 1;
            cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE;
            cand->pred_mv[0].as_int             = best_pred_mv[0].as_int;
            cand->pred_mv[1].as_int             = best_pred_mv[1].as_int;
            cand->drl_index                     = drl_index;
            cand->skip_mode_allowed             = false;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Add the injected MV to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        }
    }

    // update the total number of candidates injected
    *candidate_total_cnt = cand_total_cnt;
}
2503
2504
/*
 * Inject ME-based NEWMV / NEW_NEWMV candidates.
 *
 * For every ME result of the current block:
 *   - unipred: appends a NEWMV candidate, then calls inj_non_simple_modes() with inter-intra,
 *     warp and OBMC all enabled;
 *   - bipred (only when allow_bipred is set): appends a NEW_NEWMV candidate configured through
 *     determine_compound_mode(MD_COMP_AVG), then calls inj_comp_modes() when
 *     inter_comp_ctrls.do_me is set.
 * A result is skipped when its reference fails the PA_ME validity check, when its MV/ref-type is
 * already in ctx->injected_mvs, or when corrupted_mv_check is on and is_valid_mv_diff() rejects
 * it. Each injected MV is appended to ctx->injected_mvs / ctx->injected_ref_types.
 *
 *   candidate_total_cnt : in/out running number of candidates in ctx->fast_cand_array
 */
static void inject_new_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt,
                                  const bool allow_bipred) {
    const MeSbResults*     me_results      = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const MeCandidate*     me_cands        = &me_results->me_candidate_array[ctx->me_cand_offset];
    const uint8_t          total_me_cnt    = me_results->total_me_candidate_index[ctx->me_block_offset];
    ModeDecisionCandidate* cand_array      = ctx->fast_cand_array;
    uint32_t               cand_total_cnt  = *candidate_total_cnt;
    Mv                     best_pred_mv[2] = {{{0}}, {{0}}};

    for (uint8_t me_idx = 0; me_idx < total_me_cnt; ++me_idx) {
        const MeCandidate* me_cand         = &me_cands[me_idx];
        const uint8_t      inter_direction = me_cand->direction;
        const uint8_t      list0_ref_index = me_cand->ref_idx_l0;
        const uint8_t      list1_ref_index = me_cand->ref_idx_l1;

        // Candidate reduction: with more than 3 ME results, keep only those with direction 2
        if (ctx->cand_reduction_ctrls.reduce_unipred_candidates && total_me_cnt > 3 && inter_direction != 2) {
            continue;
        }

        if (inter_direction < BI_PRED) {
            // NEWMV unipred
            const uint8_t list_idx = inter_direction;
            const uint8_t ref_idx  = list_idx == REF_LIST_0 ? list0_ref_index : list1_ref_index;
            if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, PA_ME_GROUP), list_idx, ref_idx)) {
                continue;
            }

            const Mv      mv       = ctx->sb_me_mv[list_idx][ref_idx];
            const uint8_t ref_type = svt_get_ref_frame_type(list_idx, ref_idx);
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv, mv, ref_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEWMV, mv, (Mv){{0}}, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv, mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.ref_frame[0]       = ref_type;
            cand->block_mi.ref_frame[1]       = NONE_FRAME;
            cand->block_mi.mv[0].as_int       = mv.as_int;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[ref_type].num;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->drl_index                   = drl_index;
            cand->skip_mode_allowed           = false;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Non-simple variants: inter-intra, warp and OBMC are all enabled here
            inj_non_simple_modes(
                pcs, ctx, &cand_total_cnt, true /* enable_ii */, true /* enable_warp */, true /* enable_obmc */);

            // Add the injected MV to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred &&
                   (!ctx->is_intra_bordered || !ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) {
            // NEW_NEWMV
            assert(inter_direction == BI_PRED);
            if (!is_valid_bipred_ref(
                    ctx, PA_ME_GROUP, me_cand->ref0_list, list0_ref_index, me_cand->ref1_list, list1_ref_index)) {
                continue;
            }

            const Mv         mv0      = ctx->sb_me_mv[me_cand->ref0_list][list0_ref_index];
            const Mv         mv1      = ctx->sb_me_mv[me_cand->ref1_list][list1_ref_index];
            MvReferenceFrame rf[2]    = {svt_get_ref_frame_type(me_cand->ref0_list, list0_ref_index),
                                         svt_get_ref_frame_type(me_cand->ref1_list, list1_ref_index)};
            const uint8_t    ref_type = av1_ref_frame_type(rf);
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv0, mv1, ref_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEW_NEWMV, mv0, mv1, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv0, mv1, 1)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.mode               = NEW_NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.mv[0].as_int       = mv0.as_int;
            cand->block_mi.mv[1].as_int       = mv1.as_int;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->pred_mv[1].as_int           = best_pred_mv[1].as_int;
            cand->drl_index                   = drl_index;
            cand->skip_mode_allowed           = false;
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Additional compound modes for the ME pair, with the stored prediction counters reset first
            if (ctx->inter_comp_ctrls.do_me) {
                ctx->cmp_store.pred0_cnt = 0;
                ctx->cmp_store.pred1_cnt = 0;
                inj_comp_modes(pcs, ctx, &cand_total_cnt);
            }

            // Add the injected MV pair to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        }
    }

    // update the total number of candidates injected
    *candidate_total_cnt = cand_total_cnt;
}
2627
2628
/*
 * Inject global-motion candidates.
 *
 * Walks ctx->ref_frame_type_arr and, for each reference type:
 *   - single reference: appends a GLOBALMV candidate whose MV is derived from the frame's
 *     global-motion parameters, then calls inj_non_simple_modes() with only inter-intra enabled;
 *   - reference pair (only when allow_bipred is set): appends a GLOBAL_GLOBALMV candidate
 *     configured through determine_compound_mode(MD_COMP_AVG), then calls inj_comp_modes() when
 *     inter_comp_ctrls.do_global is set.
 * A reference type is skipped when it fails the GLOBAL_GROUP validity check, or when
 * gm_ctrls.skip_identity is set and (one of) its global-motion model(s) is IDENTITY.
 * Each injected MV is appended to ctx->injected_mvs / ctx->injected_ref_types.
 *
 *   candidate_total_cnt : in/out running number of candidates in ctx->fast_cand_array
 */
static void inject_global_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt,
                                     const bool allow_bipred) {
    ModeDecisionCandidate* cand_array     = ctx->fast_cand_array;
    uint32_t               cand_total_cnt = (*candidate_total_cnt);
    uint32_t               mi_row         = ctx->blk_org_y >> MI_SIZE_LOG2;
    uint32_t               mi_col         = ctx->blk_org_x >> MI_SIZE_LOG2;

    for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) {
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it];
        MvReferenceFrame rf[2];
        av1_set_ref_frame(rf, ref_pair);

        //single ref/list
        if (rf[1] == NONE_FRAME) {
            MvReferenceFrame frame_type = rf[0];
            uint8_t          list_idx   = get_list_idx(rf[0]);
            uint8_t          ref_idx    = get_ref_frame_idx(rf[0]);

            if (!svt_aom_is_valid_unipred_ref(ctx, GLOBAL_GROUP, list_idx, ref_idx)) {
                continue;
            }
            // Get gm params
            WarpedMotionParams* gm_params = &pcs->ppcs->global_motion[frame_type];
            if (pcs->ppcs->gm_ctrls.skip_identity && gm_params->wmtype == IDENTITY) {
                continue;
            }
            Mv to_inj_mv = svt_aom_gm_get_motion_vector_enc(gm_params,
                                                            pcs->ppcs->frm_hdr.allow_high_precision_mv,
                                                            ctx->blk_geom->bsize,
                                                            mi_col,
                                                            mi_row,
                                                            0 /* force_integer_mv */);

            assert(list_idx == 0 || list_idx == 1);
            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.mode               = GLOBALMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            cand->wm_params_l0                = *gm_params;
            cand->wm_params_l1                = *gm_params;
            cand->block_mi.use_intrabc        = 0;
            cand->skip_mode_allowed           = false;
            cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
            cand->drl_index                   = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Only the inter-intra variant is requested for global candidates
            const bool enable_ii   = true;
            const bool enable_obmc = false;
            const bool enable_warp = false;
            inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc);

            // Add the injected MV to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred) {
            uint8_t ref_idx_0  = get_ref_frame_idx(rf[0]);
            uint8_t ref_idx_1  = get_ref_frame_idx(rf[1]);
            uint8_t list_idx_0 = get_list_idx(rf[0]);
            uint8_t list_idx_1 = get_list_idx(rf[1]);

            // An invalid pair only disqualifies this pair. Skipping with `continue` (as the unipred
            // branch does) keeps the remaining reference types in play and guarantees the running
            // count is written back at the end of the function; an early return here would drop
            // candidates already added above while leaving their MVs in ctx->injected_mvs.
            if (!is_valid_bipred_ref(ctx, GLOBAL_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) {
                continue;
            }
            // Get gm params
            WarpedMotionParams* gm_params_0 = &pcs->ppcs->global_motion[svt_get_ref_frame_type(list_idx_0, ref_idx_0)];

            WarpedMotionParams* gm_params_1 = &pcs->ppcs->global_motion[svt_get_ref_frame_type(list_idx_1, ref_idx_1)];

            if (pcs->ppcs->gm_ctrls.skip_identity &&
                (gm_params_0->wmtype == IDENTITY || gm_params_1->wmtype == IDENTITY)) {
                continue;
            }
            Mv to_inj_mv0 = svt_aom_gm_get_motion_vector_enc(gm_params_0,
                                                             pcs->ppcs->frm_hdr.allow_high_precision_mv,
                                                             ctx->blk_geom->bsize,
                                                             mi_col,
                                                             mi_row,
                                                             0 /* force_integer_mv */);

            Mv to_inj_mv1 = svt_aom_gm_get_motion_vector_enc(gm_params_1,
                                                             pcs->ppcs->frm_hdr.allow_high_precision_mv,
                                                             ctx->blk_geom->bsize,
                                                             mi_col,
                                                             mi_row,
                                                             0 /* force_integer_mv */);

            uint8_t to_inject_ref_type = av1_ref_frame_type(rf);

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.use_intrabc        = 0;
            cand->skip_mode_allowed           = false;
            cand->block_mi.mode               = GLOBAL_GLOBALMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->wm_params_l0                = *gm_params_0;
            cand->wm_params_l1                = *gm_params_1;
            cand->block_mi.is_interintra_used = 0;
            cand->drl_index                   = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
            cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Additional compound modes for the global pair, with the stored prediction counters reset first
            if (ctx->inter_comp_ctrls.do_global) {
                ctx->cmp_store.pred0_cnt = 0;
                ctx->cmp_store.pred1_cnt = 0;
                inj_comp_modes(pcs, ctx, &cand_total_cnt);
            }

            // Add the injected MV pair to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
            ++ctx->injected_mv_count;
        }
    }
    // update the total number of candidates injected
    (*candidate_total_cnt) = cand_total_cnt;
}
2747
2748
/*
 * Inject candidates built from the PME motion vectors stored in ctx->best_pme_mv.
 *
 * For every entry of ctx->ref_frame_type_arr:
 *   - single reference: one NEWMV candidate, followed by the extra motion-mode candidates added by
 *     inj_non_simple_modes() (inter-intra, warp and OBMC all requested);
 *   - reference pair (only when allow_bipred is true): one NEW_NEWMV candidate, followed by
 *     inj_comp_modes() when ctx->inter_comp_ctrls.do_pme is set.
 *
 * A candidate is skipped when its PME MV is flagged invalid, when the MV was already injected for the
 * same reference type, or when ctx->corrupted_mv_check is set and is_valid_mv_diff() rejects it.
 * Every accepted MV is appended to ctx->injected_mvs / ctx->injected_ref_types.
 *
 * candidate_total_cnt  [in/out] running number of candidates in ctx->fast_cand_array.
 */
static void inject_pme_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt,
                                  const bool allow_bipred) {
    ModeDecisionCandidate* const fast_cands = ctx->fast_cand_array;
    uint32_t                     cnt        = *candidate_total_cnt;
    Mv                           pred_mv[2] = {{{0}}, {{0}}};

    for (uint32_t idx = 0; idx < ctx->tot_ref_frame_types; ++idx) {
        const MvReferenceFrame pair_type = ctx->ref_frame_type_arr[idx];
        MvReferenceFrame       rf[2];
        av1_set_ref_frame(rf, pair_type);

        if (rf[1] == NONE_FRAME) {
            // Uni-prediction: a single list/reference
            const MvReferenceFrame ref_type = rf[0];
            const uint8_t          list     = get_list_idx(rf[0]);
            const uint8_t          ref      = get_ref_frame_idx(rf[0]);

            if (!ctx->valid_pme_mv[list][ref]) {
                continue;
            }
            const Mv pme_mv = ctx->best_pme_mv[list][ref];
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, pme_mv, pme_mv, ref_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEWMV, pme_mv, (Mv){{0}}, &drl_index, pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(pred_mv, pme_mv, pme_mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* const c = &fast_cands[cnt];
            c->skip_mode_allowed           = false;
            c->drl_index                   = drl_index;
            c->pred_mv[0].as_int           = pred_mv[0].as_int;
            c->block_mi.use_intrabc        = 0;
            c->block_mi.mode               = NEWMV;
            c->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            c->block_mi.is_interintra_used = 0;
            c->block_mi.mv[0].as_int       = pme_mv.as_int;
            c->block_mi.ref_frame[0]       = rf[0];
            c->block_mi.ref_frame[1]       = rf[1];
            c->block_mi.num_proj_ref       = ctx->wm_sample_info[ref_type].num;
            INC_MD_CAND_CNT(cnt, pcs->ppcs->max_can_count);

            inj_non_simple_modes(pcs, ctx, &cnt, true /* enable_ii */, true /* enable_warp */, true /* enable_obmc */);

            // Remember the MV so later injection stages can detect duplicates
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = pme_mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
            continue;
        }

        if (!allow_bipred) {
            continue;
        }

        // Bi-prediction: combine the PME MVs of the two references
        const uint8_t ref0  = get_ref_frame_idx(rf[0]);
        const uint8_t ref1  = get_ref_frame_idx(rf[1]);
        const uint8_t list0 = get_list_idx(rf[0]);
        const uint8_t list1 = get_list_idx(rf[1]);

        if (!(ctx->valid_pme_mv[list0][ref0] && ctx->valid_pme_mv[list1][ref1])) {
            continue;
        }
        const Mv      pme_mv0       = ctx->best_pme_mv[list0][ref0];
        const Mv      pme_mv1       = ctx->best_pme_mv[list1][ref1];
        const uint8_t pair_ref_type = av1_ref_frame_type((const MvReferenceFrame[]){
            svt_get_ref_frame_type(list0, ref0),
            svt_get_ref_frame_type(list1, ref1),
        });
        if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, pme_mv0, pme_mv1, pair_ref_type)) {
            continue;
        }

        uint8_t drl_index = 0;
        svt_aom_choose_best_av1_mv_pred(ctx, pair_ref_type, NEW_NEWMV, pme_mv0, pme_mv1, &drl_index, pred_mv);
        if (ctx->corrupted_mv_check && !is_valid_mv_diff(pred_mv, pme_mv0, pme_mv1, 1)) {
            continue;
        }

        ModeDecisionCandidate* const c = &fast_cands[cnt];
        c->skip_mode_allowed           = false;
        c->drl_index                   = drl_index;
        c->pred_mv[0].as_int           = pred_mv[0].as_int;
        c->pred_mv[1].as_int           = pred_mv[1].as_int;
        c->block_mi.use_intrabc        = 0;
        c->block_mi.mode               = NEW_NEWMV;
        c->block_mi.motion_mode        = SIMPLE_TRANSLATION;
        c->block_mi.is_interintra_used = 0;
        c->block_mi.mv[0].as_int       = pme_mv0.as_int;
        c->block_mi.mv[1].as_int       = pme_mv1.as_int;
        c->block_mi.ref_frame[0]       = rf[0];
        c->block_mi.ref_frame[1]       = rf[1];
        determine_compound_mode(pcs, ctx, c, MD_COMP_AVG);
        INC_MD_CAND_CNT(cnt, pcs->ppcs->max_can_count);

        if (ctx->inter_comp_ctrls.do_pme) {
            ctx->cmp_store.pred0_cnt = 0;
            ctx->cmp_store.pred1_cnt = 0;
            inj_comp_modes(pcs, ctx, &cnt);
        }

        // Remember the MV pair so later injection stages can detect duplicates
        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = pme_mv0.as_int;
        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = pme_mv1.as_int;
        ctx->injected_ref_types[ctx->injected_mv_count]     = pair_ref_type;
        ++ctx->injected_mv_count;
    }

    // Publish the updated candidate count
    *candidate_total_cnt = cnt;
}
2847
2848
/*
 * Light-PD0 inter candidate injection: only the candidates produced by
 * inject_new_candidates_light_pd0() are added.
 */
static void inject_inter_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              uint32_t* candidate_total_cnt) {
    const FrameHeader* const hdr = &pcs->ppcs->frm_hdr;

    // Compound prediction needs a frame-level reference mode other than SINGLE_REFERENCE and a block
    // whose width and height both differ from 4 (AV1 spec 5.11.25).
    const bool allow_bipred = hdr->reference_mode != SINGLE_REFERENCE && ctx->blk_geom->bwidth != 4 &&
        ctx->blk_geom->bheight != 4;

    inject_new_candidates_light_pd0(pcs, ctx, candidate_total_cnt, allow_bipred);
}
2860
2861
/*
 * Light-PD1 inter candidate injection: refreshes the overlappable-neighbour count and the warped-motion
 * samples, then adds the MVP candidates and the ME candidates, each gated by its ctx flag.
 */
static void inject_inter_candidates_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              uint32_t* cand_total_cnt) {
    const FrameHeader* const hdr = &pcs->ppcs->frm_hdr;

    // Compound prediction needs a frame-level reference mode other than SINGLE_REFERENCE and a block
    // whose width and height both differ from 4 (AV1 spec 5.11.25).
    const bool allow_bipred = hdr->reference_mode != SINGLE_REFERENCE && ctx->blk_geom->bwidth != 4 &&
        ctx->blk_geom->bheight != 4;

    if (!hdr->is_motion_mode_switchable) {
        // Overlappable neighbours are only needed for non-"SIMPLE_TRANSLATION" candidates
        ctx->blk_ptr->overlappable_neighbors = 0;
    } else {
        // Needed when WM/OBMC is on at the frame level (even though not used in the light-PD1 path)
        const uint16_t row_in_mi = ctx->blk_org_y >> MI_SIZE_LOG2;
        const uint16_t col_in_mi = ctx->blk_org_x >> MI_SIZE_LOG2;
        svt_av1_count_overlappable_neighbors(pcs, ctx->blk_ptr, ctx->blk_geom->bsize, row_in_mi, col_in_mi);
    }
    svt_aom_init_wm_samples(pcs, ctx);

    // MVP candidates: dropped for intra-bordered blocks when the neighbouring-mode reduction is enabled
    if (ctx->new_nearest_injection &&
        (!ctx->is_intra_bordered || !ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) {
        inject_mvp_candidates_ii_light_pd1(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // ME candidates
    if (ctx->inject_new_me) {
        inject_new_candidates_light_pd1(pcs, ctx, cand_total_cnt, allow_bipred);
    }
}
2891
2892
/*
 * Regular-path inter candidate injection.
 *
 * After refreshing the overlappable-neighbour count and the warped-motion samples (and, when OBMC is
 * compiled in and allowed, precomputing the luma OBMC data), the injection stages run in this order,
 * each gated by its own ctx flag: MVP, NEAREST/NEAR + NEW combinations, ME, global motion,
 * bipred 3x3, unipred 3x3, PME.
 */
static void svt_aom_inject_inter_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                            uint32_t* cand_total_cnt) {
    const FrameHeader* const hdr = &pcs->ppcs->frm_hdr;

    // Compound prediction needs a frame-level reference mode other than SINGLE_REFERENCE and a block
    // whose width and height both differ from 4 (AV1 spec 5.11.25).
    const bool allow_bipred = hdr->reference_mode != SINGLE_REFERENCE && ctx->blk_geom->bwidth != 4 &&
        ctx->blk_geom->bheight != 4;

    const uint32_t row_in_mi = ctx->blk_org_y >> MI_SIZE_LOG2;
    const uint32_t col_in_mi = ctx->blk_org_x >> MI_SIZE_LOG2;

    svt_av1_count_overlappable_neighbors(pcs, ctx->blk_ptr, ctx->blk_geom->bsize, row_in_mi, col_in_mi);
    svt_aom_init_wm_samples(pcs, ctx);
#if CONFIG_ENABLE_OBMC
    if (ctx->obmc_ctrls.enabled && ctx->obmc_ctrls.refine_level == 0 &&
        svt_aom_obmc_motion_mode_allowed(pcs, ctx, ctx->blk_geom->bsize, 1, LAST_FRAME, -1, NEWMV) == OBMC_CAUSAL) {
        svt_aom_precompute_obmc_data(pcs, ctx, PICTURE_BUFFER_DESC_LUMA_MASK);
    }
#endif

    // MVP candidates: dropped for intra-bordered blocks when the neighbouring-mode reduction is enabled
    if (ctx->new_nearest_injection &&
        (!ctx->is_intra_bordered || !ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) {
        inject_mvp_candidates_ii(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV, NEW_NEARMV
    if (allow_bipred && ctx->new_nearest_near_comb_injection) {
        inject_new_nearest_new_comb_candidates(pcs, ctx, cand_total_cnt);
    }

    // ME candidates
    if (ctx->inject_new_me) {
        inject_new_candidates(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // Global-motion candidates
    if (ctx->global_mv_injection) {
        inject_global_candidates(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // 3x3 refinement candidates around the bipred / unipred MVs
    if (allow_bipred && ctx->bipred3x3_ctrls.enabled) {
        bipred_3x3_candidates_injection(pcs, ctx, cand_total_cnt);
    }
    if (ctx->unipred3x3_injection) {
        unipred_3x3_candidates_injection(pcs, ctx, cand_total_cnt);
    }

    // PME candidates: injected only when both the injection flag and the updated PME enable are set
    if (ctx->inject_new_pme && ctx->updated_enable_pme) {
        inject_pme_candidates(pcs, ctx, cand_total_cnt, allow_bipred);
    }
}
2948
2949
static const TxType g_intra_mode_to_tx_type[INTRA_MODES] = {
2950
    DCT_DCT, // DC
2951
    ADST_DCT, // V
2952
    DCT_ADST, // H
2953
    DCT_DCT, // D45
2954
    ADST_ADST, // D135
2955
    ADST_DCT, // D117
2956
    DCT_ADST, // D153
2957
    DCT_ADST, // D207
2958
    ADST_DCT, // D63
2959
    ADST_ADST, // SMOOTH
2960
    ADST_DCT, // SMOOTH_V
2961
    DCT_ADST, // SMOOTH_H
2962
    ADST_ADST, // PAETH
2963
};
2964
2965
/*
 * Return the g_intra_mode_to_tx_type entry for a plane's intra mode: the luma mode is used for
 * PLANE_TYPE_Y, otherwise the chroma mode converted through get_uv_mode().
 */
static INLINE TxType intra_mode_to_tx_type(PredictionMode pred_mode, UvPredictionMode pred_mode_uv,
                                           PlaneType plane_type) {
    PredictionMode lookup_mode = pred_mode;
    if (plane_type != PLANE_TYPE_Y) {
        lookup_mode = get_uv_mode(pred_mode_uv);
    }
    assert(lookup_mode < INTRA_MODES);
    return g_intra_mode_to_tx_type[lookup_mode];
}
2971
2972
/* For intra prediction, the chroma transform type may not follow the luma type.
2973
This function will return the intra chroma TX type to be used, which is based on TX size and chroma mode.
2974
Refer to section 5.11.40 of the AV1 spec (compute_tx_type). */
2975
228k
TxType svt_aom_get_intra_uv_tx_type(UvPredictionMode pred_mode_uv, TxSize tx_size, int32_t reduced_tx_set) {
2976
228k
    if (txsize_sqr_up_map[tx_size] > TX_32X32) {
2977
0
        return DCT_DCT;
2978
0
    }
2979
2980
    // In intra mode, uv planes don't share the same prediction mode as y
2981
    // plane, so the tx_type should not be shared. Pass DC_PRED as luma mode because the argument
2982
    // will not be used.
2983
228k
    TxType tx_type = intra_mode_to_tx_type(DC_PRED, pred_mode_uv, PLANE_TYPE_UV);
2984
228k
    assert(tx_type < TX_TYPES);
2985
228k
    const TxSetType tx_set_type = get_ext_tx_set_type(tx_size, /*is_inter*/ 0, reduced_tx_set);
2986
228k
    return !av1_ext_tx_used[tx_set_type][tx_type] ? DCT_DCT : tx_type;
2987
228k
}
2988
2989
// Values are now correlated to quantizer.
2990
0
static INLINE int mv_check_bounds(const MvLimits* mv_limits, const Mv* mv) {
2991
0
    return (mv->y >> 3) < mv_limits->row_min || (mv->y >> 3) > mv_limits->row_max ||
2992
0
        (mv->x >> 3) < mv_limits->col_min || (mv->x >> 3) > mv_limits->col_max;
2993
0
}
2994
2995
0
// Assertion that stays active in release builds: logs a message when the condition is zero and
// lets execution continue.
static void assert_release(int statement) {
    if (statement != 0) {
        return;
    }
    SVT_LOG("ASSERT_ERRRR\n");
}
3000
3001
static void intra_bc_search(PictureControlSet* pcs, ModeDecisionContext* ctx, const SequenceControlSet* scs,
3002
0
                            BlkStruct* blk_ptr, Mv* dv_cand, uint8_t* num_dv_cand) {
3003
0
    IntraBcContext  x_st;
3004
0
    IntraBcContext* x           = &x_st;
3005
0
    uint32_t        full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];
3006
3007
0
    svt_memcpy(&x->crc_calculator, &pcs->crc_calculator, sizeof(pcs->crc_calculator));
3008
0
    x->approx_inter_rate = ctx->approx_inter_rate;
3009
0
    x->xd                = blk_ptr->av1xd;
3010
0
    x->nmv_vec_cost      = ctx->md_rate_est_ctx->nmv_vec_cost;
3011
0
    x->mv_cost_stack     = ctx->md_rate_est_ctx->nmvcoststack;
3012
0
    BlockSize bsize      = ctx->blk_geom->bsize;
3013
0
    assert(bsize < BLOCK_SIZES_ALL);
3014
0
    FrameHeader*           frm_hdr    = &pcs->ppcs->frm_hdr;
3015
0
    const Av1Common* const cm         = pcs->ppcs->av1_cm;
3016
0
    MvReferenceFrame       ref_frame  = INTRA_FRAME;
3017
0
    const int              num_planes = 3;
3018
0
    MacroBlockD*           xd         = blk_ptr->av1xd;
3019
0
    const TileInfo*        tile       = &xd->tile;
3020
0
    const int              mi_row     = -xd->mb_to_top_edge / (8 * MI_SIZE);
3021
0
    const int              mi_col     = -xd->mb_to_left_edge / (8 * MI_SIZE);
3022
0
    const int              w          = block_size_wide[bsize];
3023
0
    const int              h          = block_size_high[bsize];
3024
0
    const int              sb_row     = mi_row >> scs->seq_header.sb_size_log2;
3025
0
    const int              sb_col     = mi_col >> scs->seq_header.sb_size_log2;
3026
3027
    // Set up limit values for MV components.
3028
    // Mv beyond the range do not produce new/different prediction block.
3029
0
    const int mi_width   = mi_size_wide[bsize];
3030
0
    const int mi_height  = mi_size_high[bsize];
3031
0
    x->mv_limits.row_min = -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
3032
0
    x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
3033
0
    x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
3034
0
    x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;
3035
    //set search paramters
3036
0
    x->sadperbit16 = svt_aom_get_sad_per_bit(frm_hdr->quantization_params.base_q_idx, 0);
3037
0
    x->errorperbit = full_lambda >> RD_EPB_SHIFT;
3038
0
    x->errorperbit += (x->errorperbit == 0);
3039
    //temp buffer for hash me
3040
0
    for (int i = 0; i < 2; i++) {
3041
0
        EB_MALLOC_ARRAY_NO_CHECK(x->hash_value_buffer[i], AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
3042
0
    }
3043
3044
0
    Mv nearestmv, nearmv;
3045
0
    svt_av1_find_best_ref_mvs_from_stack(0, ctx->ref_mv_stack /*mbmi_ext*/, xd, ref_frame, &nearestmv, &nearmv, 0);
3046
0
    if (nearestmv.as_int == INVALID_MV) {
3047
0
        nearestmv.as_int = 0;
3048
0
    }
3049
0
    if (nearmv.as_int == INVALID_MV) {
3050
0
        nearmv.as_int = 0;
3051
0
    }
3052
0
    Mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3053
0
    if (dv_ref.as_int == 0) {
3054
0
        svt_aom_find_ref_dv(&dv_ref, tile, scs->seq_header.sb_mi_size, mi_row, mi_col);
3055
0
    }
3056
    // Ref DV should not have sub-pel.
3057
0
    assert((dv_ref.x & 7) == 0);
3058
0
    assert((dv_ref.y & 7) == 0);
3059
0
    ctx->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3060
3061
    /* pointer to current frame */
3062
0
    Yv12BufferConfig cur_buf;
3063
0
    svt_aom_link_eb_to_aom_buffer_desc_8bit(pcs->ppcs->enhanced_pic, &cur_buf);
3064
0
    struct Buf2D yv12_mb[MAX_PLANES];
3065
0
    svt_av1_setup_pred_block(bsize, yv12_mb, &cur_buf, mi_row, mi_col);
3066
0
    for (int i = 0; i < num_planes; ++i) {
3067
0
        x->xdplane[i].pre[0] = yv12_mb[i]; // ref in ME
3068
0
    }
3069
    // setup src for DV search same as ref
3070
0
    x->plane[0].src = x->xdplane[0].pre[0];
3071
3072
0
    enum IntrabcMotionDirection max_dir = pcs->ppcs->intrabc_ctrls.search_dir ? IBC_MOTION_LEFT : IBC_MOTION_DIRECTIONS;
3073
3074
0
    for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir; ++dir) {
3075
0
        const MvLimits tmp_mv_limits = x->mv_limits;
3076
3077
0
        switch (dir) {
3078
0
        case IBC_MOTION_ABOVE:
3079
0
            x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
3080
0
            x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
3081
0
            x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
3082
0
            x->mv_limits.row_max = (sb_row * scs->seq_header.sb_mi_size - mi_row) * MI_SIZE - h;
3083
0
            break;
3084
0
        case IBC_MOTION_LEFT:
3085
0
            x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
3086
0
            x->mv_limits.col_max = (sb_col * scs->seq_header.sb_mi_size - mi_col) * MI_SIZE - w;
3087
            // TODO: Minimize the overlap between above and
3088
            // left areas.
3089
0
            x->mv_limits.row_min     = (tile->mi_row_start - mi_row) * MI_SIZE;
3090
0
            int bottom_coded_mi_edge = AOMMIN((sb_row + 1) * scs->seq_header.sb_mi_size, tile->mi_row_end);
3091
0
            x->mv_limits.row_max     = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3092
0
            break;
3093
0
        default:
3094
0
            assert(0);
3095
0
        }
3096
0
        assert_release(x->mv_limits.col_min >= tmp_mv_limits.col_min);
3097
0
        assert_release(x->mv_limits.col_max <= tmp_mv_limits.col_max);
3098
0
        assert_release(x->mv_limits.row_min >= tmp_mv_limits.row_min);
3099
0
        assert_release(x->mv_limits.row_max <= tmp_mv_limits.row_max);
3100
3101
0
        svt_av1_set_mv_search_range(&x->mv_limits, &dv_ref);
3102
3103
0
        if (x->mv_limits.col_max < x->mv_limits.col_min || x->mv_limits.row_max < x->mv_limits.row_min) {
3104
0
            x->mv_limits = tmp_mv_limits;
3105
0
            continue;
3106
0
        }
3107
0
        Mv mvp_full = dv_ref;
3108
0
        mvp_full.x >>= 3;
3109
0
        mvp_full.y >>= 3;
3110
0
        x->best_mv.as_int = 0;
3111
3112
        // Hash Search
3113
0
        const AomVarianceFnPtr* fn_ptr = &svt_aom_mefn_ptr[bsize];
3114
3115
0
        int best_hash_cost = INT_MAX;
3116
0
        Mv  best_hash_mv   = {{0, 0}};
3117
3118
0
        svt_av1_intrabc_hash_search(
3119
0
            pcs, x, bsize, mi_col * MI_SIZE, mi_row * MI_SIZE, &dv_ref, 1, fn_ptr, &best_hash_cost, &best_hash_mv);
3120
3121
        // Hash produced a candidate
3122
0
        if (best_hash_cost < INT_MAX) {
3123
0
            Mv dv;
3124
0
            dv.x = best_hash_mv.x * 8;
3125
0
            dv.y = best_hash_mv.y * 8;
3126
3127
0
            dv_cand[*num_dv_cand] = dv;
3128
0
            (*num_dv_cand)++;
3129
3130
0
            x->best_mv = best_hash_mv;
3131
0
        }
3132
        // Full-pixel fallback if hash didn't produce a candidate
3133
0
        else {
3134
0
            svt_av1_full_pixel_search(pcs, x, bsize, &mvp_full, 0, x->sadperbit16, NULL, &dv_ref);
3135
3136
0
            Mv dv = {{x->best_mv.x * 8, x->best_mv.y * 8}};
3137
3138
0
            if (!mv_check_bounds(&x->mv_limits, &dv) &&
3139
0
                svt_aom_is_dv_valid(dv, xd, mi_row, mi_col, bsize, scs->seq_header.sb_size_log2)) {
3140
0
                dv_cand[*num_dv_cand] = dv;
3141
0
                (*num_dv_cand)++;
3142
0
            }
3143
0
        }
3144
3145
0
        x->mv_limits = tmp_mv_limits;
3146
0
    }
3147
3148
0
    for (int i = 0; i < 2; i++) {
3149
0
        EB_FREE_ARRAY(x->hash_value_buffer[i]);
3150
0
    }
3151
0
}
3152
3153
static void inject_intra_bc_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, const SequenceControlSet* scs,
3154
0
                                       BlkStruct* blk_ptr, uint32_t* cand_cnt) {
3155
0
    Mv      dv_cand[2];
3156
0
    uint8_t num_dv_cand = 0;
3157
3158
    //perform dv-pred + search up to 2 dv(s)
3159
0
    intra_bc_search(pcs, ctx, scs, blk_ptr, dv_cand, &num_dv_cand);
3160
3161
0
    ModeDecisionCandidate* cand_array = ctx->fast_cand_array;
3162
3163
0
    for (uint32_t dv_i = 0; dv_i < num_dv_cand; dv_i++) {
3164
0
        ModeDecisionCandidate* cand               = &cand_array[*cand_cnt];
3165
0
        cand->palette_info                        = NULL;
3166
0
        cand->block_mi.use_intrabc                = 1;
3167
0
        cand->block_mi.angle_delta[PLANE_TYPE_Y]  = 0;
3168
0
        cand->block_mi.angle_delta[PLANE_TYPE_UV] = 0;
3169
0
        cand->block_mi.uv_mode                    = UV_DC_PRED;
3170
0
        cand->block_mi.cfl_alpha_signs            = 0;
3171
0
        cand->block_mi.cfl_alpha_idx              = 0;
3172
0
        cand->transform_type[0]                   = DCT_DCT;
3173
0
        cand->transform_type_uv                   = DCT_DCT;
3174
0
        cand->block_mi.ref_frame[0]               = INTRA_FRAME;
3175
0
        cand->block_mi.ref_frame[1]               = NONE_FRAME;
3176
0
        cand->block_mi.mode                       = DC_PRED;
3177
0
        cand->block_mi.filter_intra_mode          = FILTER_INTRA_MODES;
3178
        //inter ralated
3179
0
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
3180
0
        cand->block_mi.is_interintra_used = 0;
3181
0
        cand->skip_mode_allowed           = false;
3182
0
        cand->block_mi.mv[0].as_int       = dv_cand[dv_i].as_int;
3183
0
        cand->pred_mv[0].as_int           = ctx->ref_mv_stack[INTRA_FRAME][0].this_mv.as_int;
3184
0
        cand->drl_index                   = 0;
3185
0
        cand->block_mi.interp_filters     = av1_broadcast_interp_filter(BILINEAR);
3186
0
        INC_MD_CAND_CNT((*cand_cnt), pcs->ppcs->max_can_count);
3187
0
    }
3188
0
}
3189
3190
/*
 * Light-PD0 intra injection: writes a single DC_PRED candidate into slot 0 of ctx->fast_cand_array.
 *
 * candidate_total_cnt  [out] overwritten with the count after injection; the incoming value is
 *                            not read, counting restarts from 0.
 */
static void inject_intra_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              uint32_t* candidate_total_cnt) {
    uint32_t                     cnt = 0;
    ModeDecisionCandidate* const dc  = &ctx->fast_cand_array[cnt];

    dc->skip_mode_allowed = false;
    dc->palette_info      = NULL;
    dc->transform_type[0] = DCT_DCT;
    dc->transform_type_uv = DCT_DCT;

    dc->block_mi.mode                       = DC_PRED;
    dc->block_mi.uv_mode                    = UV_DC_PRED;
    dc->block_mi.use_intrabc                = 0;
    dc->block_mi.filter_intra_mode          = FILTER_INTRA_MODES;
    dc->block_mi.angle_delta[PLANE_TYPE_Y]  = 0;
    dc->block_mi.angle_delta[PLANE_TYPE_UV] = 0;
    dc->block_mi.cfl_alpha_signs            = 0;
    dc->block_mi.cfl_alpha_idx              = 0;
    dc->block_mi.ref_frame[0]               = INTRA_FRAME;
    dc->block_mi.ref_frame[1]               = NONE_FRAME;
    dc->block_mi.motion_mode                = SIMPLE_TRANSLATION;
    dc->block_mi.is_interintra_used         = 0;

    INC_MD_CAND_CNT(cnt, pcs->ppcs->max_can_count);

    // Report the number of candidates injected
    *candidate_total_cnt = cnt;
}
3215
3216
/*
 * Inject the regular intra candidates, one per (intra mode, angle delta) pair that survives the
 * ctx->intra_ctrls.angular_pred_level restrictions:
 *   - level 0   : every directional mode is skipped;
 *   - level >= 2: angle deltas of +/-1 and +/-2 are skipped;
 *   - level >= 3: every non-zero angle delta is skipped;
 *   - level >= 4: directional modes at or above D45_PRED are skipped.
 * Modes run from DC_PRED up to ctx->intra_ctrls.intra_mode_end, or DC_PRED only when
 * dc_cand_only_flag is set. For a lossless segment, candidates whose chroma transform type is not
 * DCT_DCT are dropped.
 *
 * candidate_total_cnt  [in/out] running number of candidates in ctx->fast_cand_array.
 */
static void inject_intra_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, const bool dc_cand_only_flag,
                                    uint32_t* candidate_total_cnt) {
    const FrameHeader* const     hdr        = &pcs->ppcs->frm_hdr;
    ModeDecisionCandidate* const fast_cands = ctx->fast_cand_array;
    uint32_t                     cnt        = *candidate_total_cnt;

    const PredictionMode first_mode = DC_PRED;
    PredictionMode       last_mode  = DC_PRED;
    if (!dc_cand_only_flag) {
        last_mode = ctx->intra_ctrls.intra_mode_end;
    }

    bool angle_delta_allowed = false;
    if (ctx->intra_ctrls.angular_pred_level) {
        angle_delta_allowed = av1_use_angle_delta(ctx->blk_geom->bsize);
    }
    const bool   no_angular    = ctx->intra_ctrls.angular_pred_level == 0;
    const bool   skip_from_d45 = ctx->intra_ctrls.angular_pred_level >= 4;
    const TxSize uv_tx_size    = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);

    for (PredictionMode mode = first_mode; mode <= last_mode; ++mode) {
        const bool directional = av1_is_directional_mode(mode);

        if (directional && (no_angular || (skip_from_d45 && mode >= D45_PRED && mode < INTRA_MODE_END))) {
            continue;
        }

        // Directional modes sweep the 7 angle deltas when allowed; every other case uses delta 0 only
        uint8_t delta_cnt = 1;
        if (directional && ctx->intra_ctrls.angular_pred_level <= 2 && angle_delta_allowed) {
            delta_cnt = 7;
        }

        for (uint8_t delta_idx = 0; delta_idx < delta_cnt; ++delta_idx) {
            int32_t angle_delta = 0;
            if (delta_cnt != 1) {
                angle_delta = CLIP(delta_idx - MAX_ANGLE_DELTA, -MAX_ANGLE_DELTA, MAX_ANGLE_DELTA);
            }

            const int32_t delta_mag = angle_delta < 0 ? -angle_delta : angle_delta;
            if (ctx->intra_ctrls.angular_pred_level >= 2 && (delta_mag == 1 || delta_mag == 2)) {
                continue;
            }
            if (ctx->intra_ctrls.angular_pred_level >= 3 && delta_mag != 0) {
                continue;
            }

            ModeDecisionCandidate* const c        = &fast_cands[cnt];
            c->skip_mode_allowed                  = false;
            c->palette_info                       = NULL;
            c->block_mi.mode                      = mode;
            c->block_mi.use_intrabc               = 0;
            c->block_mi.filter_intra_mode         = FILTER_INTRA_MODES;
            c->block_mi.angle_delta[PLANE_TYPE_Y] = angle_delta;

            // Chroma follows the stored best uv mode/angle when available, the luma choice otherwise
            if (ctx->ind_uv_avail) {
                c->block_mi.uv_mode                    = ctx->best_uv_mode[mode];
                c->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->best_uv_angle[mode];
            } else {
                c->block_mi.uv_mode                    = intra_luma_to_chroma[mode];
                c->block_mi.angle_delta[PLANE_TYPE_UV] = c->block_mi.angle_delta[PLANE_TYPE_Y];
            }

            c->block_mi.cfl_alpha_signs = 0;
            c->block_mi.cfl_alpha_idx   = 0;
            c->transform_type[0]        = DCT_DCT;
            c->transform_type_uv = svt_aom_get_intra_uv_tx_type(c->block_mi.uv_mode, uv_tx_size, hdr->reduced_tx_set);

            // The slot is reused by the next candidate because the count is not advanced
            if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && c->transform_type_uv != DCT_DCT) {
                continue;
            }

            c->block_mi.ref_frame[0]       = INTRA_FRAME;
            c->block_mi.ref_frame[1]       = NONE_FRAME;
            c->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            c->block_mi.is_interintra_used = 0;
            INC_MD_CAND_CNT(cnt, pcs->ppcs->max_can_count);
        }
    }

    // Publish the updated candidate count
    *candidate_total_cnt = cnt;
}
3286
3287
// Inject one fast-loop candidate per filter-intra mode, from FILTER_DC_PRED up to a last mode
// derived from ctx->intra_ctrls.intra_mode_end and capped by
// ctx->filter_intra_ctrls.max_filter_intra_mode. Luma is always DC_PRED with the filter-intra
// mode attached. On exit *candidate_total_cnt holds the updated candidate count.
static void inject_filter_intra_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                           uint32_t* candidate_total_cnt) {
    // Translate the last regular intra mode allowed into the last filter-intra mode to test
    FilterIntraMode last_fi_mode;
    if (ctx->intra_ctrls.intra_mode_end == PAETH_PRED) {
        last_fi_mode = FILTER_PAETH_PRED;
    } else if (ctx->intra_ctrls.intra_mode_end >= D157_PRED) {
        last_fi_mode = FILTER_D157_PRED;
    } else if (ctx->intra_ctrls.intra_mode_end >= H_PRED) {
        last_fi_mode = FILTER_H_PRED;
    } else if (ctx->intra_ctrls.intra_mode_end >= V_PRED) {
        last_fi_mode = FILTER_V_PRED;
    } else {
        last_fi_mode = FILTER_DC_PRED;
    }
    last_fi_mode = MIN(last_fi_mode, ctx->filter_intra_ctrls.max_filter_intra_mode);

    const TxSize       uv_tx_size = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
    const FrameHeader* frm_hdr    = &pcs->ppcs->frm_hdr;
    uint32_t           num_cands  = *candidate_total_cnt;

    for (FilterIntraMode fi_mode = FILTER_DC_PRED; fi_mode <= last_fi_mode; fi_mode++) {
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[num_cands];

        // Luma: DC_PRED carrying the filter-intra mode, no palette, no intrabc
        cand->skip_mode_allowed                  = false;
        cand->palette_info                       = NULL;
        cand->block_mi.mode                      = DC_PRED;
        cand->block_mi.filter_intra_mode         = fi_mode;
        cand->block_mi.use_intrabc               = 0;
        cand->block_mi.angle_delta[PLANE_TYPE_Y] = 0;

        // Chroma: use the independent chroma search result when available, otherwise
        // derive the chroma mode from the intra mode associated with this filter mode
        if (ctx->ind_uv_avail) {
            cand->block_mi.uv_mode                    = ctx->best_uv_mode[fimode_to_intramode[fi_mode]];
            cand->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->best_uv_angle[fimode_to_intramode[fi_mode]];
        } else {
            cand->block_mi.uv_mode                    = intra_luma_to_chroma[fimode_to_intramode[fi_mode]];
            cand->block_mi.angle_delta[PLANE_TYPE_UV] = cand->block_mi.angle_delta[PLANE_TYPE_Y];
        }
        cand->block_mi.cfl_alpha_idx   = 0;
        cand->block_mi.cfl_alpha_signs = 0;

        cand->transform_type[0] = DCT_DCT;
        cand->transform_type_uv = svt_aom_get_intra_uv_tx_type(
            cand->block_mi.uv_mode, uv_tx_size, frm_hdr->reduced_tx_set);

        // A lossless segment only accepts DCT_DCT for chroma: drop the candidate otherwise
        // (the slot is not counted and gets reused by the next injection)
        if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) {
            continue;
        }

        cand->block_mi.ref_frame[0]       = INTRA_FRAME;
        cand->block_mi.ref_frame[1]       = NONE_FRAME;
        cand->block_mi.is_interintra_used = 0;
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
        INC_MD_CAND_CNT(num_cands, pcs->ppcs->max_can_count);
    }

    // Report the updated number of injected candidates
    *candidate_total_cnt = num_cands;
}
3338
3339
// Inject a single zero-motion NEWMV candidate that uses the first reference of list 0. The
// stage-0 generators in this file call it for non-I slices when no other candidate was injected.
// Nothing is injected, and *candidate_total_cnt is left untouched, when ctx->corrupted_mv_check
// is set and is_valid_mv_diff() rejects the selected MV predictor.
static void inject_zz_backup_candidate(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                       uint32_t* candidate_total_cnt) {
    ModeDecisionCandidate* cand        = &ctx->fast_cand_array[*candidate_total_cnt];
    Mv                     pred_mvs[2] = {{{0}}, {{0}}};

    // Select the MV predictor (and its DRL index) for a (0,0) MV
    cand->drl_index = 0;
    svt_aom_choose_best_av1_mv_pred(
        ctx, svt_get_ref_frame_type(REF_LIST_0, 0), NEWMV, (Mv){{0}}, (Mv){{0}}, &cand->drl_index, pred_mvs);

    if (ctx->corrupted_mv_check && !is_valid_mv_diff(pred_mvs, (Mv){{0, 0}}, (Mv){{0, 0}}, 0)) {
        return;
    }

    cand->skip_mode_allowed           = false;
    cand->block_mi.mode               = NEWMV;
    cand->block_mi.use_intrabc        = 0;
    cand->block_mi.is_interintra_used = 0;
    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
    cand->block_mi.ref_frame[0]       = svt_get_ref_frame_type(REF_LIST_0, 0);
    cand->block_mi.ref_frame[1]       = NONE_FRAME;
    cand->block_mi.mv[0]              = (Mv){{0, 0}};
    cand->pred_mv[0].as_int           = pred_mvs[0].as_int;
    cand->block_mi.num_proj_ref       = ctx->wm_sample_info[svt_get_ref_frame_type(REF_LIST_0, 0)].num;
    cand->transform_type[0]           = DCT_DCT;
    cand->transform_type_uv           = DCT_DCT;

    // Report the updated number of injected candidates
    uint32_t num_cands = *candidate_total_cnt;
    INC_MD_CAND_CNT(num_cands, pcs->ppcs->max_can_count);
    *candidate_total_cnt = num_cands;
}
3372
3373
941k
// Returns non-zero when palette may be used for a block of size bsize: allow_palette must be
// non-zero and the block must be at least BLOCK_8X8 and at most 64 wide and 64 high.
int svt_av1_allow_palette(int allow_palette, BlockSize bsize) {
    assert(bsize < BLOCK_SIZES_ALL);
    if (!allow_palette) {
        return 0;
    }
    if (bsize < BLOCK_8X8) {
        return 0;
    }
    return block_size_wide[bsize] <= 64 && block_size_high[bsize] <= 64;
}
3377
3378
void search_palette_luma(PictureControlSet* pcs, ModeDecisionContext* ctx, PaletteInfo* palette_cand,
3379
                         uint8_t* palette_size_array, uint32_t* tot_palette_cands);
3380
3381
0
// Run the luma palette search and inject one fast-loop candidate per palette it returns.
// Each candidate is DC_PRED with a luma palette attached; the chroma palette size is always 0.
// On exit *candidate_total_cnt holds the updated candidate count.
static void inject_palette_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt) {
    const TxSize uv_tx_size    = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
    PaletteInfo* palettes      = ctx->palette_cand_array;
    uint8_t*     palette_sizes = ctx->palette_size_array_0;
    uint32_t     num_palettes  = 0;

    // MD palette search
    search_palette_luma(pcs, ctx, palettes, palette_sizes, &num_palettes);

    uint32_t num_cands = *candidate_total_cnt;
    for (uint32_t pal_idx = 0; pal_idx < num_palettes; ++pal_idx) {
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[num_cands];

        assert(palette_sizes[pal_idx] < 9);
        cand->palette_info    = &palettes[pal_idx];
        cand->palette_size[0] = palette_sizes[pal_idx];
        // Palette is not supported for chroma
        cand->palette_size[1] = 0;

        cand->skip_mode_allowed                  = false;
        cand->block_mi.is_interintra_used        = 0;
        cand->block_mi.use_intrabc               = 0;
        cand->block_mi.mode                      = DC_PRED;
        cand->block_mi.angle_delta[PLANE_TYPE_Y] = 0;
        cand->block_mi.filter_intra_mode         = FILTER_INTRA_MODES;

        // Palette is not used for chroma, so the intra chroma mode is unconstrained here.
        // Using palette for chroma would require forcing DC_PRED as the intra chroma mode.
        assert(cand->palette_size[1] == 0);
        if (ctx->ind_uv_avail) {
            cand->block_mi.uv_mode                    = ctx->best_uv_mode[DC_PRED];
            cand->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->best_uv_angle[DC_PRED];
        } else {
            cand->block_mi.uv_mode                    = intra_luma_to_chroma[DC_PRED];
            cand->block_mi.angle_delta[PLANE_TYPE_UV] = cand->block_mi.angle_delta[PLANE_TYPE_Y];
        }
        cand->block_mi.cfl_alpha_idx   = 0;
        cand->block_mi.cfl_alpha_signs = 0;

        cand->transform_type[0] = DCT_DCT;
        cand->transform_type_uv = svt_aom_get_intra_uv_tx_type(
            cand->block_mi.uv_mode, uv_tx_size, pcs->ppcs->frm_hdr.reduced_tx_set);

        // A lossless segment only accepts DCT_DCT for chroma: drop the candidate otherwise
        if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) {
            continue;
        }

        cand->block_mi.motion_mode  = SIMPLE_TRANSLATION;
        cand->block_mi.ref_frame[0] = INTRA_FRAME;
        cand->block_mi.ref_frame[1] = NONE_FRAME;
        INC_MD_CAND_CNT(num_cands, pcs->ppcs->max_can_count);
    }

    // Report the updated number of injected candidates
    *candidate_total_cnt = num_cands;
}
3432
3433
0
// Set *dc_cand_only_flag to 1 when the smaller of the PME and ME distortions is below
// dc_only_th scaled by the block area. The flag is left untouched otherwise, and also when
// both distortions still hold the (uint32_t)~0 value.
static INLINE void eliminate_candidate_based_on_pme_me_results(ModeDecisionContext* ctx, uint8_t* dc_cand_only_flag) {
    if (ctx->md_pme_dist == (uint32_t)~0 && ctx->md_me_dist == (uint32_t)~0) {
        return;
    }

    const uint32_t th = (uint32_t)ctx->cand_reduction_ctrls.cand_elimination_ctrls.dc_only_th *
        (ctx->blk_geom->bheight * ctx->blk_geom->bwidth);
    const uint32_t best_me_distortion = ctx->md_pme_dist < ctx->md_me_dist ? ctx->md_pme_dist : ctx->md_me_dist;

    if (best_me_distortion < th) {
        *dc_cand_only_flag = 1;
    }
}
3443
3444
// Returns true when the reference pair rf[] matches one of the first tot_ref_frame_types
// entries of ref_frame_type_arr, each expanded to a pair with av1_set_ref_frame().
// A pair whose first reference is INTRA_FRAME is always accepted.
static bool valid_ref_frame_type(MvReferenceFrame rf[2], const MvReferenceFrame ref_frame_type_arr[],
                                 uint8_t tot_ref_frame_types) {
    // INTRA_FRAME is added in candidates sometimes, skip validation
    if (rf[0] == INTRA_FRAME) {
        return true;
    }

    bool found = false;
    for (uint8_t type_idx = 0; type_idx < tot_ref_frame_types && !found; type_idx++) {
        MvReferenceFrame allowed_pair[2];
        av1_set_ref_frame(allowed_pair, ref_frame_type_arr[type_idx]);
        found = allowed_pair[0] == rf[0] && allowed_pair[1] == rf[1];
    }
    return found;
}
3460
3461
// refer to inject_zz_backup_candidate, but use BWD ref instead of LAST
3462
static void inject_sframe_backup_candidate(PictureControlSet* pcs, ModeDecisionContext* ctx,
3463
0
                                           uint32_t* candidate_total_cnt) {
3464
0
    ModeDecisionCandidate* cand_array      = ctx->fast_cand_array;
3465
0
    Mv                     best_pred_mv[2] = {{{0}}, {{0}}};
3466
0
    uint32_t               cand_total_cnt  = (*candidate_total_cnt);
3467
0
    cand_array[cand_total_cnt].drl_index   = 0;
3468
0
    svt_aom_choose_best_av1_mv_pred(ctx,
3469
0
                                    svt_get_ref_frame_type(REF_LIST_1, 0),
3470
0
                                    NEWMV,
3471
0
                                    (Mv){{0}},
3472
0
                                    (Mv){{0}},
3473
0
                                    &cand_array[cand_total_cnt].drl_index,
3474
0
                                    best_pred_mv);
3475
0
    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, (Mv){{0, 0}}, (Mv){{0, 0}}, 0)) {
3476
0
        ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
3477
0
        cand->block_mi.use_intrabc        = 0;
3478
0
        cand->skip_mode_allowed           = false;
3479
0
        cand->block_mi.mode               = NEWMV;
3480
0
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
3481
0
        cand->block_mi.mv[0]              = (Mv){{0, 0}};
3482
0
        cand->block_mi.ref_frame[0]       = svt_get_ref_frame_type(REF_LIST_1, 0);
3483
0
        cand->block_mi.ref_frame[1]       = NONE_FRAME;
3484
0
        cand->transform_type[0]           = DCT_DCT;
3485
0
        cand->transform_type_uv           = DCT_DCT;
3486
0
        cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
3487
0
        cand->block_mi.is_interintra_used = 0;
3488
0
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
3489
0
        cand->block_mi.num_proj_ref       = ctx->wm_sample_info[svt_get_ref_frame_type(REF_LIST_1, 0)].num;
3490
0
        INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
3491
        // update the total number of candidates injected
3492
0
        (*candidate_total_cnt) = cand_total_cnt;
3493
0
    }
3494
0
}
3495
3496
// in MD stage 0, candidates are injected by different tools, but for S-Frame in RA mode
3497
// the ref frame types in ref_list0 has be pruned in PD for the reversed direction of ref MVs
3498
// here to check and reject the candidates if mismatches the available frame types array
3499
0
static uint32_t reject_candidate_sframe(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t cand_total_cnt) {
3500
0
    for (uint32_t i = 0; i < cand_total_cnt;) {
3501
0
        if (!valid_ref_frame_type(
3502
0
                ctx->fast_cand_array[i].block_mi.ref_frame, ctx->ref_frame_type_arr, ctx->tot_ref_frame_types)) {
3503
0
            for (uint32_t j = i; j < cand_total_cnt; j++) {
3504
0
                memcpy(&ctx->fast_cand_array[j], &ctx->fast_cand_array[j + 1], sizeof(ModeDecisionCandidate));
3505
0
            }
3506
0
            cand_total_cnt--;
3507
0
            continue;
3508
0
        }
3509
0
        i++;
3510
0
    }
3511
    // zero candidate in fast cand array risks in md stage 0, add a candidate from ref list1 as backup
3512
0
    if (cand_total_cnt == 0) {
3513
0
        inject_sframe_backup_candidate(pcs, ctx, &cand_total_cnt);
3514
0
    }
3515
0
    assert(cand_total_cnt > 0);
3516
0
    return cand_total_cnt;
3517
0
}
3518
3519
EbErrorType generate_md_stage_0_cand_light_pd0(ModeDecisionContext* ctx, uint32_t* candidate_total_count_ptr,
3520
7.29k
                                               PictureControlSet* pcs) {
3521
7.29k
    const SliceType slice_type     = pcs->slice_type;
3522
7.29k
    uint32_t        cand_total_cnt = 0;
3523
    //----------------------
3524
    // Intra
3525
7.29k
    if (ctx->blk_geom->sq_size < 128 && ctx->intra_ctrls.enable_intra) {
3526
7.29k
        inject_intra_candidates_light_pd0(pcs, ctx, &cand_total_cnt);
3527
7.29k
    }
3528
3529
7.29k
    if (slice_type != I_SLICE) {
3530
0
        inject_inter_candidates_light_pd0(pcs, ctx, &cand_total_cnt);
3531
0
    }
3532
3533
    // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_stage_0()
3534
    // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques
3535
7.29k
    if (slice_type != I_SLICE && cand_total_cnt == 0) {
3536
0
        inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt);
3537
0
    }
3538
3539
7.29k
    if (pcs->ppcs->sframe_ref_pruned) {
3540
0
        cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt);
3541
0
    }
3542
3543
7.29k
    *candidate_total_count_ptr = cand_total_cnt;
3544
3545
7.29k
    return EB_ErrorNone;
3546
7.29k
}
3547
3548
/*
3549
   generate candidates for light pd1
3550
*/
3551
void generate_md_stage_0_cand_light_pd1(ModeDecisionContext* ctx, uint32_t* candidate_total_count_ptr,
3552
0
                                        PictureControlSet* pcs) {
3553
0
    const SliceType slice_type     = pcs->slice_type;
3554
0
    uint32_t        cand_total_cnt = 0;
3555
    // Reset duplicates variables
3556
0
    ctx->injected_mv_count = 0;
3557
0
    ctx->inject_new_me     = 1;
3558
0
    if (slice_type != I_SLICE) {
3559
0
        inject_inter_candidates_light_pd1(pcs, ctx, &cand_total_cnt);
3560
0
    }
3561
    //----------------------
3562
    // Intra
3563
0
    if (ctx->intra_ctrls.enable_intra && ctx->blk_geom->sq_size < 128) {
3564
0
        uint8_t dc_cand_only_flag = ctx->intra_ctrls.intra_mode_end == DC_PRED || is_dc_only_safe(pcs, ctx);
3565
0
        if (ctx->cand_reduction_ctrls.cand_elimination_ctrls.enabled && !dc_cand_only_flag &&
3566
0
            ctx->md_me_dist != (uint32_t)~0) {
3567
0
            uint32_t th = ctx->cand_reduction_ctrls.cand_elimination_ctrls.dc_only_th;
3568
0
            th *= (ctx->blk_geom->bheight * ctx->blk_geom->bwidth);
3569
0
            if (ctx->md_me_dist < th) {
3570
0
                dc_cand_only_flag = 1;
3571
0
            }
3572
0
        }
3573
0
        inject_intra_candidates(pcs, ctx, dc_cand_only_flag, &cand_total_cnt);
3574
0
    }
3575
3576
    // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_syage_0()
3577
    // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques
3578
0
    if (slice_type != I_SLICE && cand_total_cnt == 0) {
3579
0
        inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt);
3580
0
    }
3581
3582
0
    if (pcs->ppcs->sframe_ref_pruned) {
3583
0
        cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt);
3584
0
    }
3585
3586
0
    *candidate_total_count_ptr = cand_total_cnt;
3587
0
}
3588
3589
// Generate the MD stage-0 candidates for the regular path: intra, filter-intra, palette and
// intra-BC candidates (when intra is enabled), then inter candidates for non-I slices. Every
// candidate is then assigned a class and the per-class counters in ctx->md_stage_0_count are
// filled. The total is stored in *candidate_total_count_ptr. Always returns EB_ErrorNone.
EbErrorType generate_md_stage_0_cand(PictureControlSet* pcs, ModeDecisionContext* ctx, const PC_TREE* const pc_tree,
                                     uint32_t* candidate_total_count_ptr) {
    const SequenceControlSet* scs            = pcs->scs;
    const bool                is_inter_slice = pcs->slice_type != I_SLICE;
    uint32_t                  num_cands      = 0;

    // Reset duplicates variables
    ctx->inject_new_pme    = 1;
    ctx->inject_new_me     = 1;
    ctx->injected_mv_count = 0;

    //----------------------
    // Intra
    if (ctx->intra_ctrls.enable_intra) {
        uint8_t dc_only = ctx->intra_ctrls.intra_mode_end == DC_PRED || is_dc_only_safe(pcs, ctx);
        if (ctx->cand_reduction_ctrls.cand_elimination_ctrls.enabled) {
            eliminate_candidate_based_on_pme_me_results(ctx, &dc_only);
        }
        if (ctx->blk_geom->sq_size < 128) {
            inject_intra_candidates(pcs, ctx, dc_only, &num_cands);
        }
        if (ctx->filter_intra_ctrls.enabled && svt_aom_filter_intra_allowed_bsize(ctx->blk_geom->bsize)) {
            inject_filter_intra_candidates(pcs, ctx, &num_cands);
        }

        // Stays true when palette is not evaluated; otherwise true only if palette injected something
        bool eval_intrabc = true;
        if (svt_av1_allow_palette(ctx->md_palette_level, ctx->blk_geom->bsize)) {
            const uint32_t cnt_before_palette = num_cands;
            inject_palette_candidates(pcs, ctx, &num_cands);
            eval_intrabc = num_cands > cnt_before_palette;
        }

        if (ctx->md_allow_intrabc && (!pcs->ppcs->intrabc_ctrls.palette_hint || eval_intrabc)) {
            // Parent-based gating: skip intra-BC when the already-tested PART_N block used as
            // reference (the parent for 4x4 PART_N blocks, the current node for other shapes)
            // did not select intra-BC
            bool skip_intra_bc;
            if (ctx->shape == PART_N) {
                skip_intra_bc = pcs->ppcs->intrabc_ctrls.b4_parent_gating && ctx->blk_geom->sq_size == 4 &&
                    pc_tree->parent->tested_blk[PART_N][0] &&
                    pc_tree->parent->block_data[PART_N][0]->block_mi.use_intrabc == 0;
            } else {
                skip_intra_bc = pcs->ppcs->intrabc_ctrls.nsq_parent_gating && pc_tree->tested_blk[PART_N][0] &&
                    pc_tree->block_data[PART_N][0]->block_mi.use_intrabc == 0;
            }

            if (!skip_intra_bc) {
                inject_intra_bc_candidates(pcs, ctx, scs, ctx->blk_ptr, &num_cands);
            }
        }
    }

    if (is_inter_slice) {
        svt_aom_inject_inter_candidates(pcs, ctx, &num_cands);

        // For I_SLICE, DC is always injected, so there is no risk of having no candidate @ md_stage_0().
        // For non I_SLICE, the INTER candidate pruning techniques may leave no candidate @ md_stage_0(),
        // in which case a zero-motion backup candidate is injected.
        if (num_cands == 0) {
            inject_zz_backup_candidate(pcs, ctx, &num_cands);
        }
    }

    if (pcs->ppcs->sframe_ref_pruned) {
        num_cands = reject_candidate_sframe(pcs, ctx, num_cands);
    }

    *candidate_total_count_ptr = num_cands;

    memset(ctx->md_stage_0_count, 0, CAND_CLASS_TOTAL * sizeof(uint32_t));

    // When enabled, all inter candidates are put in CAND_CLASS_2 if the best ME/PME distortion
    // per pixel is below a QP-dependent threshold
    bool merge_inter_cands = false;
    if (ctx->nic_ctrls.pruning_ctrls.merge_inter_cands_mult != (uint8_t)~0) {
        const uint16_t th = (ctx->nic_ctrls.pruning_ctrls.merge_inter_cands_mult * (63 - scs->static_config.qp)) >> 1;
        merge_inter_cands = (MIN(ctx->md_me_dist, ctx->md_pme_dist) /
                             (ctx->blk_geom->bwidth * ctx->blk_geom->bheight)) < th;
    }

    // Classify each injected candidate and count the candidates of each class
    for (uint32_t idx = 0; idx < num_cands; idx++) {
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[idx];

        if (!is_intra_mode(cand->block_mi.mode)) {
            // INTER: NEWMV / NEW_NEWMV (or everything when merging) -> MV class, otherwise MVP class
            const bool mv_class = cand->block_mi.mode == NEWMV || cand->block_mi.mode == NEW_NEWMV ||
                merge_inter_cands;
            cand->cand_class = mv_class ? CAND_CLASS_2 : CAND_CLASS_1;
        } else if (cand->block_mi.use_intrabc != 0) {
            // Intra-BC Prediction
            cand->cand_class = CAND_CLASS_4;
        } else if (cand->palette_info != NULL && cand->palette_size[0] != 0) {
            // Palette Prediction
            cand->cand_class = CAND_CLASS_3;
        } else {
            // Intra prediction
            cand->cand_class = CAND_CLASS_0;
        }
        ctx->md_stage_0_count[cand->cand_class]++;
    }
    return EB_ErrorNone;
}
3701
3702
uint8_t av1_drl_ctx(const CandidateMv* ref_mv_stack, int32_t ref_idx);
3703
3704
/***************************************
3705
* Update symbols for light-PD1 path
3706
***************************************/
3707
void svt_aom_product_full_mode_decision_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
3708
0
                                                  ModeDecisionCandidateBuffer* cand_bf) {
3709
0
    BlkStruct*             blk_ptr = ctx->blk_ptr;
3710
0
    ModeDecisionCandidate* cand    = cand_bf->cand;
3711
0
    blk_ptr->total_rate            = cand_bf->total_rate;
3712
3713
    // Set common signals (INTER/INTRA)
3714
0
    svt_memcpy(&blk_ptr->block_mi, &cand->block_mi, sizeof(BlockModeInfo));
3715
0
    blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
3716
3717
    // Set INTER mode signals
3718
0
    if (is_inter_mode(cand->block_mi.mode)) {
3719
0
        blk_ptr->drl_index = cand->drl_index;
3720
0
        assert(IMPLIES(
3721
0
            is_inter_compound_mode(cand->block_mi.mode) && blk_ptr->block_mi.interinter_comp.type == COMPOUND_AVERAGE,
3722
0
            (blk_ptr->block_mi.comp_group_idx == 0 && blk_ptr->block_mi.compound_idx == 1)));
3723
3724
        // Set MVs
3725
0
        blk_ptr->predmv[0].as_int = cand->pred_mv[0].as_int;
3726
0
        if (has_second_ref(&blk_ptr->block_mi)) {
3727
0
            blk_ptr->predmv[1].as_int = cand->pred_mv[1].as_int;
3728
0
        }
3729
3730
0
        const int8_t ref_frame_type = av1_ref_frame_type(blk_ptr->block_mi.ref_frame);
3731
        // Store winning inter_mode_ctx in blk to avoid storing for all ref frames for EC
3732
0
        blk_ptr->inter_mode_ctx = ctx->inter_mode_ctx[ref_frame_type];
3733
        // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
3734
0
        if (blk_ptr->block_mi.mode == NEWMV || blk_ptr->block_mi.mode == NEW_NEWMV) {
3735
0
            for (uint8_t idx = 0; idx < 2; ++idx) {
3736
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3737
0
                    blk_ptr->drl_ctx[idx] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3738
0
                } else {
3739
0
                    blk_ptr->drl_ctx[idx] = -1;
3740
0
                }
3741
0
            }
3742
0
        }
3743
3744
0
        if (have_nearmv_in_inter_mode(blk_ptr->block_mi.mode)) {
3745
            // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
3746
0
            for (uint8_t idx = 1; idx < 3; ++idx) {
3747
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3748
0
                    blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3749
0
                } else {
3750
0
                    blk_ptr->drl_ctx_near[idx - 1] = -1;
3751
0
                }
3752
0
            }
3753
0
        }
3754
0
    } else { // Set INTRA mode signals
3755
0
        cand->skip_mode_allowed = false;
3756
0
    }
3757
    // Set TX and coeff-related data
3758
0
    blk_ptr->block_has_coeff   = ((cand_bf->block_has_coeff) > 0) ? true : false;
3759
0
    ctx->blk_ptr->cnt_nz_coeff = cand_bf->cnt_nz_coeff;
3760
3761
    // If skip_mode is allowed, and block has no coeffs, use skip_mode
3762
0
    if (cand->skip_mode_allowed == true) {
3763
0
        blk_ptr->block_mi.skip_mode |= !blk_ptr->block_has_coeff;
3764
0
    }
3765
3766
0
    assert(IMPLIES(pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE && blk_ptr->block_mi.skip_mode,
3767
0
                   cand->block_mi.interp_filters == 0));
3768
0
    if (blk_ptr->block_mi.skip_mode) {
3769
0
        blk_ptr->block_has_coeff = 0;
3770
0
        cand_bf->y_has_coeff     = 0;
3771
0
        cand_bf->u_has_coeff     = 0;
3772
0
        cand_bf->v_has_coeff     = 0;
3773
0
    }
3774
0
    blk_ptr->block_mi.skip = !blk_ptr->block_has_coeff;
3775
3776
0
    const uint16_t txb_itr       = 0;
3777
0
    const int32_t  txb_1d_offset = 0, txb_1d_offset_uv = 0;
3778
0
    blk_ptr->y_has_coeff         = cand_bf->y_has_coeff;
3779
0
    blk_ptr->u_has_coeff         = cand_bf->u_has_coeff;
3780
0
    blk_ptr->v_has_coeff         = cand_bf->v_has_coeff;
3781
0
    blk_ptr->tx_type[txb_itr]    = cand->transform_type[txb_itr];
3782
0
    blk_ptr->tx_type_uv          = cand->transform_type_uv;
3783
0
    blk_ptr->quant_dc.y[txb_itr] = cand_bf->quant_dc.y[txb_itr];
3784
0
    blk_ptr->quant_dc.u[txb_itr] = cand_bf->quant_dc.u[txb_itr];
3785
0
    blk_ptr->quant_dc.v[txb_itr] = cand_bf->quant_dc.v[txb_itr];
3786
3787
0
    if (ctx->bypass_encdec) {
3788
0
        blk_ptr->eob.y[txb_itr] = cand_bf->eob.y[txb_itr];
3789
0
        blk_ptr->eob.u[txb_itr] = cand_bf->eob.u[txb_itr];
3790
0
        blk_ptr->eob.v[txb_itr] = cand_bf->eob.v[txb_itr];
3791
0
        int32_t* src_ptr;
3792
0
        int32_t* dst_ptr;
3793
3794
0
        const TxSize tx_size   = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ctx->blk_geom->bsize];
3795
0
        const int    tx_width  = tx_size_wide[tx_size];
3796
0
        const int    tx_height = tx_size_high[tx_size];
3797
3798
        // only one TX unit, so no need to bitmask
3799
0
        if (blk_ptr->y_has_coeff) {
3800
0
            src_ptr = &(((int32_t*)cand_bf->quant->y_buffer)[txb_1d_offset]);
3801
0
            dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->y_buffer) + ctx->coded_area_sb;
3802
0
            svt_memcpy(dst_ptr, src_ptr, tx_width * tx_height * sizeof(int32_t));
3803
0
        }
3804
0
        ctx->coded_area_sb += tx_width * tx_height;
3805
3806
0
        const TxSize tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
3807
0
        const int    tx_width_uv  = tx_size_wide[tx_size_uv];
3808
0
        const int    tx_height_uv = tx_size_high[tx_size_uv];
3809
        // Cb
3810
        // only one TX unit, so no need to bitmask
3811
0
        if (blk_ptr->u_has_coeff) {
3812
0
            src_ptr = &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset_uv]);
3813
0
            dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->u_buffer) +
3814
0
                ctx->coded_area_sb_uv;
3815
0
            svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
3816
0
        }
3817
3818
        // Cr
3819
        // only one TX unit, so no need to bitmask
3820
0
        if (blk_ptr->v_has_coeff) {
3821
0
            src_ptr = &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset_uv]);
3822
0
            dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->v_buffer) +
3823
0
                ctx->coded_area_sb_uv;
3824
0
            svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
3825
0
        }
3826
0
        ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;
3827
0
    }
3828
0
}
3829
3830
0
// Returns the multiplier applied to the lowest SSD cost to form the SSD-cost threshold
// used by the SSIM-based candidate selection: 1.02 when the input resolution is in the
// 1080p range or above, 1.03 otherwise.
static INLINE double derive_ssim_threshold_factor_for_full_md(SequenceControlSet* scs) {
    if (scs->input_resolution >= INPUT_SIZE_1080p_RANGE) {
        return 1.02;
    }
    return 1.03;
}
3833
3834
/***************************************
3835
* Full Mode Decision
3836
***************************************/
3837
uint32_t svt_aom_product_full_mode_decision(PictureControlSet* pcs, ModeDecisionContext* ctx,
3838
                                            ModeDecisionCandidateBuffer** buffer_ptr_array,
3839
230k
                                            uint32_t candidate_total_count, uint32_t* best_candidate_index_array) {
3840
230k
    SequenceControlSet* scs                = pcs->scs;
3841
230k
    BlkStruct*          blk_ptr            = ctx->blk_ptr;
3842
230k
    uint32_t            lowest_cost_index  = best_candidate_index_array[0];
3843
230k
    const bool          use_ssim_full_cost = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false;
3844
3845
    // Find the candidate with the lowest cost
3846
    // Only need to sort if have multiple candidates
3847
230k
    if (ctx->md_stage_3_total_count > 1) {
3848
0
        if (use_ssim_full_cost) {
3849
            // Pass one: find candidate with the lowest SSD cost
3850
0
            uint64_t ssd_lowest_cost = 0xFFFFFFFFFFFFFFFFull;
3851
0
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
3852
0
                uint32_t cand_index = best_candidate_index_array[i];
3853
0
                uint64_t cost       = *(buffer_ptr_array[cand_index]->full_cost);
3854
0
                if (cost < ssd_lowest_cost) {
3855
0
                    lowest_cost_index = cand_index;
3856
0
                    ssd_lowest_cost   = cost;
3857
0
                }
3858
0
            }
3859
3860
            // Pass two: among the candidates with SSD cost not greater than the threshold, find the one with the lowest SSIM cost
3861
0
            const double   threshold_factor   = derive_ssim_threshold_factor_for_full_md(scs);
3862
0
            const uint64_t ssd_cost_threshold = (uint64_t)(threshold_factor * ssd_lowest_cost);
3863
0
            uint64_t       ssim_lowest_cost   = 0xFFFFFFFFFFFFFFFFull;
3864
0
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
3865
0
                uint32_t cand_index = best_candidate_index_array[i];
3866
3867
0
                uint64_t ssim_cost = *(buffer_ptr_array[cand_index]->full_cost_ssim);
3868
0
                uint64_t ssd_cost  = *(buffer_ptr_array[cand_index]->full_cost);
3869
0
                if (ssim_cost < ssim_lowest_cost) {
3870
0
                    if (ssd_cost <= ssd_cost_threshold) {
3871
0
                        lowest_cost_index = cand_index;
3872
0
                        ssim_lowest_cost  = ssim_cost;
3873
0
                        ssd_lowest_cost   = ssd_cost;
3874
0
                    }
3875
0
                } else if (ssim_cost == ssim_lowest_cost) {
3876
                    // if two candidates have the same ssim cost, choose the one with lower ssd cost
3877
0
                    if (ssd_cost < ssd_lowest_cost) {
3878
0
                        lowest_cost_index = cand_index;
3879
0
                        ssd_lowest_cost   = ssd_cost;
3880
0
                    }
3881
0
                }
3882
0
            }
3883
0
        } else { // fallback to SSD based RD cost
3884
0
            uint64_t lowest_cost = 0xFFFFFFFFFFFFFFFFull;
3885
0
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
3886
0
                uint32_t cand_index = best_candidate_index_array[i];
3887
3888
0
                uint64_t cost = *(buffer_ptr_array[cand_index]->full_cost);
3889
0
                if (scs->vq_ctrls.sharpness_ctrls.unipred_bias && pcs->ppcs->is_noise_level &&
3890
0
                    is_inter_singleref_mode(buffer_ptr_array[cand_index]->cand->block_mi.mode)) {
3891
0
                    cost = (cost * uni_psy_bias[pcs->ppcs->picture_qp]) / 100;
3892
0
                }
3893
3894
0
                if (cost < lowest_cost) {
3895
0
                    lowest_cost_index = cand_index;
3896
0
                    lowest_cost       = cost;
3897
0
                }
3898
0
            }
3899
0
        }
3900
0
    }
3901
230k
    ModeDecisionCandidateBuffer* cand_bf = buffer_ptr_array[lowest_cost_index];
3902
230k
    ModeDecisionCandidate*       cand    = cand_bf->cand;
3903
230k
    blk_ptr->total_rate                  = cand_bf->total_rate;
3904
230k
    if (!(ctx->pd_pass == PD_PASS_1 && ctx->fixed_partition)) {
3905
        // When lambda tuning is on, lambda of each block is set separately, however at interdepth decision the sb lambda is used
3906
222k
        uint32_t full_lambda = ctx->hbd_md ? ctx->full_sb_lambda_md[EB_10_BIT_MD] : ctx->full_sb_lambda_md[EB_8_BIT_MD];
3907
222k
        ctx->blk_ptr->cost   = RDCOST(full_lambda, cand_bf->total_rate, cand_bf->full_dist);
3908
222k
        ctx->blk_ptr->full_dist = cand_bf->full_dist;
3909
222k
    }
3910
3911
    // Set common signals (INTER/INTRA)
3912
230k
    svt_memcpy(&blk_ptr->block_mi, &cand->block_mi, sizeof(BlockModeInfo));
3913
    // Set INTER mode signals
3914
    // INTER signals set first b/c INTER shuts Palette, so INTRA must overwrite if Palette + intrabc is used
3915
230k
    if (is_inter_block(&blk_ptr->block_mi)) {
3916
0
        blk_ptr->drl_index = cand->drl_index;
3917
0
        assert(IMPLIES(
3918
0
            is_inter_compound_mode(cand->block_mi.mode) && blk_ptr->block_mi.interinter_comp.type == COMPOUND_AVERAGE,
3919
0
            (blk_ptr->block_mi.comp_group_idx == 0 && blk_ptr->block_mi.compound_idx == 1)));
3920
3921
0
        blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
3922
        // Set MVs
3923
0
        blk_ptr->predmv[0].as_int = cand->pred_mv[0].as_int;
3924
0
        if (has_second_ref(&blk_ptr->block_mi)) {
3925
0
            blk_ptr->predmv[1].as_int = cand->pred_mv[1].as_int;
3926
0
        }
3927
0
        if (blk_ptr->block_mi.motion_mode == WARPED_CAUSAL ||
3928
0
            (cand->block_mi.mode == GLOBALMV || cand->block_mi.mode == GLOBAL_GLOBALMV)) {
3929
0
            svt_memcpy(&ctx->blk_ptr->wm_params_l0, &cand->wm_params_l0, sizeof(WarpedMotionParams));
3930
0
            svt_memcpy(&ctx->blk_ptr->wm_params_l1, &cand->wm_params_l1, sizeof(WarpedMotionParams));
3931
0
        }
3932
3933
0
        if (ctx->pd_pass == PD_PASS_1) {
3934
0
            const int8_t ref_frame_type = av1_ref_frame_type(blk_ptr->block_mi.ref_frame);
3935
            // Store winning inter_mode_ctx in blk to avoid storing for all ref frames for EC
3936
0
            blk_ptr->inter_mode_ctx = ctx->inter_mode_ctx[ref_frame_type];
3937
            // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
3938
0
            if (blk_ptr->block_mi.mode == NEWMV || blk_ptr->block_mi.mode == NEW_NEWMV) {
3939
0
                for (uint8_t idx = 0; idx < 2; ++idx) {
3940
0
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3941
0
                        blk_ptr->drl_ctx[idx] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3942
0
                    } else {
3943
0
                        blk_ptr->drl_ctx[idx] = -1;
3944
0
                    }
3945
0
                }
3946
0
            }
3947
3948
0
            if (have_nearmv_in_inter_mode(blk_ptr->block_mi.mode)) {
3949
                // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
3950
0
                for (uint8_t idx = 1; idx < 3; ++idx) {
3951
0
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3952
0
                        blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3953
0
                    } else {
3954
0
                        blk_ptr->drl_ctx_near[idx - 1] = -1;
3955
0
                    }
3956
0
                }
3957
0
            }
3958
0
        }
3959
0
    }
3960
3961
    // Set INTRA mode signals
3962
230k
    if (is_intra_mode(blk_ptr->block_mi.mode)) {
3963
229k
        if (!cand->palette_info) {
3964
229k
            blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
3965
18.4E
        } else if (svt_av1_allow_palette(ctx->md_palette_level, ctx->blk_geom->bsize)) {
3966
0
            memcpy(&blk_ptr->palette_info->pmi, &cand->palette_info->pmi, sizeof(PaletteModeInfo));
3967
0
            memcpy(blk_ptr->palette_info->color_idx_map, cand->palette_info->color_idx_map, MAX_PALETTE_SQUARE);
3968
0
            blk_ptr->palette_size[0] = cand->palette_size[0];
3969
0
            blk_ptr->palette_size[1] = cand->palette_size[1];
3970
0
        }
3971
3972
229k
        if (blk_ptr->block_mi.use_intrabc == 0) {
3973
229k
            cand->skip_mode_allowed = false;
3974
229k
        }
3975
229k
    }
3976
3977
    // Set TX and coeff-related data
3978
230k
    blk_ptr->block_has_coeff   = ((cand_bf->block_has_coeff) > 0) ? true : false;
3979
230k
    ctx->blk_ptr->cnt_nz_coeff = cand_bf->cnt_nz_coeff;
3980
3981
    // If skip_mode is allowed, and block has no coeffs, use skip_mode
3982
230k
    if (cand->skip_mode_allowed == true) {
3983
0
        blk_ptr->block_mi.skip_mode |= !blk_ptr->block_has_coeff;
3984
0
    }
3985
3986
230k
    assert(IMPLIES(pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE && blk_ptr->block_mi.skip_mode,
3987
230k
                   cand->block_mi.interp_filters == 0));
3988
230k
    if (blk_ptr->block_mi.skip_mode) {
3989
0
        blk_ptr->block_has_coeff = 0;
3990
0
        cand_bf->y_has_coeff     = 0;
3991
0
        cand_bf->u_has_coeff     = 0;
3992
0
        cand_bf->v_has_coeff     = 0;
3993
0
    }
3994
3995
230k
    blk_ptr->block_mi.skip = !blk_ptr->block_has_coeff;
3996
230k
    blk_ptr->y_has_coeff   = cand_bf->y_has_coeff;
3997
230k
    blk_ptr->u_has_coeff   = cand_bf->u_has_coeff;
3998
230k
    blk_ptr->v_has_coeff   = cand_bf->v_has_coeff;
3999
230k
    svt_memcpy(blk_ptr->tx_type, cand->transform_type, sizeof(TxType) * MAX_TXB_COUNT);
4000
230k
    blk_ptr->tx_type_uv = cand->transform_type_uv;
4001
230k
    svt_memcpy(&blk_ptr->quant_dc, &cand_bf->quant_dc, sizeof(QuantDcData));
4002
230k
    svt_memcpy(&blk_ptr->eob, &cand_bf->eob, sizeof(EobData));
4003
4004
    // If bypassing EncDec, save recon/coeff
4005
230k
    if (ctx->bypass_encdec && ctx->pd_pass == PD_PASS_1) {
4006
117k
        const uint16_t tu_total_count = tx_blocks_per_depth[ctx->blk_geom->bsize][blk_ptr->block_mi.tx_depth];
4007
117k
        int32_t        txb_1d_offset = 0, txb_1d_offset_uv = 0;
4008
117k
        const TxSize   tx_size      = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ctx->blk_geom->bsize];
4009
117k
        const int      tx_width     = tx_size_wide[tx_size];
4010
117k
        const int      tx_height    = tx_size_high[tx_size];
4011
117k
        const TxSize   tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
4012
117k
        const int      tx_width_uv  = tx_size_wide[tx_size_uv];
4013
117k
        const int      tx_height_uv = tx_size_high[tx_size_uv];
4014
556k
        for (uint16_t txb_itr = 0; txb_itr < tu_total_count; txb_itr++) {
4015
438k
            const bool uv_pass = (blk_ptr->block_mi.tx_depth == 0 || txb_itr == 0);
4016
4017
438k
            int32_t* src_ptr = &(((int32_t*)cand_bf->quant->y_buffer)[txb_1d_offset]);
4018
438k
            int32_t* dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->y_buffer)[txb_1d_offset]);
4019
4020
438k
            if (ctx->fixed_partition) {
4021
7.29k
                dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->y_buffer) +
4022
7.29k
                    ctx->coded_area_sb;
4023
7.29k
                ctx->coded_area_sb += tx_width * tx_height;
4024
7.29k
            }
4025
4026
438k
            if (blk_ptr->y_has_coeff & (1 << txb_itr)) {
4027
5.28k
                svt_memcpy(dst_ptr, src_ptr, tx_width * tx_height * sizeof(int32_t));
4028
5.28k
            }
4029
4030
438k
            txb_1d_offset += tx_width * tx_height;
4031
4032
438k
            if (ctx->has_uv && uv_pass) {
4033
                // Cb
4034
117k
                src_ptr = &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset_uv]);
4035
117k
                dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->u_buffer)[txb_1d_offset_uv]);
4036
4037
117k
                if (ctx->fixed_partition) {
4038
7.29k
                    dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->u_buffer) +
4039
7.29k
                        ctx->coded_area_sb_uv;
4040
7.29k
                }
4041
4042
117k
                if (blk_ptr->u_has_coeff & (1 << txb_itr)) {
4043
5.13k
                    svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
4044
5.13k
                }
4045
4046
                // Cr
4047
117k
                src_ptr = &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset_uv]);
4048
117k
                dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->v_buffer)[txb_1d_offset_uv]);
4049
4050
117k
                if (ctx->fixed_partition) {
4051
7.29k
                    dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->v_buffer) +
4052
7.29k
                        ctx->coded_area_sb_uv;
4053
7.29k
                    ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;
4054
7.29k
                }
4055
4056
117k
                if (blk_ptr->v_has_coeff & (1 << txb_itr)) {
4057
5.13k
                    svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
4058
5.13k
                }
4059
4060
117k
                txb_1d_offset_uv += tx_width_uv * tx_height_uv;
4061
117k
            }
4062
438k
        }
4063
117k
    }
4064
4065
230k
    return lowest_cost_index;
4066
230k
}
4067
4068
// Return the end column for the current superblock, in unit of TPL blocks.
4069
0
static int get_superblock_tpl_column_end(PictureParentControlSet* ppcs, int mi_col, int num_mi_w) {
4070
0
    const int mib_size_log2 = ppcs->scs->seq_header.sb_size == BLOCK_128X128 ? 5 : 4;
4071
    // Find the start column of this superblock.
4072
0
    const int sb_mi_col_start = (mi_col >> mib_size_log2) << mib_size_log2;
4073
    // Same but in superres upscaled dimension.
4074
0
    const int sb_mi_col_start_sr = coded_to_superres_mi(sb_mi_col_start, ppcs->superres_denom);
4075
    // Width of this superblock in mi units.
4076
0
    const int sb_mi_width = mi_size_wide[ppcs->scs->seq_header.sb_size];
4077
    // Same but in superres upscaled dimension.
4078
0
    const int sb_mi_width_sr = coded_to_superres_mi(sb_mi_width, ppcs->superres_denom);
4079
    // Superblock end in mi units.
4080
0
    const int sb_mi_end = sb_mi_col_start_sr + sb_mi_width_sr;
4081
    // Superblock end in TPL units.
4082
0
    return (sb_mi_end + num_mi_w - 1) / num_mi_w;
4083
0
}
4084
4085
0
// Scales the MD lambdas of the current block by the geometric mean of the
// ssim_rdmult_scaling_factors of the 16x16 base blocks the block overlaps (clipped to the picture).
//
// ctx    : mode decision context; full_lambda_md[] and fast_lambda_md[] are overwritten
// pcs    : picture control set (source of the scaling factors and picture dimensions)
// mi_row : block row position in mi units
// mi_col : block column position in mi units
//
// When blk_lambda_tuning is off, the picture-level lambdas (ed_ctx->pic_*_lambda) are the base
// that gets scaled; when it is on, the lambdas already stored in ctx are scaled in place.
void aom_av1_set_ssim_rdmult(ModeDecisionContext* ctx, PictureControlSet* pcs, const int mi_row, const int mi_col) {
    const Av1Common* const cm    = pcs->ppcs->av1_cm;
    BlockSize              bsize = ctx->blk_geom->bsize;

    const int bsize_base = BLOCK_16X16;
    const int num_mi_w   = mi_size_wide[bsize_base];
    const int num_mi_h   = mi_size_high[bsize_base];
    const int num_cols   = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
    const int num_rows   = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
    const int num_bcols  = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
    const int num_brows  = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;

    // Rows are counted in base-block heights and columns in base-block widths.
    // (The base block is square, so this matches the previous w/h-swapped indexing.)
    const int row_start = mi_row / num_mi_h;
    const int col_start = mi_col / num_mi_w;

    double num_of_mi          = 0.0;
    double geom_mean_of_scale = 1.0;
    for (int row = row_start; row < num_rows && row < row_start + num_brows; ++row) {
        for (int col = col_start; col < num_cols && col < col_start + num_bcols; ++col) {
            const int index = row * num_cols + col;
            geom_mean_of_scale *= pcs->ppcs->pa_me_data->ssim_rdmult_scaling_factors[index];
            num_of_mi += 1.0;
        }
    }
    // With no base block covered the product is still 1.0; skip the root to avoid dividing by zero
    // (pow(1.0, inf) would yield 1.0 as well, so the result is unchanged).
    if (num_of_mi > 0.0) {
        geom_mean_of_scale = pow(geom_mean_of_scale, (1.0 / num_of_mi));
    }

    if (!pcs->ppcs->blk_lambda_tuning) {
        // Scale the picture-level lambdas (+0.5 rounds to nearest before truncation)
        ctx->full_lambda_md[EB_8_BIT_MD] =
            (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5);
        ctx->full_lambda_md[EB_10_BIT_MD] =
            (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5);

        ctx->fast_lambda_md[EB_8_BIT_MD] =
            (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5);
        ctx->fast_lambda_md[EB_10_BIT_MD] =
            (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5);
    } else {
        // Scale the block lambdas already present in the context
        ctx->full_lambda_md[EB_8_BIT_MD]  = (uint32_t)((double)ctx->full_lambda_md[EB_8_BIT_MD] * geom_mean_of_scale +
                                                      0.5);
        ctx->full_lambda_md[EB_10_BIT_MD] = (uint32_t)((double)ctx->full_lambda_md[EB_10_BIT_MD] * geom_mean_of_scale +
                                                       0.5);

        ctx->fast_lambda_md[EB_8_BIT_MD]  = (uint32_t)((double)ctx->fast_lambda_md[EB_8_BIT_MD] * geom_mean_of_scale +
                                                      0.5);
        ctx->fast_lambda_md[EB_10_BIT_MD] = (uint32_t)((double)ctx->fast_lambda_md[EB_10_BIT_MD] * geom_mean_of_scale +
                                                       0.5);
    }
}
4129
4130
0
// Derives the block-level MD lambdas from the picture-level lambdas, scaled by the geometric mean
// of the tpl_sb_rdmult_scaling_factors of the TPL base blocks (16x16, or 32x32 when
// tpl_ctrls.synth_blk_size is 32) that the current block overlaps.
//
// ctx : mode decision context; blk_org_x/y and blk_geom locate the block, and
//       full_lambda_md[] / fast_lambda_md[] are overwritten
// pcs : picture control set
//
// If no base block is covered (possible with superres), all four lambdas are set to
// SUPERRES_INVALID_STATE and the function returns early. For the SSIM, IQ and MS-SSIM tunes the
// result is further adjusted by aom_av1_set_ssim_rdmult().
void svt_aom_set_tuned_blk_lambda(ModeDecisionContext* ctx, PictureControlSet* pcs) {
    PictureParentControlSet* ppcs = pcs->ppcs;
    Av1Common*               cm   = ppcs->av1_cm;

    BlockSize bsize  = ctx->blk_geom->bsize;
    int       mi_row = ctx->blk_org_y / 4;
    int       mi_col = ctx->blk_org_x / 4;

    const int mi_col_sr         = coded_to_superres_mi(mi_col, ppcs->superres_denom);
    const int mi_cols_sr        = ((ppcs->enhanced_unscaled_pic->width + 15) / 16) << 2; // picture column boundary
    const int block_mi_width_sr = coded_to_superres_mi(mi_size_wide[bsize], ppcs->superres_denom);
    const int bsize_base        = ppcs->tpl_ctrls.synth_blk_size == 32 ? BLOCK_32X32 : BLOCK_16X16;
    const int num_mi_w          = mi_size_wide[bsize_base];
    const int num_mi_h          = mi_size_high[bsize_base];
    const int num_cols          = (mi_cols_sr + num_mi_w - 1) / num_mi_w;
    const int num_rows          = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
    const int num_bcols         = (block_mi_width_sr + num_mi_w - 1) / num_mi_w;
    const int num_brows         = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;

    // This is required because the end col of superblock may be off by 1 in case
    // of superres.
    const int sb_bcol_end = get_superblock_tpl_column_end(ppcs, mi_col, num_mi_w);

    // Rows are counted in base-block heights and columns in base-block widths.
    // (Both possible base blocks are square, so this matches the previous w/h-swapped indexing.)
    const int row_start = mi_row / num_mi_h;
    const int col_start = mi_col_sr / num_mi_w;

    int32_t base_block_count = 0;
    // Accumulated as a sum of logs; turned into the geometric mean with exp() below
    double geom_mean_of_scale = 0.0;
    for (int row = row_start; row < num_rows && row < row_start + num_brows; ++row) {
        for (int col = col_start; col < num_cols && col < col_start + num_bcols && col < sb_bcol_end; ++col) {
            const int index = row * num_cols + col;
            geom_mean_of_scale += log(ppcs->pa_me_data->tpl_sb_rdmult_scaling_factors[index]);
            ++base_block_count;
        }
    }
    // When superres is on, base_block_count could be zero.
    // This function's counterpart in AOM, av1_get_hier_tpl_rdmult, will encounter division by zero
    if (base_block_count == 0) {
        // return a large number to indicate invalid state
        ctx->full_lambda_md[EB_8_BIT_MD]  = SUPERRES_INVALID_STATE;
        ctx->full_lambda_md[EB_10_BIT_MD] = SUPERRES_INVALID_STATE;

        ctx->fast_lambda_md[EB_8_BIT_MD]  = SUPERRES_INVALID_STATE;
        ctx->fast_lambda_md[EB_10_BIT_MD] = SUPERRES_INVALID_STATE;
        return;
    }

    geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count);

    // Scale the picture-level lambdas (+0.5 rounds to nearest before truncation)
    ctx->full_lambda_md[EB_8_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5);
    ctx->full_lambda_md[EB_10_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5);

    ctx->fast_lambda_md[EB_8_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5);
    ctx->fast_lambda_md[EB_10_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5);
    if (ppcs->scs->static_config.tune == TUNE_SSIM || ppcs->scs->static_config.tune == TUNE_IQ ||
        ppcs->scs->static_config.tune == TUNE_MS_SSIM) {
        aom_av1_set_ssim_rdmult(ctx, pcs, mi_row, mi_col);
    }
}
4191
4192
0
// C implementation of the SSIM score of one 4x4 block of 8-bit samples.
// s / r point to the top-left sample of the two blocks being compared; sp / rp are their
// strides in samples. The per-block sums, sums of squares and cross sum are handed to
// svt_aom_similarity() together with the sample count and the value 8
// (presumably the bit depth - confirm against svt_aom_similarity).
double svt_ssim_4x4_c(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp) {
    const int32_t count = 4 * 4;

    uint32_t acc_s = 0, acc_r = 0, acc_ss = 0, acc_rr = 0, acc_sr = 0;
    for (uint32_t row = 0; row < 4; ++row, s += sp, r += rp) {
        for (uint32_t col = 0; col < 4; ++col) {
            const uint32_t sv = s[col];
            const uint32_t rv = r[col];
            acc_s += sv;
            acc_r += rv;
            acc_ss += sv * sv;
            acc_rr += rv * rv;
            acc_sr += sv * rv;
        }
    }

    return svt_aom_similarity(acc_s, acc_r, acc_ss, acc_rr, acc_sr, count, 8);
}
4216
4217
0
// C implementation of the SSIM score of one 8x8 block of 8-bit samples.
// s / r point to the top-left sample of the two blocks being compared; sp / rp are their
// strides in samples. The accumulation is similar to svt_aom_ssim_parms_8x8_c; the sums are
// handed to svt_aom_similarity() together with the sample count and the value 8
// (presumably the bit depth - confirm against svt_aom_similarity).
double svt_ssim_8x8_c(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp) {
    const int32_t count = 8 * 8;

    uint32_t acc_s = 0, acc_r = 0, acc_ss = 0, acc_rr = 0, acc_sr = 0;
    for (uint32_t row = 0; row < 8; ++row, s += sp, r += rp) {
        for (uint32_t col = 0; col < 8; ++col) {
            const uint32_t sv = s[col];
            const uint32_t rv = r[col];
            acc_s += sv;
            acc_r += rv;
            acc_ss += sv * sv;
            acc_rr += rv * rv;
            acc_sr += sv * rv;
        }
    }

    return svt_aom_similarity(acc_s, acc_r, acc_ss, acc_rr, acc_sr, count, 8);
}
4244
4245
0
// C implementation of the SSIM score of one 4x4 block of 16-bit-container samples.
// s / r point to the top-left sample of the two blocks being compared; sp / rp are their
// strides in samples. The sums are handed to svt_aom_similarity() together with the sample
// count and the value 10 (presumably the bit depth - confirm against svt_aom_similarity).
double svt_ssim_4x4_hbd_c(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp) {
    const int32_t count = 4 * 4;

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t i = 0; i < 4; i++) {
        for (uint32_t j = 0; j < 4; j++) {
            // Widen before multiplying: uint16_t operands promote to signed int, and the product of two
            // large samples would overflow it (undefined behavior). Unsigned arithmetic is well defined.
            const uint32_t sv = s[j];
            const uint32_t rv = r[j];
            sum_s += sv;
            sum_r += rv;
            sum_sq_s += sv * sv;
            sum_sq_r += rv * rv;
            sum_sxr += sv * rv;
        }

        s += sp;
        r += rp;
    }

    //
    // similarity
    //
    return svt_aom_similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 10);
}
4269
4270
0
// C implementation of the SSIM score of one 8x8 block of 16-bit-container samples.
// s / r point to the top-left sample of the two blocks being compared; sp / rp are their
// strides in samples. The sums are handed to svt_aom_similarity() together with the sample
// count and the value 10 (presumably the bit depth - confirm against svt_aom_similarity).
double svt_ssim_8x8_hbd_c(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp) {
    const int32_t count = 8 * 8;

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t i = 0; i < 8; i++) {
        for (uint32_t j = 0; j < 8; j++) {
            // Widen before multiplying: uint16_t operands promote to signed int, and the product of two
            // large samples would overflow it (undefined behavior). Unsigned arithmetic is well defined.
            const uint32_t sv = s[j];
            const uint32_t rv = r[j];
            sum_s += sv;
            sum_r += rv;
            sum_sq_s += sv * sv;
            sum_sq_r += rv * rv;
            sum_sxr += sv * rv;
        }

        s += sp;
        r += rp;
    }

    //
    // similarity
    //
    return svt_aom_similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 10);
}
4294
4295
// Average SSIM over the non-overlapping 8x8 tiles of a width x height area of 8-bit samples.
// s / r point to the top-left sample of the two areas; sp / rp are their strides in samples.
// Each per-tile score is clipped to [0, 1] before averaging.
// The area must contain at least one full 8x8 tile (asserted).
static double ssim_8x8_blocks(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width,
                              uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 8x8 tile. The bounds are written as `i + 8 <= height` (not `i <= height - 8`)
    // so that a dimension smaller than 8 gives zero iterations instead of wrapping around in
    // unsigned arithmetic and reading out of bounds.
    for (uint32_t i = 0; i + 8 <= height; i += 8, s += sp * 8, r += rp * 8) {
        for (uint32_t j = 0; j + 8 <= width; j += 8) {
            double v = svt_ssim_8x8(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4315
4316
// Average SSIM over the non-overlapping 4x4 tiles of a width x height area of 8-bit samples.
// s / r point to the top-left sample of the two areas; sp / rp are their strides in samples.
// Each per-tile score is clipped to [0, 1] before averaging.
// The area must contain at least one full 4x4 tile (asserted).
static double ssim_4x4_blocks(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width,
                              uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 4x4 tile. The bounds are written as `i + 4 <= height` (not `i <= height - 4`)
    // so that a dimension smaller than 4 gives zero iterations instead of wrapping around in
    // unsigned arithmetic and reading out of bounds.
    for (uint32_t i = 0; i + 4 <= height; i += 4, s += sp * 4, r += rp * 4) {
        for (uint32_t j = 0; j + 4 <= width; j += 4) {
            double v = svt_ssim_4x4(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4336
4337
0
// Average SSIM of a width x height area of 8-bit samples.
// Uses 8x8 tiles when both dimensions are multiples of 8, and 4x4 tiles otherwise.
// Both dimensions are expected to be multiples of 4 (asserted).
static double ssim(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width, uint32_t height) {
    assert((width % 4) == 0 && (height % 4) == 0);

    // Both dimensions are multiples of 8 exactly when neither has any of its low three bits set
    const bool tiles_of_8 = ((width | height) & 7u) == 0;
    return tiles_of_8 ? ssim_8x8_blocks(s, sp, r, rp, width, height) : ssim_4x4_blocks(s, sp, r, rp, width, height);
}
4345
4346
// Average SSIM over the non-overlapping 8x8 tiles of a width x height area of
// 16-bit-container samples.
// s / r point to the top-left sample of the two areas; sp / rp are their strides in samples.
// Each per-tile score is clipped to [0, 1] before averaging.
// The area must contain at least one full 8x8 tile (asserted).
static double ssim_8x8_blocks_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                                  uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 8x8 tile. The bounds are written as `i + 8 <= height` (not `i <= height - 8`)
    // so that a dimension smaller than 8 gives zero iterations instead of wrapping around in
    // unsigned arithmetic and reading out of bounds.
    for (uint32_t i = 0; i + 8 <= height; i += 8, s += sp * 8, r += rp * 8) {
        for (uint32_t j = 0; j + 8 <= width; j += 8) {
            double v = svt_ssim_8x8_hbd(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4366
4367
// Average SSIM over the non-overlapping 4x4 tiles of a width x height area of
// 16-bit-container samples.
// s / r point to the top-left sample of the two areas; sp / rp are their strides in samples.
// Each per-tile score is clipped to [0, 1] before averaging.
// The area must contain at least one full 4x4 tile (asserted).
static double ssim_4x4_blocks_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                                  uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 4x4 tile. The bounds are written as `i + 4 <= height` (not `i <= height - 4`)
    // so that a dimension smaller than 4 gives zero iterations instead of wrapping around in
    // unsigned arithmetic and reading out of bounds.
    for (uint32_t i = 0; i + 4 <= height; i += 4, s += sp * 4, r += rp * 4) {
        for (uint32_t j = 0; j + 4 <= width; j += 4) {
            double v = svt_ssim_4x4_hbd(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4387
4388
// Average SSIM of a width x height area of 16-bit-container samples.
// Uses 8x8 tiles when both dimensions are multiples of 8, and 4x4 tiles otherwise.
// Both dimensions are expected to be multiples of 4 (asserted).
static double ssim_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                       uint32_t height) {
    assert((width % 4) == 0 && (height % 4) == 0);

    // Both dimensions are multiples of 8 exactly when neither has any of its low three bits set
    const bool tiles_of_8 = ((width | height) & 7u) == 0;
    return tiles_of_8 ? ssim_8x8_blocks_hbd(s, sp, r, rp, width, height)
                      : ssim_4x4_blocks_hbd(s, sp, r, rp, width, height);
}
4397
4398
// SSIM-based spatial distortion between an input area and its reconstruction.
//
// input / recon               : base pointers of the two pictures (reinterpreted as uint16_t* when hbd is set)
// input_offset / recon_offset : offset, in samples, of the area's top-left sample
// input_stride / recon_stride : strides in samples
// area_width / area_height    : area dimensions in samples
// hbd                         : true when the buffers hold 16-bit-container samples
// ac_bias                     : weight of the psy (AC) distortion term; the term is skipped when zero
//
// Returns (1 - ssim) * area * 100 * 7 * m truncated to an integer, where m is 1 for 8-bit and 8 for
// hbd, plus ac_bias times the psy distortion. For hbd the psy term is only compiled in when
// CONFIG_ENABLE_HIGH_BIT_DEPTH is enabled.
uint64_t svt_spatial_full_distortion_ssim_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                                                 uint8_t* recon, int32_t recon_offset, uint32_t recon_stride,
                                                 uint32_t area_width, uint32_t area_height, bool hbd, double ac_bias) {
    const uint32_t sample_count = area_width * area_height;
    uint8_t        depth_scale  = 1;
    double         similarity;
    uint64_t       psy_term = 0;

    if (hbd) {
        uint16_t* const src16 = (uint16_t*)input + input_offset;
        uint16_t* const rec16 = (uint16_t*)recon + recon_offset;

        depth_scale = 8;
        similarity  = ssim_hbd(src16, input_stride, rec16, recon_stride, area_width, area_height);
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
        if (ac_bias) {
            const uint64_t ac_dist = svt_psy_distortion_hbd(
                src16, input_stride, rec16, recon_stride, area_width, area_height);
            psy_term = (uint64_t)(ac_dist * ac_bias);
        }
#endif
    } else {
        uint8_t* const src8 = input + input_offset;
        uint8_t* const rec8 = recon + recon_offset;

        similarity = ssim(src8, input_stride, rec8, recon_stride, area_width, area_height);
        if (ac_bias) {
            const uint64_t ac_dist = svt_psy_distortion(
                src8, input_stride, rec8, recon_stride, area_width, area_height);
            psy_term = (uint64_t)(ac_dist * ac_bias);
        }
    }

    // Dissimilarity scaled by the area and by fixed weights, truncated to an integer
    const uint64_t ssim_term = (uint64_t)((1 - similarity) * sample_count * 100 * 7 * depth_scale);

    return ssim_term + psy_term;
}