Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/mode_decision.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 3-Clause Clear License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
/***************************************
14
* Includes
15
***************************************/
16
#include <stdbool.h>
17
#include <stdio.h>
18
#include <stdlib.h>
19
#include <limits.h>
20
21
#include "common_utils.h"
22
#include "definitions.h"
23
#include "sequence_control_set.h"
24
#include "mode_decision.h"
25
#include "md_process.h"
26
#include "motion_estimation.h"
27
28
#include "av1me.h"
29
#include "hash.h"
30
#include "enc_inter_prediction.h"
31
#include "rd_cost.h"
32
#include "aom_dsp_rtcd.h"
33
#include "svt_log.h"
34
#include "resize.h"
35
#include "mcomp.h"
36
#include "ac_bias.h"
37
#include "src_ops_process.h"
38
#include "utility.h"
39
#include "adaptive_mv_pred.h"
40
#include "av1me.h"
41
static const uint32_t intra_luma_to_chroma[INTRA_MODES] = {
42
    UV_DC_PRED, // Average of above and left pixels
43
    UV_V_PRED, // Vertical
44
    UV_H_PRED, // Horizontal
45
    UV_D45_PRED, // Directional 45  degree
46
    UV_D135_PRED, // Directional 135 degree
47
    UV_D113_PRED, // Directional 113 degree
48
    UV_D157_PRED, // Directional 157 degree
49
    UV_D203_PRED, // Directional 203 degree
50
    UV_D67_PRED, // Directional 67  degree
51
    UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation
52
    UV_SMOOTH_V_PRED, // Vertical interpolation
53
    UV_SMOOTH_H_PRED, // Horizontal interpolation
54
    UV_PAETH_PRED, // Predict from the direction of smallest gradient
55
};
56
57
void calc_target_weighted_pred(PictureControlSet* pcs, ModeDecisionContext* ctx, const Av1Common* cm,
58
                               const MacroBlockD* xd, int mi_row, int mi_col, const uint8_t* above, int above_stride,
59
                               const uint8_t* left, int left_stride);
60
/*
 * Advance the mode-decision candidate counter `cnt` by one as long as the
 * result stays below `max_can_count`; otherwise leave `cnt` untouched and log
 * an error through SVT_ERROR.
 *
 * Both arguments are parenthesized so that expression arguments (for example
 * a conditional expression used as the limit) are compared as a whole, and the
 * branches are braced so the expansion does not depend on how SVT_ERROR
 * expands. `cnt` must be a modifiable lvalue; it is evaluated more than once.
 */
#define INC_MD_CAND_CNT(cnt, max_can_count)                  \
    MULTI_LINE_MACRO_BEGIN                                   \
    if ((cnt) + 1 < (max_can_count)) {                       \
        (cnt)++;                                             \
    } else {                                                 \
        SVT_ERROR("Mode decision candidate count exceeded"); \
    }                                                        \
    MULTI_LINE_MACRO_END
67
68
0
// Sentinel equal to the largest 32-bit signed value.
// NOTE(review): presumably marks an unset/invalid super-res state - confirm at the use sites.
#define SUPERRES_INVALID_STATE 0x7fffffff
69
70
3.71M
/*
 * Report the lossless flag that applies to a segment.
 *
 * When segmentation is enabled in the frame header the flag stored for
 * `segment_id` is returned; otherwise the entry at index 0 is used and
 * `segment_id` is ignored.
 */
bool svt_av1_is_lossless_segment(PictureControlSet* pcs, int8_t segment_id) {
    const bool segmentation_on = pcs->ppcs->frm_hdr.segmentation_params.segmentation_enabled;

    return pcs->lossless[segmentation_on ? segment_id : 0];
}
78
79
0
static bool check_mv_validity(int16_t x_mv, int16_t y_mv, uint8_t need_shift) {
80
0
    Mv mv;
81
    //go to 1/8th if input is 1/4pel
82
0
    mv.y = y_mv << need_shift;
83
0
    mv.x = x_mv << need_shift;
84
    /* AV1 limits
85
      -16384 < MV_x_in_1/8 or MV_y_in_1/8 < 16384
86
      which means in full pel:
87
      -2048 < MV_x_in_full_pel or MV_y_in_full_pel < 2048
88
    */
89
0
    if (!is_mv_valid(&mv)) {
90
0
        return false;
91
0
    }
92
0
    return true;
93
0
}
94
95
int svt_is_interintra_allowed(uint8_t enable_inter_intra, BlockSize bsize, PredictionMode mode,
96
0
                              const MvReferenceFrame ref_frame[2]) {
97
0
    return enable_inter_intra && svt_aom_is_interintra_allowed_bsize((const BlockSize)bsize) &&
98
0
        svt_aom_is_interintra_allowed_mode(mode) && svt_aom_is_interintra_allowed_ref(ref_frame);
99
0
}
100
101
0
/*
 * Block-size gate for filter-intra: returns 1 when neither the width nor the
 * height of `bs` exceeds 32, and 0 otherwise.
 */
int svt_aom_filter_intra_allowed_bsize(BlockSize bs) {
    if (block_size_wide[bs] > 32) {
        return 0;
    }
    return block_size_high[bs] <= 32;
}
104
105
256k
/*
 * Decide whether filter-intra may be used for a candidate.
 *
 * Returns 1 only when the feature is enabled, the intra mode is DC_PRED, no
 * palette is in use (palette_size == 0) and the block size passes
 * svt_aom_filter_intra_allowed_bsize(); returns 0 otherwise.
 */
int svt_aom_filter_intra_allowed(uint8_t enable_filter_intra, BlockSize bsize, uint8_t palette_size, uint32_t mode) {
    if (!enable_filter_intra) {
        return 0;
    }
    if (mode != DC_PRED || palette_size != 0) {
        return 0;
    }
    return svt_aom_filter_intra_allowed_bsize(bsize) ? 1 : 0;
}
108
109
// returns the max inter-inter compound type based on settings and block size
110
0
static MD_COMP_TYPE get_tot_comp_types_bsize(MD_COMP_TYPE tot_comp_types, BlockSize bsize) {
111
0
    return (svt_aom_get_wedge_params_bits(bsize) == 0) ? MIN(tot_comp_types, MD_COMP_WEDGE) : tot_comp_types;
112
0
}
113
114
/*
115
Get the ME offset for a given block (the offset used to locate the PA MVs from the parent PCS).
116
*/
117
uint32_t svt_aom_get_me_block_offset(const uint32_t org_x, const uint32_t org_y, const BlockSize bsize,
118
246k
                                     const uint8_t enable_me_8x8, const uint8_t enable_me_16x16) {
119
246k
    const int      bwidth     = block_size_wide[bsize];
120
246k
    const int      bheight    = block_size_high[bsize];
121
246k
    const uint32_t max_length = MAX(bwidth, bheight);
122
123
246k
    uint32_t me_idx = 0;
124
246k
    switch (max_length) {
125
0
    case 4:
126
241k
    case 8:
127
241k
        me_idx++;
128
241k
        if (org_x & 8) { // (org_x % 16) / 8
129
117k
            me_idx += 1;
130
117k
        }
131
241k
        if (org_y & 8) { // (org_y % 16) / 8
132
117k
            me_idx += 2;
133
117k
        }
134
241k
        AOM_FALLTHROUGH_INTENDED;
135
243k
    case 16:
136
243k
        me_idx++;
137
243k
        if (org_x & 16) { // (org_x % 32) / 16
138
116k
            me_idx += 5;
139
116k
        }
140
243k
        if (org_y & 16) { // (org_y % 32) / 16
141
115k
            me_idx += 10;
142
115k
        }
143
243k
        AOM_FALLTHROUGH_INTENDED;
144
243k
    case 32:
145
243k
        me_idx++;
146
243k
        if (org_x & 32) { // (org_x % 64) / 32
147
113k
            me_idx += 21;
148
113k
        }
149
243k
        if (org_y & 32) { // (org_y % 64) / 32
150
111k
            me_idx += 42;
151
111k
        }
152
243k
        break;
153
2.77k
    default:
154
        // me_idx = 0;
155
2.77k
        break;
156
246k
    }
157
158
246k
    uint32_t me_block_offset = me_idx_85[me_idx]; // convert idx to me_idx
159
160
246k
    if (!enable_me_8x8) {
161
246k
        if (me_block_offset >= MAX_SB64_PU_COUNT_NO_8X8) {
162
241k
            me_block_offset = me_idx_85_8x8_to_16x16_conversion[me_block_offset - MAX_SB64_PU_COUNT_NO_8X8];
163
241k
        }
164
246k
        assert(me_block_offset < 21);
165
246k
        if (!enable_me_16x16) {
166
0
            if (me_block_offset >= MAX_SB64_PU_COUNT_WO_16X16) {
167
0
                assert(me_block_offset < 21);
168
0
                me_block_offset = me_idx_16x16_to_parent_32x32_conversion[me_block_offset - MAX_SB64_PU_COUNT_WO_16X16];
169
0
            }
170
0
        }
171
246k
    }
172
173
246k
    return me_block_offset;
174
246k
}
175
176
//Given one reference frame identified by the pair (list_index,ref_index)
177
//indicate if ME data is valid
178
uint8_t svt_aom_is_me_data_present(uint32_t me_block_offset, uint32_t me_cand_offset, const MeSbResults* me_results,
179
0
                                   uint8_t list_idx, uint8_t ref_idx) {
180
0
    uint8_t            total_me_cnt     = me_results->total_me_candidate_index[me_block_offset];
181
0
    const MeCandidate* me_block_results = &me_results->me_candidate_array[me_cand_offset];
182
0
    for (uint32_t me_cand_i = 0; me_cand_i < total_me_cnt; ++me_cand_i) {
183
0
        const MeCandidate* me_cand = &me_block_results[me_cand_i];
184
0
        assert(me_cand->direction <= 2);
185
0
        if (me_cand->direction == 0 || me_cand->direction == 2) {
186
0
            if (list_idx == me_cand->ref0_list && ref_idx == me_cand->ref_idx_l0) {
187
0
                return 1;
188
0
            }
189
0
        }
190
0
        if (me_cand->direction == 1 || me_cand->direction == 2) {
191
0
            if (list_idx == me_cand->ref1_list && ref_idx == me_cand->ref_idx_l1) {
192
0
                return 1;
193
0
            }
194
0
        }
195
0
    }
196
0
    return 0;
197
0
}
198
199
/********************************************
200
* Constants
201
********************************************/
202
// 1 - Regular uni-pred ,
203
// 2 - Regular uni-pred + Wedge compound Inter Intra
204
// 3 - Regular uni-pred + Wedge compound Inter Intra + Smooth compound Inter Intra
205
206
#if CONFIG_ENABLE_OBMC
/*
 * Returns true when warped motion may be considered for the current block:
 * the frame header allows warped motion, the block has overlappable
 * candidates, both block dimensions are at least 8, and the context's
 * warped-motion controls are enabled.
 * NOTE(review): this helper is compiled only under CONFIG_ENABLE_OBMC although
 * it concerns warped motion - confirm that the guard is intended.
 */
static bool warped_motion_mode_allowed(PictureControlSet* pcs, ModeDecisionContext* ctx) {
    if (!pcs->ppcs->frm_hdr.allow_warped_motion) {
        return false;
    }
    if (!has_overlappable_candidates(ctx->blk_ptr)) {
        return false;
    }
    if (ctx->blk_geom->bwidth < 8 || ctx->blk_geom->bheight < 8) {
        return false;
    }
    return ctx->wm_ctrls.enabled != 0;
}
#endif
213
MotionMode svt_aom_obmc_motion_mode_allowed(
214
    const PictureControlSet* pcs, ModeDecisionContext* ctx, const BlockSize bsize,
215
    uint8_t          situation, // 0: candidate(s) preparation, 1: data preparation, 2: simple translation face-off
216
0
    MvReferenceFrame rf0, MvReferenceFrame rf1, PredictionMode mode) {
217
0
    if (ctx->obmc_ctrls.trans_face_off && !situation) {
218
0
        return SIMPLE_TRANSLATION;
219
0
    }
220
    // check if should cap the max block size for obmc
221
222
0
    if (block_size_wide[bsize] > ctx->obmc_ctrls.max_blk_size ||
223
0
        block_size_high[bsize] > ctx->obmc_ctrls.max_blk_size) {
224
0
        return SIMPLE_TRANSLATION;
225
0
    }
226
0
    if (!ctx->obmc_ctrls.enabled) {
227
0
        return SIMPLE_TRANSLATION;
228
0
    }
229
0
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
230
231
0
    if (!frm_hdr->is_motion_mode_switchable) {
232
0
        return SIMPLE_TRANSLATION;
233
0
    }
234
235
0
    if (frm_hdr->force_integer_mv == 0) {
236
0
        const TransformationType gm_type = pcs->ppcs->global_motion[rf0].wmtype;
237
0
        if (is_global_mv_block(mode, bsize, gm_type)) {
238
0
            return SIMPLE_TRANSLATION;
239
0
        }
240
0
    }
241
0
    if (is_motion_variation_allowed_bsize(bsize) && is_inter_singleref_mode(mode) && rf1 != INTRA_FRAME &&
242
0
        !(rf1 > INTRA_FRAME)) // is_motion_variation_allowed_compound
243
0
    {
244
0
        if (!has_overlappable_candidates(ctx->blk_ptr)) { // check_num_overlappable_neighbors
245
0
            return SIMPLE_TRANSLATION;
246
0
        }
247
248
0
        return OBMC_CAUSAL;
249
0
    } else {
250
0
        return SIMPLE_TRANSLATION;
251
0
    }
252
0
}
253
254
//static uint32_t  AntiContouringIntraMode[11] = { EB_INTRA_PLANAR, EB_INTRA_DC, EB_INTRA_HORIZONTAL, EB_INTRA_VERTICAL,
255
//EB_INTRA_MODE_2, EB_INTRA_MODE_6, EB_INTRA_MODE_14, EB_INTRA_MODE_18, EB_INTRA_MODE_22, EB_INTRA_MODE_30, EB_INTRA_MODE_34 };
256
0
/*
 * Returns 1 when `mode` is one of the inter modes that carries a NEWMV
 * component (NEWMV, NEW_NEWMV, NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV,
 * NEW_NEARMV) and 0 for every other mode.
 */
int32_t svt_aom_have_newmv_in_inter_mode(PredictionMode mode) {
    switch (mode) {
    case NEWMV:
    case NEW_NEWMV:
    case NEAREST_NEWMV:
    case NEW_NEARESTMV:
    case NEAR_NEWMV:
    case NEW_NEARMV: return 1;
    default: return 0;
    }
}
260
261
static MvReferenceFrame to_ref_frame[2][4] = {{LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME},
262
                                              {BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME, INVALID_REF}};
263
264
0
MvReferenceFrame svt_get_ref_frame_type(uint8_t list, uint8_t ref_idx) {
265
0
    return to_ref_frame[list][ref_idx];
266
0
};
267
268
0
/*
 * Number of DRL entries to consider for a mode, given the reference MV count.
 *
 * NEWMV / NEW_NEWMV use a pivot of 2, the NEAR-family modes (NEARMV,
 * NEAR_NEARMV, NEAR_NEWMV, NEW_NEARMV) use a pivot of 3:
 *   refmvCnt <  pivot -> 1
 *   refmvCnt == pivot -> 2
 *   refmvCnt >  pivot -> 3
 * Any other mode returns 0.
 */
uint8_t svt_aom_get_max_drl_index(uint8_t refmvCnt, PredictionMode mode) {
    uint8_t pivot;

    switch (mode) {
    case NEWMV:
    case NEW_NEWMV: pivot = 2; break;
    case NEARMV:
    case NEAR_NEARMV:
    case NEAR_NEWMV:
    case NEW_NEARMV: pivot = 3; break;
    default: return 0;
    }

    if (refmvCnt < pivot) {
        return 1;
    }
    return refmvCnt == pivot ? 2 : 3;
}
293
294
0
// Weight handed to svt_av1_mv_bit_cost() as its final argument when rating MV candidates in this file.
#define MV_COST_WEIGHT 108
295
296
/*
 * Pick the wedge index for wedge inter-intra prediction.
 *
 * Builds two residual planes with row stride equal to the block width -
 * (source - p1) and (p1 - p0) - and hands them to pick_wedge_fixed_sign().
 * Both predictions p0 and p1 are read with a stride equal to the block width;
 * the source is read with `src_stride`. When high-bit-depth support is
 * compiled in and ctx->hbd_md is set, the 10-bit subtract routine is used.
 *
 * wedge_index_out receives the chosen index (-1 if the search leaves it
 * untouched). Returns the value produced by pick_wedge_fixed_sign().
 */
static int64_t pick_interintra_wedge(PictureControlSet* pcs, ModeDecisionContext* ctx, const BlockSize bsize,
                                     const uint8_t* const p0, const uint8_t* const p1, uint8_t* src_buf,
                                     uint32_t src_stride, int8_t* wedge_index_out) {
    assert(svt_aom_is_interintra_wedge_used(bsize));

    const int blk_w = block_size_wide[bsize];
    const int blk_h = block_size_high[bsize];
    DECLARE_ALIGNED(32, int16_t, src_minus_p1[MAX_INTERINTRA_SB_SQUARE]); // src - pred1
    DECLARE_ALIGNED(32, int16_t, p1_minus_p0[MAX_INTERINTRA_SB_SQUARE]); // pred1 - pred0

#if CONFIG_ENABLE_HIGH_BIT_DEPTH
    if (ctx->hbd_md) {
        svt_aom_highbd_subtract_block(blk_h, blk_w, src_minus_p1, blk_w, src_buf, src_stride, p1, blk_w, EB_TEN_BIT);
        svt_aom_highbd_subtract_block(blk_h, blk_w, p1_minus_p0, blk_w, p1, blk_w, p0, blk_w, EB_TEN_BIT);
    } else
#endif
    {
        svt_aom_subtract_block(blk_h, blk_w, src_minus_p1, blk_w, src_buf, src_stride, p1, blk_w);
        svt_aom_subtract_block(blk_h, blk_w, p1_minus_p0, blk_w, p1, blk_w, p0, blk_w);
    }

    // The search writes into a local so the caller's slot is updated exactly once.
    int8_t        chosen_wedge = -1;
    const int64_t wedge_rd = pick_wedge_fixed_sign(pcs, ctx, bsize, src_minus_p1, p1_minus_p0, 0, &chosen_wedge);

    *wedge_index_out = chosen_wedge;
    return wedge_rd;
}
324
325
0
/*
 * Select the inter-intra mode (and optionally the wedge) for a candidate.
 *
 * Steps:
 *   1. Run regular, non-compound luma inter prediction for the candidate's
 *      first reference into a local buffer. As a side effect the candidate's
 *      interp_filters and is_interintra_used fields are reset to 0.
 *   2. For each of the INTERINTRA_MODES, blend that inter prediction with
 *      ctx->intrapred_buf[mode] (no wedge) and cost the result, either with
 *      the curve-fit RD model plus the mode rate, or with plain SSE,
 *      depending on inter_intra_comp_ctrls.use_rd_model. The cheapest mode is
 *      written to cand->block_mi.interintra_mode.
 *   3. If the wedge mode selected for the current shape is non-zero, search
 *      the wedge index using the winning mode's intra prediction.
 *   4. Set cand->block_mi.use_wedge_interintra: always 1 for wedge mode 1,
 *      otherwise 1 only when the wedge cost beats the best non-wedge cost.
 */
static void inter_intra_search(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand) {
    DECLARE_ALIGNED(16, uint8_t, inter_pred[2 * MAX_INTERINTRA_SB_SQUARE]);
    DECLARE_ALIGNED(16, uint8_t, blend_pred[2 * MAX_INTERINTRA_SB_SQUARE]);

    const uint32_t blk_w       = ctx->blk_geom->bwidth;
    const uint32_t blk_h       = ctx->blk_geom->bheight;
    const uint8_t  bit_depth   = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT;
    const uint32_t full_lambda = ctx->full_lambda_md[ctx->hbd_md ? EB_10_BIT_MD : EB_8_BIT_MD];

    // Source luma at the block origin, addressed both as 8-bit and as 16-bit samples.
    EbPictureBufferDesc* src_pic = ctx->hbd_md ? pcs->input_frame16bit : pcs->ppcs->enhanced_pic;
    uint8_t*  src_8b  = src_pic->y_buffer + (ctx->blk_org_x) + (ctx->blk_org_y) * src_pic->y_stride;
    uint16_t* src_16b = (uint16_t*)src_pic->y_buffer + (ctx->blk_org_x) + (ctx->blk_org_y) * src_pic->y_stride;
    uint8_t*  src_sel = ctx->hbd_md ? (uint8_t*)src_16b : src_8b;

    // Block-sized output descriptor for the inter prediction (only the fields set here are initialized).
    EbPictureBufferDesc pred_desc;
    pred_desc.border   = 0;
    pred_desc.y_stride = blk_w;
    pred_desc.y_buffer = inter_pred;

    // Use scaled references if resolution of the reference is different from that of the input.
    // Only one reference is involved.
    EbPictureBufferDesc* ref_pic_list0 = svt_aom_get_ref_pic_buffer(pcs, cand->block_mi.ref_frame[0]);
    if (ref_pic_list0 != NULL) {
        const uint8_t list_idx0  = get_list_idx(cand->block_mi.ref_frame[0]);
        const int8_t  ref_idx_l0 = get_ref_frame_idx(cand->block_mi.ref_frame[0]);
        svt_aom_use_scaled_rec_refs_if_needed(
            pcs,
            pcs->ppcs->enhanced_pic,
            (EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr,
            &ref_pic_list0,
            ctx->hbd_md);
    }

    // Regular inter prediction path (no compound, no inter-intra).
    cand->block_mi.interp_filters     = 0;
    cand->block_mi.is_interintra_used = 0;
    svt_aom_inter_prediction(pcs->scs,
                             pcs,
                             &cand->block_mi,
                             &cand->wm_params_l0,
                             &cand->wm_params_l1,
                             ctx->blk_ptr,
                             ctx->blk_geom->bsize,
                             ctx->shape,
                             false, // use_precomputed_obmc
                             false, // use_precomputed_ii - ii not performed here
                             ctx,
                             NULL,
                             NULL,
                             NULL,
                             ref_pic_list0,
                             NULL, // no second reference
                             ctx->blk_org_x,
                             ctx->blk_org_y,
                             &pred_desc, // output
                             0, // output org_x
                             0, // output org_y
                             PICTURE_BUFFER_DESC_LUMA_MASK,
                             bit_depth,
                             0); // is_16bit_pipeline

    // A no-wedge-only path would be needed for block sizes that fail this check.
    assert(svt_aom_is_interintra_wedge_used(ctx->blk_geom->bsize));

    const int      bsize_group = eb_size_group_lookup[ctx->blk_geom->bsize];
    int64_t        best_rd     = INT64_MAX;
    InterIntraMode best_mode   = INTERINTRA_MODES;

    for (int mode_i = 0; mode_i < INTERINTRA_MODES; ++mode_i) {
        const InterIntraMode ii_mode   = (InterIntraMode)mode_i;
        const int            mode_rate = ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][ii_mode];

        // Blend inter and intra predictions for this mode without a wedge mask.
        if (ctx->hbd_md) {
            svt_aom_combine_interintra_highbd(ii_mode, // mode
                                              0, // use_wedge_interintra
                                              0, // interintra_wedge_index
                                              0, // wedge_sign
                                              ctx->blk_geom->bsize,
                                              ctx->blk_geom->bsize, // plane_bsize
                                              blend_pred, // comppred
                                              blk_w, // compstride
                                              inter_pred, // interpred
                                              blk_w, // interstride
                                              ctx->intrapred_buf[mode_i], // intrapred
                                              blk_w, // intrastride
                                              bit_depth);
        } else {
            svt_aom_combine_interintra(ii_mode, // mode
                                       0, // use_wedge_interintra
                                       0, // interintra_wedge_index
                                       0, // wedge_sign
                                       ctx->blk_geom->bsize,
                                       ctx->blk_geom->bsize, // plane_bsize
                                       blend_pred, // comppred
                                       blk_w, // compstride
                                       inter_pred, // interpred
                                       blk_w, // interstride
                                       ctx->intrapred_buf[mode_i], // intrapred
                                       blk_w); // intrastride
        }

        int64_t mode_rd;
        if (ctx->inter_intra_comp_ctrls.use_rd_model) {
            int     rate_sum;
            int64_t dist_sum;
            model_rd_for_sb_with_curvfit(pcs,
                                         ctx,
                                         ctx->blk_geom->bsize,
                                         blk_w,
                                         blk_h,
                                         src_sel,
                                         src_pic->y_stride,
                                         blend_pred,
                                         blk_w,
                                         0,
                                         0,
                                         0,
                                         0,
                                         &rate_sum,
                                         &dist_sum,
                                         NULL,
                                         NULL,
                                         NULL);

            mode_rd = RDCOST(full_lambda, rate_sum + mode_rate, dist_sum);
        } else {
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
            if (ctx->hbd_md) {
                mode_rd = svt_aom_highbd_sse((uint8_t*)src_16b, src_pic->y_stride, blend_pred, blk_w, blk_w, blk_h);
            } else
#endif
            {
                mode_rd = svt_aom_sse(src_8b, src_pic->y_stride, blend_pred, blk_w, blk_w, blk_h);
            }
        }

        if (mode_rd < best_rd) {
            best_rd                        = mode_rd;
            best_mode                      = ii_mode;
            cand->block_mi.interintra_mode = ii_mode;
        }
    }

    // To test: enable wedge search if source variance and edge strength are above the thresholds.
    // TODO (from the original note): re-do intra pred using the winner, or have a separate intra search for wedge.
    const uint8_t ii_wedge_mode = ctx->shape == PART_N ? ctx->inter_intra_comp_ctrls.wedge_mode_sq
                                                       : ctx->inter_intra_comp_ctrls.wedge_mode_nsq;
    int64_t       best_wedge_rd = INT64_MAX;
    if (ii_wedge_mode) {
        best_wedge_rd = pick_interintra_wedge(pcs,
                                              ctx,
                                              ctx->blk_geom->bsize,
                                              ctx->intrapred_buf[best_mode],
                                              inter_pred,
                                              src_sel,
                                              src_pic->y_stride,
                                              &cand->block_mi.interintra_wedge_index);
    }

    // For ii_wedge_mode 1, always inject wedge as a separate candidate; for wedge mode 2 only
    // inject if wedge is better than non-wedge.
    cand->block_mi.use_wedge_interintra = (ii_wedge_mode == 1 || best_wedge_rd < best_rd) ? 1 : 0;
}
492
493
static COMPOUND_TYPE to_av1_compound_lut[] = {COMPOUND_AVERAGE, COMPOUND_DISTWTD, COMPOUND_DIFFWTD, COMPOUND_WEDGE};
494
495
static void determine_compound_mode(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
496
0
                                    MD_COMP_TYPE cur_type) {
497
0
    BlockModeInfo* block_mi        = &cand->block_mi;
498
0
    block_mi->interinter_comp.type = to_av1_compound_lut[cur_type];
499
0
    switch (cur_type) {
500
0
    case MD_COMP_AVG:
501
0
        block_mi->comp_group_idx = 0;
502
0
        block_mi->compound_idx   = 1;
503
0
        break;
504
0
    case MD_COMP_DIST:
505
0
        block_mi->comp_group_idx = 0;
506
0
        block_mi->compound_idx   = 0;
507
0
        break;
508
0
    case MD_COMP_DIFF0:
509
0
        block_mi->comp_group_idx            = 1;
510
0
        block_mi->compound_idx              = 1;
511
0
        block_mi->interinter_comp.mask_type = 55;
512
0
        svt_aom_search_compound_diff_wedge(pcs, ctx, cand);
513
0
        break;
514
0
    case MD_COMP_WEDGE:
515
0
        block_mi->comp_group_idx = 1;
516
0
        block_mi->compound_idx   = 1;
517
0
        svt_aom_search_compound_diff_wedge(pcs, ctx, cand);
518
0
        break;
519
0
    default:
520
0
        SVT_ERROR("not used comp type\n");
521
0
        assert(0);
522
0
        break;
523
0
    }
524
0
}
525
526
void svt_aom_choose_best_av1_mv_pred(ModeDecisionContext* ctx, MvReferenceFrame ref_frame,
527
                                     PredictionMode mode, // NEW or NEW_NEW
528
                                     Mv mv0, Mv mv1,
529
                                     uint8_t* bestDrlIndex, // output
530
                                     Mv       best_pred_mv[2] // output
531
0
) {
532
0
    if (ctx->shut_fast_rate) {
533
0
        return;
534
0
    }
535
0
    if (ctx->approx_inter_rate > 1) {
536
0
        *bestDrlIndex   = 0;
537
0
        best_pred_mv[0] = ctx->ref_mv_stack[ref_frame][0].this_mv;
538
0
        best_pred_mv[1] = ctx->ref_mv_stack[ref_frame][0].comp_mv;
539
0
        return;
540
0
    }
541
0
    int16_t mv0x = mv0.x;
542
0
    int16_t mv0y = mv0.y;
543
0
    int16_t mv1x = mv1.x;
544
0
    int16_t mv1y = mv1.y;
545
546
0
    uint8_t is_compound = is_inter_compound_mode(mode);
547
548
0
    struct MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx;
549
0
    BlkStruct*                      blk_ptr         = ctx->blk_ptr;
550
0
    uint8_t                         max_drl_index;
551
0
    Mv                              nearestmv[2] = {{{0}}, {{0}}};
552
0
    Mv                              nearmv[2];
553
0
    Mv                              ref_mv[2];
554
0
    Mv                              mv;
555
556
0
    max_drl_index = svt_aom_get_max_drl_index(blk_ptr->av1xd->ref_mv_count[ref_frame], mode);
557
    // max_drl_index = 1;
558
559
0
    if (max_drl_index == 1) {
560
0
        *bestDrlIndex = 0;
561
562
0
        best_pred_mv[0] = ctx->ref_mv_stack[ref_frame][0].this_mv;
563
0
        best_pred_mv[1] = ctx->ref_mv_stack[ref_frame][0].comp_mv;
564
0
    } else {
565
0
        uint8_t  drli;
566
0
        uint32_t best_mv_cost = 0xFFFFFFFF;
567
0
        for (drli = 0; drli < max_drl_index; drli++) {
568
0
            svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, ref_frame, is_compound, mode, drli, nearestmv, nearmv, ref_mv);
569
570
            //compute the rate for this drli Cand
571
0
            mv.y             = mv0y;
572
0
            mv.x             = mv0x;
573
0
            uint32_t mv_rate = 0;
574
0
            if (ctx->approx_inter_rate) {
575
0
                mv_rate = (uint32_t)svt_av1_mv_bit_cost_light(&mv, &(ref_mv[0]));
576
0
            } else {
577
0
                mv_rate = (uint32_t)svt_av1_mv_bit_cost(
578
0
                    &mv, &(ref_mv[0]), md_rate_est_ctx->nmv_vec_cost, md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT);
579
0
            }
580
581
0
            if (is_compound) {
582
0
                mv.y = mv1y;
583
0
                mv.x = mv1x;
584
0
                if (ctx->approx_inter_rate) {
585
0
                    mv_rate += (uint32_t)svt_av1_mv_bit_cost_light(&mv, &(ref_mv[1]));
586
0
                } else {
587
0
                    mv_rate += (uint32_t)svt_av1_mv_bit_cost(&mv,
588
0
                                                             &(ref_mv[1]),
589
0
                                                             md_rate_est_ctx->nmv_vec_cost,
590
0
                                                             md_rate_est_ctx->nmvcoststack,
591
0
                                                             MV_COST_WEIGHT);
592
0
                }
593
0
            }
594
595
0
            const int32_t new_mv = (mode == NEWMV || mode == NEW_NEWMV);
596
0
            if (new_mv) {
597
0
                int32_t idx;
598
0
                for (idx = 0; idx < 2; ++idx) {
599
0
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame] > idx + 1) {
600
0
                        uint8_t drl_1_ctx = av1_drl_ctx(&(ctx->ref_mv_stack[ref_frame][0]), idx);
601
0
                        mv_rate += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][drli != idx];
602
0
                        if (drli == idx) {
603
0
                            break;
604
0
                        }
605
0
                    }
606
0
                }
607
0
            }
608
609
0
            if (mv_rate < best_mv_cost) {
610
0
                best_mv_cost    = mv_rate;
611
0
                *bestDrlIndex   = drli;
612
0
                best_pred_mv[0] = ref_mv[0];
613
0
                best_pred_mv[1] = ref_mv[1];
614
0
            }
615
0
        }
616
0
    }
617
0
}
618
619
14.8k
/*
 * Destructor for a regular mode-decision candidate buffer: releases the
 * pred, rec_coeff and quant members through EB_DELETE. No other member is
 * released here.
 */
static void mode_decision_cand_bf_dctor(EbPtr p) {
    ModeDecisionCandidateBuffer* cand_bf = (ModeDecisionCandidateBuffer*)p;

    EB_DELETE(cand_bf->pred);
    EB_DELETE(cand_bf->rec_coeff);
    EB_DELETE(cand_bf->quant);
}
625
626
2.96k
// Destructor for a scratch candidate buffer built by svt_aom_mode_decision_scratch_cand_bf_ctor().
// That constructor allocates all five picture buffers itself, so all five are released here.
static void mode_decision_scratch_cand_bf_dctor(EbPtr p) {
    ModeDecisionCandidateBuffer* scratch_bf = (ModeDecisionCandidateBuffer*)p;

    EB_DELETE(scratch_bf->pred);
    EB_DELETE(scratch_bf->residual);
    EB_DELETE(scratch_bf->rec_coeff);
    EB_DELETE(scratch_bf->recon);
    EB_DELETE(scratch_bf->quant);
}
634
635
/***************************************
636
* Mode Decision Candidate Ctor
637
***************************************/
638
EbErrorType svt_aom_mode_decision_cand_bf_ctor(ModeDecisionCandidateBuffer* buffer_ptr, EbBitDepth max_bitdepth,
639
                                               uint8_t sb_size, uint32_t buffer_desc_mask,
640
                                               EbPictureBufferDesc* temp_residual, EbPictureBufferDesc* temp_recon_ptr,
641
14.8k
                                               uint64_t* fast_cost, uint64_t* full_cost, uint64_t* full_cost_ssim) {
642
14.8k
    EbPictureBufferDescInitData picture_buffer_desc_init_data;
643
644
14.8k
    EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data;
645
646
14.8k
    buffer_ptr->dctor = mode_decision_cand_bf_dctor;
647
648
    // Init Picture Data
649
14.8k
    picture_buffer_desc_init_data.max_width          = sb_size;
650
14.8k
    picture_buffer_desc_init_data.max_height         = sb_size;
651
14.8k
    picture_buffer_desc_init_data.bit_depth          = max_bitdepth;
652
14.8k
    picture_buffer_desc_init_data.color_format       = EB_YUV420;
653
14.8k
    picture_buffer_desc_init_data.buffer_enable_mask = buffer_desc_mask;
654
14.8k
    picture_buffer_desc_init_data.border             = 0;
655
14.8k
    picture_buffer_desc_init_data.split_mode         = false;
656
14.8k
    picture_buffer_desc_init_data.is_16bit_pipeline  = max_bitdepth > EB_EIGHT_BIT;
657
658
14.8k
    thirty_two_width_picture_buffer_desc_init_data.max_width          = sb_size;
659
14.8k
    thirty_two_width_picture_buffer_desc_init_data.max_height         = sb_size;
660
14.8k
    thirty_two_width_picture_buffer_desc_init_data.bit_depth          = EB_THIRTYTWO_BIT;
661
14.8k
    thirty_two_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
662
14.8k
    thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = buffer_desc_mask;
663
14.8k
    thirty_two_width_picture_buffer_desc_init_data.border             = 0;
664
14.8k
    thirty_two_width_picture_buffer_desc_init_data.split_mode         = false;
665
14.8k
    thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline  = true;
666
667
    // Candidate Ptr
668
14.8k
    buffer_ptr->cand = NULL;
669
670
    // Video Buffers
671
14.8k
    EB_NEW(buffer_ptr->pred, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
672
    // Reuse the residual_ptr memory in MD context
673
14.8k
    buffer_ptr->residual = temp_residual;
674
14.8k
    EB_NEW(buffer_ptr->rec_coeff, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
675
14.8k
    EB_NEW(buffer_ptr->quant, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
676
    // Reuse the recon_ptr memory in MD context
677
14.8k
    buffer_ptr->recon = temp_recon_ptr;
678
679
    // Costs
680
14.8k
    buffer_ptr->fast_cost      = fast_cost;
681
14.8k
    buffer_ptr->full_cost      = full_cost;
682
14.8k
    buffer_ptr->full_cost_ssim = full_cost_ssim;
683
14.8k
    return EB_ErrorNone;
684
14.8k
}
685
686
EbErrorType svt_aom_mode_decision_scratch_cand_bf_ctor(ModeDecisionCandidateBuffer* buffer_ptr, uint8_t sb_size,
687
2.96k
                                                       EbBitDepth max_bitdepth) {
688
2.96k
    EbPictureBufferDescInitData picture_buffer_desc_init_data;
689
2.96k
    EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data;
690
2.96k
    EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data;
691
692
2.96k
    buffer_ptr->dctor = mode_decision_scratch_cand_bf_dctor;
693
694
    // Init Picture Data
695
2.96k
    picture_buffer_desc_init_data.max_width                           = sb_size;
696
2.96k
    picture_buffer_desc_init_data.max_height                          = sb_size;
697
2.96k
    picture_buffer_desc_init_data.bit_depth                           = max_bitdepth;
698
2.96k
    picture_buffer_desc_init_data.color_format                        = EB_YUV420;
699
2.96k
    picture_buffer_desc_init_data.buffer_enable_mask                  = PICTURE_BUFFER_DESC_FULL_MASK;
700
2.96k
    picture_buffer_desc_init_data.border                              = 0;
701
2.96k
    picture_buffer_desc_init_data.split_mode                          = false;
702
2.96k
    picture_buffer_desc_init_data.is_16bit_pipeline                   = max_bitdepth > EB_EIGHT_BIT;
703
2.96k
    double_width_picture_buffer_desc_init_data.max_width              = sb_size;
704
2.96k
    double_width_picture_buffer_desc_init_data.max_height             = sb_size;
705
2.96k
    double_width_picture_buffer_desc_init_data.bit_depth              = EB_SIXTEEN_BIT;
706
2.96k
    double_width_picture_buffer_desc_init_data.color_format           = EB_YUV420;
707
2.96k
    double_width_picture_buffer_desc_init_data.buffer_enable_mask     = PICTURE_BUFFER_DESC_FULL_MASK;
708
2.96k
    double_width_picture_buffer_desc_init_data.border                 = 0;
709
2.96k
    double_width_picture_buffer_desc_init_data.split_mode             = false;
710
2.96k
    double_width_picture_buffer_desc_init_data.is_16bit_pipeline      = true;
711
2.96k
    thirty_two_width_picture_buffer_desc_init_data.max_width          = sb_size;
712
2.96k
    thirty_two_width_picture_buffer_desc_init_data.max_height         = sb_size;
713
2.96k
    thirty_two_width_picture_buffer_desc_init_data.bit_depth          = EB_THIRTYTWO_BIT;
714
2.96k
    thirty_two_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
715
2.96k
    thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
716
2.96k
    thirty_two_width_picture_buffer_desc_init_data.border             = 0;
717
2.96k
    thirty_two_width_picture_buffer_desc_init_data.split_mode         = false;
718
2.96k
    thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline  = true;
719
720
    // Candidate Ptr
721
2.96k
    buffer_ptr->cand = NULL;
722
723
    // Video Buffers
724
2.96k
    EB_NEW(buffer_ptr->pred, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
725
2.96k
    EB_NEW(buffer_ptr->residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data);
726
2.96k
    EB_NEW(buffer_ptr->rec_coeff, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
727
2.96k
    EB_NEW(buffer_ptr->quant, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
728
729
2.96k
    EB_NEW(buffer_ptr->recon, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
730
2.96k
    return EB_ErrorNone;
731
2.96k
}
732
733
/***************************************
734
* return true if the MV candidate is already injected
735
***************************************/
736
0
static bool mv_is_already_injected(ModeDecisionContext* ctx, Mv mv0, Mv mv1, uint8_t ref_type) {
737
0
    MvReferenceFrame rf[2];
738
0
    av1_set_ref_frame(rf, ref_type);
739
740
    // Unipred Candidate
741
0
    if (rf[1] <= INTRA_FRAME) {
742
        // First check the validity of the candidate MV, and exit if invalid MV
743
0
        if (ctx->corrupted_mv_check && !check_mv_validity(mv0.x, mv0.y, 0)) {
744
0
            return true;
745
0
        }
746
747
0
        for (int cand_idx = 0; cand_idx < ctx->injected_mv_count; cand_idx++) {
748
0
            if (ctx->injected_ref_types[cand_idx] == ref_type && ctx->injected_mvs[cand_idx][0].as_int == mv0.as_int) {
749
0
                return true;
750
0
            }
751
0
        }
752
0
    } else { // Bipred Candidate
753
        // First check the validity of the candidate MV, and exit if invalid MV
754
0
        if (ctx->corrupted_mv_check && (!check_mv_validity(mv0.x, mv0.y, 0) || !check_mv_validity(mv1.x, mv1.y, 0))) {
755
0
            return true;
756
0
        }
757
758
0
        RedundantCandCtrls* redund_ctrls = &ctx->cand_reduction_ctrls.redundant_cand_ctrls;
759
0
        if (redund_ctrls->score_th) {
760
0
            uint8_t is_high_mag = (ABS(mv0.x) > redund_ctrls->mag_th) && (ABS(mv0.y) > redund_ctrls->mag_th) &&
761
0
                (ABS(mv1.x) > redund_ctrls->mag_th) && (ABS(mv1.y) > redund_ctrls->mag_th);
762
0
            for (int cand_idx = 0; cand_idx < ctx->injected_mv_count; cand_idx++) {
763
0
                if (ctx->injected_ref_types[cand_idx] == ref_type) {
764
0
                    int score = ABS(ctx->injected_mvs[cand_idx][0].x - mv0.x) +
765
0
                        ABS(ctx->injected_mvs[cand_idx][0].y - mv0.y) + ABS(ctx->injected_mvs[cand_idx][1].x - mv1.x) +
766
0
                        ABS(ctx->injected_mvs[cand_idx][1].y - mv1.y);
767
768
0
                    if (score == 0 || (score < redund_ctrls->score_th && is_high_mag)) {
769
0
                        return true;
770
0
                    }
771
0
                }
772
0
            }
773
0
        } else {
774
0
            for (int cand_idx = 0; cand_idx < ctx->injected_mv_count; cand_idx++) {
775
0
                if (ctx->injected_ref_types[cand_idx] == ref_type &&
776
0
                    ctx->injected_mvs[cand_idx][0].as_int == mv0.as_int &&
777
0
                    ctx->injected_mvs[cand_idx][1].as_int == mv1.as_int) {
778
0
                    return true;
779
0
                }
780
0
            }
781
0
        }
782
0
    }
783
0
    return false;
784
0
}
785
786
// Tell whether a unipred reference (list_idx, ref_idx) may be used for the given
// inter candidate group.
//
// Always true when reference pruning is disabled. Otherwise the reference is valid if
// the ref-filtering result marks it as do_ref, or if it is reference index 0 and
// closest_refs is set for the group.
bool svt_aom_is_valid_unipred_ref(ModeDecisionContext* ctx, uint8_t inter_cand_group, uint8_t list_idx,
                                  uint8_t ref_idx) {
    if (!ctx->ref_pruning_ctrls.enabled) {
        return true;
    }

    // Reference survived pruning for this group
    if (ctx->ref_filtering_res[inter_cand_group][list_idx][ref_idx].do_ref) {
        return true;
    }

    // Pruned reference: only index 0 can be rescued, and only when closest_refs is on
    return ref_idx == 0 && ctx->ref_pruning_ctrls.closest_refs[inter_cand_group];
}
798
799
// Determine if the MV-to-MVP difference satisfies the mv_diff restriction
800
0
static bool is_valid_mv_diff(Mv best_pred_mv[2], Mv mv0, Mv mv1, uint8_t is_compound) {
801
0
    const uint8_t mv_diff_max_bit = MV_IN_USE_BITS;
802
803
0
    if (abs(mv0.x - best_pred_mv[0].x) > (1 << mv_diff_max_bit) ||
804
0
        abs(mv0.y - best_pred_mv[0].y) > (1 << mv_diff_max_bit)) {
805
0
        return false;
806
0
    }
807
808
0
    if (is_compound) {
809
0
        if (abs(mv1.x - best_pred_mv[1].x) > (1 << mv_diff_max_bit) ||
810
0
            abs(mv1.y - best_pred_mv[1].y) > (1 << mv_diff_max_bit)) {
811
0
            return false;
812
0
        }
813
0
    }
814
0
    return true;
815
0
}
816
817
// Tell whether the reference pair (list_idx_0, ref_idx_0) / (list_idx_1, ref_idx_1)
// may be used for a bipred candidate of the given inter candidate group.
//
// Always true when reference pruning is disabled. Otherwise the pair is valid if both
// references are marked do_ref by ref filtering, or if closest_refs is set for the group
// and both reference indices are 0.
static bool is_valid_bipred_ref(ModeDecisionContext* ctx, uint8_t inter_cand_group, uint8_t list_idx_0,
                                uint8_t ref_idx_0, uint8_t list_idx_1, uint8_t ref_idx_1) {
    if (!ctx->ref_pruning_ctrls.enabled) {
        return true;
    }

    // Both references survived pruning: nothing else to check
    if (ctx->ref_filtering_res[inter_cand_group][list_idx_0][ref_idx_0].do_ref &&
        ctx->ref_filtering_res[inter_cand_group][list_idx_1][ref_idx_1].do_ref) {
        return true;
    }

    // At least one reference was pruned: the pair is only kept when the closest refs are
    // protected for this group and both references are index 0 of their list
    return ctx->ref_pruning_ctrls.closest_refs[inter_cand_group] && ref_idx_0 == 0 && ref_idx_1 == 0;
}
837
838
0
// Number of neighbouring positions in the 3x3 refinement pattern (centre excluded)
#define BIPRED_3x3_REFINMENT_POSITIONS 8

// Read-only lookup tables indexed by refinement position.
// allow_refinement_flag is 1 for the four positions where exactly one of the x/y offsets
// is zero (horizontal/vertical neighbours) and 0 for the four diagonal positions.
static const int8_t allow_refinement_flag[BIPRED_3x3_REFINMENT_POSITIONS] = {1, 0, 1, 0, 1, 0, 1, 0};
// x / y offsets of each position relative to the centre MV
static const int8_t bipred_3x3_x_pos[BIPRED_3x3_REFINMENT_POSITIONS]      = {-1, -1, 0, 1, 1, 1, 0, -1};
static const int8_t bipred_3x3_y_pos[BIPRED_3x3_REFINMENT_POSITIONS]      = {0, 1, 1, 1, 0, -1, -1, -1};
843
844
127k
// Variance-based test on the current square block.
//
// Returns 0 when edge-info pruning is off, the super-block size is 128, the shape is not
// PART_N, or the block is 4x4. Otherwise returns 1 when the block variance is below 2000
// and, for blocks larger than 8x8, the gap between the largest and smallest of the four
// sub-block variances is below 4000; returns 0 in all other cases.
// NOTE(review): judging by the name, 1 presumably means intra search may be limited to DC -
// confirm against the caller.
static INLINE uint8_t is_dc_only_safe(PictureControlSet* pcs, ModeDecisionContext* ctx) {
    // Early exits, checked in order: pruning disabled, SB-128, NSQ, 4x4 (no variance available)
    if (!ctx->intra_ctrls.prune_using_edge_info) {
        return 0;
    }
    if (pcs->scs->super_block_size == 128) {
        return 0;
    }
    if (ctx->shape != PART_N) {
        return 0;
    }
    if (ctx->blk_geom->sq_size == 4) {
        return 0;
    }

    // Locate the block (and its four sub-blocks) in the per-SB variance array
    int            var_idx;
    int            sub_var_idx[4];
    const Position blk_org = {.x = ctx->blk_org_x - ctx->sb_origin_x, .y = ctx->blk_org_y - ctx->sb_origin_y};
    svt_aom_get_blk_var_map(ctx->blk_geom->sq_size, blk_org.x, blk_org.y, &var_idx, sub_var_idx);

    uint16_t*      sb_var   = pcs->ppcs->variance[ctx->sb_index];
    const uint32_t blk_var  = sb_var[var_idx];
    const uint8_t  low_var  = blk_var < 2000;

    // For 8x8, we do not have 4x4 sub-variance, so the spread check is skipped
    if (ctx->blk_geom->sq_size == 8) {
        return low_var;
    }

    // For 16x16 and above, measure the spread across the four sub-blocks
    uint32_t lowest  = UINT32_MAX;
    uint32_t highest = 0;
    for (int i = 0; i < 4; i++) {
        const uint32_t sub_var = sb_var[sub_var_idx[i]];
        if (sub_var < lowest) {
            lowest = sub_var;
        }
        if (sub_var > highest) {
            highest = sub_var;
        }
    }
    const uint32_t spread_var = highest - lowest;

    return (low_var && spread_var < 4000);
}
879
880
// Inject inter-intra, WM, OBMC for unipred simple-trans candidate
881
//
882
// total_cand_count is the index to ctx->fast_cand_array for the next candidate injected (which is the
883
// same as the number of candidates injected so far).  It is assumed the simple-trans candidate to base
884
// the other candidtes on is the previously injected candidate (at index total_cand_count - 1).
885
//
886
// enable_ii, enable_wm, and enable_obmc allow the caller to disable some modes explicitly; if enabled, the
887
// mode will be injected if the block size/candidate type supports the mode. The enable signals are left as
888
// arguments because some candidates do not inject all modes (e.g. unipred does not inject WM/OBMC).
889
static void inj_non_simple_modes(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* total_cand_count,
890
0
                                 const bool enable_ii, const bool enable_wm, const bool enable_obmc) {
891
    // index of simple translation candidate (to be used to copy cand info for other modes)
892
    // assumes the simple trans cand is the previously injected candidate
893
0
    const uint32_t                     simple_trans_cand_idx = *total_cand_count - 1;
894
0
    const ModeDecisionCandidate* const simple_trans_cand     = &ctx->fast_cand_array[simple_trans_cand_idx];
895
896
    // The candidate count to be used to track number of inj cands, and the index of fast_cand_array for new candidates
897
0
    uint32_t cand_count = *total_cand_count;
898
899
0
    assert(simple_trans_cand->block_mi.ref_frame[1] == NONE_FRAME);
900
0
    const uint8_t list_idx = get_list_idx(simple_trans_cand->block_mi.ref_frame[0]);
901
0
    const uint8_t ref_idx  = get_ref_frame_idx(simple_trans_cand->block_mi.ref_frame[0]);
902
903
    // INJECT INTER-INTRA
904
0
    const uint8_t is_ii_allowed = svt_aom_is_valid_unipred_ref(ctx, INTER_INTRA_GROUP, list_idx, ref_idx) &&
905
0
        svt_is_interintra_allowed(ctx->inter_intra_comp_ctrls.enabled,
906
0
                                  ctx->blk_geom->bsize,
907
0
                                  simple_trans_cand->block_mi.mode,
908
0
                                  simple_trans_cand->block_mi.ref_frame);
909
0
    if (enable_ii && is_ii_allowed) {
910
0
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count];
911
0
        svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate));
912
913
0
        inter_intra_search(pcs, ctx, cand);
914
0
        cand->block_mi.is_interintra_used = 1;
915
0
        cand->block_mi.ref_frame[1]       = INTRA_FRAME;
916
0
        const InterIntraMode ii_mode      = cand->block_mi.interintra_mode;
917
0
        INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
918
919
        // if ii_wedge_mode is 1, then inject wedge/non-wedge as separate candidates; OW, only inject the best (above)
920
0
        const uint8_t ii_wedge_mode = ctx->shape == PART_N ? ctx->inter_intra_comp_ctrls.wedge_mode_sq
921
0
                                                           : ctx->inter_intra_comp_ctrls.wedge_mode_nsq;
922
0
        if (ii_wedge_mode == 1) {
923
0
            cand = &ctx->fast_cand_array[cand_count];
924
0
            svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate));
925
926
0
            cand->block_mi.is_interintra_used   = 1;
927
0
            cand->block_mi.ref_frame[1]         = INTRA_FRAME;
928
0
            cand->block_mi.interintra_mode      = ii_mode;
929
0
            cand->block_mi.use_wedge_interintra = 0;
930
0
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
931
0
        }
932
0
    }
933
934
0
#if CONFIG_ENABLE_OBMC
935
    // INJECT WARP
936
0
    const uint8_t is_warp_allowed = warped_motion_mode_allowed(pcs, ctx) &&
937
0
        svt_aom_is_valid_unipred_ref(ctx, WARP_GROUP, list_idx, ref_idx);
938
0
    if (enable_wm && is_warp_allowed) {
939
0
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count];
940
0
        svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate));
941
942
0
        cand->block_mi.is_interintra_used = 0;
943
0
        cand->block_mi.motion_mode        = WARPED_CAUSAL;
944
0
        cand->wm_params_l0.wmtype         = AFFINE;
945
946
0
        uint8_t motion_mode_valid = 1;
947
0
        if (cand->block_mi.mode == NEWMV && ctx->wm_ctrls.refinement_iterations && ctx->wm_ctrls.refine_level == 0) {
948
            // Perform refinement; if refinement is off, then MV is valid, since it's been checked above
949
0
            motion_mode_valid = svt_aom_wm_motion_refinement(pcs, ctx, cand, 0);
950
0
        }
951
952
0
        if (motion_mode_valid) {
953
0
            motion_mode_valid = svt_aom_warped_motion_parameters(ctx,
954
0
                                                                 cand->block_mi.mv[0],
955
0
                                                                 ctx->blk_geom,
956
0
                                                                 cand->block_mi.ref_frame[0],
957
0
                                                                 &cand->wm_params_l0,
958
0
                                                                 &cand->block_mi.num_proj_ref,
959
0
                                                                 ctx->wm_ctrls.lower_band_th,
960
0
                                                                 ctx->wm_ctrls.upper_band_th,
961
0
                                                                 0);
962
0
        }
963
964
0
        if (motion_mode_valid) {
965
0
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
966
0
        }
967
0
    }
968
969
    // INJECT OBMC
970
0
    const uint8_t is_obmc_allowed = svt_aom_is_valid_unipred_ref(ctx, OBMC_GROUP, list_idx, ref_idx) &&
971
0
        (svt_aom_obmc_motion_mode_allowed(pcs,
972
0
                                          ctx,
973
0
                                          ctx->blk_geom->bsize,
974
0
                                          0,
975
0
                                          simple_trans_cand->block_mi.ref_frame[0],
976
0
                                          simple_trans_cand->block_mi.ref_frame[1],
977
0
                                          simple_trans_cand->block_mi.mode) == OBMC_CAUSAL);
978
0
    if (enable_obmc && is_obmc_allowed) {
979
0
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count];
980
0
        svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate));
981
982
0
        cand->block_mi.is_interintra_used = 0;
983
0
        cand->block_mi.motion_mode        = OBMC_CAUSAL;
984
985
0
        uint8_t motion_mode_valid = 1;
986
0
        if (cand->block_mi.mode == NEWMV && ctx->obmc_ctrls.refine_level == 0) {
987
0
            assert(cand->block_mi.ref_frame[1] == NONE_FRAME);
988
0
            motion_mode_valid = svt_aom_obmc_motion_refinement(pcs, ctx, cand, ctx->obmc_ctrls.refine_level);
989
0
        }
990
991
0
        if (motion_mode_valid) {
992
0
            INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
993
0
        }
994
0
    }
995
#else
996
    UNUSED(enable_wm);
997
    UNUSED(enable_obmc);
998
#endif // CONFIG_ENABLE_OBMC
999
1000
0
    *total_cand_count = cand_count;
1001
0
}
1002
1003
// Determines if inter MVP compound modes should be skipped based on info from neighbouring blocks/ref frame types.
1004
0
static bool skip_compound_on_ref_types(ModeDecisionContext* ctx, MvReferenceFrame rf[2]) {
1005
0
    if (!ctx->inter_comp_ctrls.skip_on_ref_info) {
1006
0
        return false;
1007
0
    }
1008
1009
0
    MacroBlockD* xd = ctx->blk_ptr->av1xd;
1010
1011
    // If both references are from the same list, skip compound
1012
0
    const uint8_t list_idx_0 = get_list_idx(rf[0]);
1013
0
    const uint8_t list_idx_1 = get_list_idx(rf[1]);
1014
0
    if (list_idx_0 == list_idx_1) {
1015
0
        return true;
1016
0
    }
1017
1018
    // Skip compound unless neighbours selected the ref frames
1019
0
    bool skip_comp = true;
1020
0
    if (!xd->left_available && !xd->up_available) {
1021
0
        return false;
1022
0
    }
1023
1024
0
    if (xd->left_available) {
1025
0
        const BlockModeInfo* const left_mi = &xd->left_mbmi->block_mi;
1026
0
        if ((is_inter_singleref_mode(left_mi->mode) &&
1027
0
             (left_mi->ref_frame[0] == rf[0] || left_mi->ref_frame[0] == rf[1])) ||
1028
0
            (is_inter_compound_mode(left_mi->mode) &&
1029
0
             (left_mi->ref_frame[0] == rf[0] && left_mi->ref_frame[1] == rf[1]))) {
1030
0
            return false;
1031
0
        }
1032
0
    }
1033
0
    if (xd->up_available) {
1034
0
        const BlockModeInfo* const above_mi = &xd->above_mbmi->block_mi;
1035
0
        if ((is_inter_singleref_mode(above_mi->mode) &&
1036
0
             (above_mi->ref_frame[0] == rf[0] || above_mi->ref_frame[0] == rf[1])) ||
1037
0
            (is_inter_compound_mode(above_mi->mode) &&
1038
0
             (above_mi->ref_frame[0] == rf[0] && above_mi->ref_frame[1] == rf[1]))) {
1039
0
            return false;
1040
0
        }
1041
0
    }
1042
1043
0
    return skip_comp;
1044
0
}
1045
1046
// Inject inter-inter compound types (DIST, DIFF, WEDGE) for a bipred AVG candidate
1047
//
1048
// total_cand_count is the index to ctx->fast_cand_array for the next candidate injected (which is the
1049
// same as the number of candidates injected so far).  It is assumed the AVG candidate to base
1050
// the other candidtes on is the previously injected candidate (at index total_cand_count - 1).
1051
0
static void inj_comp_modes(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* total_cand_count) {
1052
    // index of MD_COMP_AVG candidate (to be used to copy cand info for other modes)
1053
    // assumes the avg cand is the previously injected candidate
1054
0
    const uint32_t         avg_cand_idx = *total_cand_count - 1;
1055
0
    ModeDecisionCandidate* avg_cand     = &ctx->fast_cand_array[avg_cand_idx];
1056
1057
    // Get allowable compound types based on settings and block size
1058
0
    MD_COMP_TYPE tot_comp_types = get_tot_comp_types_bsize(ctx->inter_comp_ctrls.tot_comp_types, ctx->blk_geom->bsize);
1059
0
    if (tot_comp_types == MD_COMP_DIST) {
1060
0
        return;
1061
0
    }
1062
1063
    // Distortion-based ref pruning for compound types
1064
0
    const uint8_t ref_idx_0  = get_ref_frame_idx(avg_cand->block_mi.ref_frame[0]);
1065
0
    const uint8_t ref_idx_1  = get_ref_frame_idx(avg_cand->block_mi.ref_frame[1]);
1066
0
    const uint8_t list_idx_0 = get_list_idx(avg_cand->block_mi.ref_frame[0]);
1067
0
    const uint8_t list_idx_1 = get_list_idx(avg_cand->block_mi.ref_frame[1]);
1068
0
    if (!is_valid_bipred_ref(ctx, INTER_COMP_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) {
1069
0
        return;
1070
0
    }
1071
1072
    // Skip compound on neighbour info
1073
0
    if (skip_compound_on_ref_types(ctx, avg_cand->block_mi.ref_frame)) {
1074
0
        return;
1075
0
    }
1076
1077
    // Skip compound on MV length
1078
0
    if (ctx->inter_comp_ctrls.max_mv_length) {
1079
0
        const uint16_t max_mv_length = ctx->inter_comp_ctrls.max_mv_length;
1080
0
        if (abs(avg_cand->block_mi.mv[0].x) > max_mv_length || abs(avg_cand->block_mi.mv[0].y) > max_mv_length ||
1081
0
            abs(avg_cand->block_mi.mv[1].x) > max_mv_length || abs(avg_cand->block_mi.mv[1].y) > max_mv_length) {
1082
0
            return;
1083
0
        }
1084
0
    }
1085
    // If compound modes are to be tested for this block, generate the buffers that will be used in the DIFF/WEDGE search.
1086
    // Even if DIFF/WEDGE are not used, still call the function because it is needed for pred0_to_pred1_mult to work.
1087
0
    if (tot_comp_types > MD_COMP_DIST) {
1088
0
        if (svt_aom_calc_pred_masked_compound(pcs, ctx, avg_cand)) {
1089
0
            return;
1090
0
        }
1091
0
    }
1092
1093
    // The candidate count to be used to track number of inj cands, and the index of fast_cand_array for new candidates
1094
0
    uint32_t cand_count = *total_cand_count;
1095
0
    for (MD_COMP_TYPE cur_type = MD_COMP_DIST; cur_type < tot_comp_types; cur_type++) {
1096
0
        if (ctx->inter_comp_ctrls.no_sym_dist && cur_type == MD_COMP_DIST && ref_idx_0 == 0 && ref_idx_1 == 0) {
1097
0
            continue;
1098
0
        }
1099
0
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count];
1100
0
        svt_memcpy(cand, &ctx->fast_cand_array[avg_cand_idx], sizeof(ModeDecisionCandidate));
1101
0
        cand->skip_mode_allowed = false;
1102
0
        determine_compound_mode(pcs, ctx, cand, cur_type);
1103
0
        INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count);
1104
0
    }
1105
0
    *total_cand_count = cand_count;
1106
0
}
1107
1108
// Inject unipred NEWMV candidates at the 3x3 neighbourhood positions around each unipred ME result.
//
// For every non-BI_PRED ME candidate whose reference passes svt_aom_is_valid_unipred_ref(), the MV
// read from ctx->sb_me_mv is offset by each entry of bipred_3x3_x_pos/bipred_3x3_y_pos (scaled by 2
// when high-precision MVs are not allowed). When ctx->unipred3x3_injection >= 2, positions whose
// allow_refinement_flag is 0 are not tested. A position is injected if it was not already injected
// and, when corrupted_mv_check is on, its MV difference to the best predictor is valid. Each injected
// candidate is followed by its inter-intra variant (via inj_non_simple_modes) and recorded in
// ctx->injected_mvs / ctx->injected_ref_types.
//
// candidate_total_cnt is the running number of candidates in ctx->fast_cand_array; updated on return.
static void unipred_3x3_candidates_injection(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                             uint32_t* candidate_total_cnt) {
    uint32_t               cand_total_cnt          = (*candidate_total_cnt);
    const uint8_t          allow_high_precision_mv = pcs->ppcs->frm_hdr.allow_high_precision_mv;
    MeSbResults*           me_results              = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const uint8_t          total_me_cnt            = me_results->total_me_candidate_index[ctx->me_block_offset];
    const MeCandidate*     me_block_results        = &me_results->me_candidate_array[ctx->me_cand_offset];
    ModeDecisionCandidate* cand_array              = ctx->fast_cand_array;

    // Offset scale: 1 with high-precision MVs, 2 otherwise. The offsets are multiplied by this
    // step instead of being left-shifted, because left-shifting a negative offset is undefined
    // behaviour in C; the resulting values are the same.
    const int mv_step = 1 << !allow_high_precision_mv;

    // (8 Best_L0 neighbors)
    for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) {
        const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index];
        const uint8_t      inter_direction      = me_block_results_ptr->direction;
        const uint8_t      list0_ref_index      = me_block_results_ptr->ref_idx_l0;
        const uint8_t      list1_ref_index      = me_block_results_ptr->ref_idx_l1;
        if (inter_direction == BI_PRED) {
            continue;
        }
        assert(inter_direction == 0 || inter_direction == 1);
        const uint8_t list_idx = inter_direction;
        const uint8_t ref_idx  = list_idx == REF_LIST_0 ? list0_ref_index : list1_ref_index;
        if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, UNI_3x3_GROUP), list_idx, ref_idx)) {
            continue;
        }
        for (int unipred_index = 0; unipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++unipred_index) {
            /**************
            NEWMV
            ************* */
            // Reduced pattern: only test the positions flagged in allow_refinement_flag
            if (ctx->unipred3x3_injection >= 2 && allow_refinement_flag[unipred_index] == 0) {
                continue;
            }

            Mv to_inj_mv = ctx->sb_me_mv[list_idx][ref_idx];
            to_inj_mv.x += bipred_3x3_x_pos[unipred_index] * mv_step;
            to_inj_mv.y += bipred_3x3_y_pos[unipred_index] * mv_step;
            const uint8_t    to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx);
            MvReferenceFrame rf[2]              = {to_inject_ref_type, NONE_FRAME};

            // The duplicate check is bypassed while nothing has been injected yet
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, to_inject_ref_type)) {
                continue;
            }

            uint8_t drl_index       = 0;
            Mv      best_pred_mv[2] = {{{0}}, {{0}}};
            svt_aom_choose_best_av1_mv_pred(
                ctx, to_inject_ref_type, NEWMV, to_inj_mv, (Mv){{0}}, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, to_inj_mv, to_inj_mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.use_intrabc        = 0;
            cand->skip_mode_allowed           = false;
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            cand->drl_index                   = drl_index;
            cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[to_inject_ref_type].num;

            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            const bool enable_ii = true;
            // OBMC and WM perform a refinement search around the ME MV, so they are not injected as unipred3x3 candidates,
            // since this is effectively a refinement search
            const bool enable_obmc = false;
            const bool enable_warp = false;
            inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc);

            // Record the MV so later injections can detect duplicates
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
            ++ctx->injected_mv_count;
        }
    }

    // update the total number of candidates injected
    (*candidate_total_cnt) = cand_total_cnt;
}
1188
1189
static void bipred_3x3_candidates_injection(PictureControlSet* pcs, ModeDecisionContext* ctx,
1190
0
                                            uint32_t* candidate_total_cnt) {
1191
0
    uint32_t               cand_total_cnt          = (*candidate_total_cnt);
1192
0
    const uint8_t          allow_high_precision_mv = pcs->ppcs->frm_hdr.allow_high_precision_mv;
1193
0
    const MeSbResults*     me_results              = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1194
0
    const uint8_t          total_me_cnt            = me_results->total_me_candidate_index[ctx->me_block_offset];
1195
0
    const MeCandidate*     me_block_results        = &me_results->me_candidate_array[ctx->me_cand_offset];
1196
0
    ModeDecisionCandidate* cand_array              = ctx->fast_cand_array;
1197
0
    Mv                     best_pred_mv[2]         = {{{0}}, {{0}}};
1198
1199
    /**************
1200
    NEW_NEWMV
1201
    ************* */
1202
0
    for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) {
1203
0
        const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index];
1204
0
        const uint8_t      inter_direction      = me_block_results_ptr->direction;
1205
0
        const uint8_t      list0_ref_index      = me_block_results_ptr->ref_idx_l0;
1206
0
        const uint8_t      list1_ref_index      = me_block_results_ptr->ref_idx_l1;
1207
0
        if (inter_direction < BI_PRED) {
1208
0
            continue;
1209
0
        }
1210
0
        assert(inter_direction == BI_PRED);
1211
1212
0
        const uint8_t ref0_list = me_block_results_ptr->ref0_list;
1213
0
        const uint8_t ref1_list = me_block_results_ptr->ref1_list;
1214
0
        if (!is_valid_bipred_ref(ctx, BI_3x3_GROUP, ref0_list, list0_ref_index, ref1_list, list1_ref_index)) {
1215
0
            continue;
1216
0
        }
1217
1218
0
        int8_t best_list = -1;
1219
0
        int    diff      = ((int)ctx->post_subpel_me_mv_cost[ref0_list][list0_ref_index] -
1220
0
                    (int)ctx->post_subpel_me_mv_cost[ref1_list][list1_ref_index]) *
1221
0
            100;
1222
1223
0
        if (ctx->bipred3x3_ctrls.use_l0_l1_dev != (uint8_t)~0) {
1224
0
            if (abs(diff) >
1225
0
                (ctx->bipred3x3_ctrls.use_l0_l1_dev * (int)ctx->post_subpel_me_mv_cost[ref0_list][list0_ref_index])) {
1226
0
                return;
1227
0
            }
1228
0
        }
1229
1230
        // Best list in terms of distortion reduction
1231
0
        if (ctx->bipred3x3_ctrls.use_best_list) {
1232
0
            best_list = ref0_list;
1233
0
            if (diff > 0) {
1234
0
                best_list = ref1_list;
1235
0
            }
1236
0
        }
1237
1238
0
        MvReferenceFrame rf[2]              = {svt_get_ref_frame_type(ref0_list, list0_ref_index),
1239
0
                                               svt_get_ref_frame_type(ref1_list, list1_ref_index)};
1240
0
        const uint8_t    to_inject_ref_type = av1_ref_frame_type(rf);
1241
0
        if (best_list == -1 || best_list == ref0_list) {
1242
            // (Best_L0, 8 Best_L1 neighbors)
1243
0
            for (uint32_t bipred_index = 0; bipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++bipred_index) {
1244
0
                if (!ctx->bipred3x3_ctrls.search_diag) {
1245
0
                    if (allow_refinement_flag[bipred_index] == 0) {
1246
0
                        continue;
1247
0
                    }
1248
0
                }
1249
0
                Mv to_inj_mv0 = ctx->sb_me_mv[ref0_list][list0_ref_index];
1250
0
                Mv to_inj_mv1 = ctx->sb_me_mv[ref1_list][list1_ref_index];
1251
0
                to_inj_mv1.x += (bipred_3x3_x_pos[bipred_index] << !allow_high_precision_mv);
1252
0
                to_inj_mv1.y += (bipred_3x3_y_pos[bipred_index] << !allow_high_precision_mv);
1253
0
                if ((ctx->injected_mv_count == 0 ||
1254
0
                     mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) {
1255
0
                    uint8_t drl_index = 0;
1256
0
                    svt_aom_choose_best_av1_mv_pred(
1257
0
                        ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv);
1258
0
                    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) {
1259
0
                        ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
1260
0
                        cand->block_mi.use_intrabc        = 0;
1261
0
                        cand->skip_mode_allowed           = false;
1262
0
                        cand->drl_index                   = drl_index;
1263
0
                        cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1264
0
                        cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1265
0
                        cand->block_mi.mode               = NEW_NEWMV;
1266
0
                        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1267
0
                        cand->block_mi.is_interintra_used = 0;
1268
0
                        cand->block_mi.ref_frame[0]       = rf[0];
1269
0
                        cand->block_mi.ref_frame[1]       = rf[1];
1270
0
                        cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
1271
0
                        cand->pred_mv[1].as_int           = best_pred_mv[1].as_int;
1272
0
                        determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1273
0
                        INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
1274
1275
0
                        if (ctx->inter_comp_ctrls.do_3x3_bi) {
1276
0
                            ctx->cmp_store.pred0_cnt = 0;
1277
0
                            ctx->cmp_store.pred1_cnt = 0;
1278
0
                            inj_comp_modes(pcs, ctx, &cand_total_cnt);
1279
0
                        }
1280
0
                        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1281
0
                        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1282
0
                        ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
1283
0
                        ++ctx->injected_mv_count;
1284
0
                    }
1285
0
                }
1286
0
            }
1287
0
        }
1288
0
        if (best_list == -1 || best_list == ref1_list) {
1289
            // (8 Best_L0 neighbors, Best_L1) :
1290
0
            for (uint32_t bipred_index = 0; bipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++bipred_index) {
1291
0
                if (!ctx->bipred3x3_ctrls.search_diag) {
1292
0
                    if (allow_refinement_flag[bipred_index] == 0) {
1293
0
                        continue;
1294
0
                    }
1295
0
                }
1296
0
                Mv to_inj_mv0 = ctx->sb_me_mv[ref0_list][list0_ref_index];
1297
0
                to_inj_mv0.x += (bipred_3x3_x_pos[bipred_index] << !allow_high_precision_mv);
1298
0
                to_inj_mv0.y += (bipred_3x3_y_pos[bipred_index] << !allow_high_precision_mv);
1299
0
                Mv to_inj_mv1 = ctx->sb_me_mv[ref1_list][list1_ref_index];
1300
0
                if ((ctx->injected_mv_count == 0 ||
1301
0
                     mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) {
1302
0
                    uint8_t drl_index = 0;
1303
0
                    svt_aom_choose_best_av1_mv_pred(
1304
0
                        ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv);
1305
0
                    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) {
1306
0
                        ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
1307
0
                        cand->block_mi.use_intrabc        = 0;
1308
0
                        cand->skip_mode_allowed           = false;
1309
0
                        cand->drl_index                   = drl_index;
1310
0
                        cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1311
0
                        cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1312
0
                        cand->block_mi.mode               = NEW_NEWMV;
1313
0
                        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1314
0
                        cand->block_mi.is_interintra_used = 0;
1315
0
                        cand->block_mi.ref_frame[0]       = rf[0];
1316
0
                        cand->block_mi.ref_frame[1]       = rf[1];
1317
0
                        cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
1318
0
                        cand->pred_mv[1].as_int           = best_pred_mv[1].as_int;
1319
0
                        determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1320
0
                        INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
1321
1322
0
                        if (ctx->inter_comp_ctrls.do_3x3_bi) {
1323
0
                            ctx->cmp_store.pred0_cnt = 0;
1324
0
                            ctx->cmp_store.pred1_cnt = 0;
1325
0
                            inj_comp_modes(pcs, ctx, &cand_total_cnt);
1326
0
                        }
1327
0
                        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1328
0
                        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1329
0
                        ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
1330
0
                        ++ctx->injected_mv_count;
1331
0
                    }
1332
0
                }
1333
0
            }
1334
0
        }
1335
0
    }
1336
1337
    // update the total number of candidates injected
1338
0
    (*candidate_total_cnt) = cand_total_cnt;
1339
1340
0
    return;
1341
0
}
1342
1343
/*********************************************************************
1344
**********************************************************************
1345
        Up to 12 inter candidates injected
1346
        Min 6 inter candidates injected
1347
UniPred L0 : NEAREST          + up to 3x NEAR
1348
UniPred L1 : NEAREST          + up to 3x NEAR
1349
BIPred     : NEAREST_NEAREST  + up to 3x NEAR_NEAR
1350
**********************************************************************
1351
**********************************************************************/
1352
/*
 * Light-PD1 MVP candidate injection.
 *
 * Iterates over ctx->ref_frame_type_arr and, for each entry, injects candidates taken directly
 * from ctx->ref_mv_stack:
 *   - single reference   : NEARESTMV, then NEARMV for each permitted DRL index
 *   - compound reference : NEAREST_NEARESTMV, then NEAR_NEARMV for each permitted DRL index
 *                          (only when allow_bipred is true)
 *
 *   pcs          : picture control set (frame header, ME results, max candidate count)
 *   ctx          : mode-decision context (candidate array, MV stacks, injected-MV bookkeeping)
 *   candTotCnt   : in/out total candidate count; updated on exit
 *   allow_bipred : when false, compound reference types are skipped
 */
static void inject_mvp_candidates_ii_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candTotCnt,
                                               const bool allow_bipred) {
    const FrameHeader*     hdr       = &pcs->ppcs->frm_hdr;
    ModeDecisionCandidate* cands     = ctx->fast_cand_array;
    MacroBlockD*           xd        = ctx->blk_ptr->av1xd;
    uint32_t               next_cand = *candTotCnt;

    // All ref pairs: (1) single-ref List0  (2) single-ref List1  (3) compound Bi-Dir List0-List1
    for (uint32_t type_it = 0; type_it < ctx->tot_ref_frame_types; ++type_it) {
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[type_it];
        MvReferenceFrame rf[2];
        av1_set_ref_frame(rf, ref_pair);

        if (rf[1] != NONE_FRAME) {
            /* ---------------- compound reference ---------------- */
            if (!allow_bipred) {
                continue;
            }

            // NEAREST_NEAREST
            // No duplicate check: NEAREST_NEAREST is the first bipred INTER candidate injected
            Mv         mv_l0        = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int};
            Mv         mv_l1        = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int};
            const bool skip_mode_ok = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) &&
                hdr->skip_mode_params.skip_mode_flag && (rf[0] == hdr->skip_mode_params.ref_frame_idx_0) &&
                (rf[1] == hdr->skip_mode_params.ref_frame_idx_1);

            ModeDecisionCandidate* nrst_nrst         = &cands[next_cand];
            nrst_nrst->block_mi.use_intrabc          = 0;
            nrst_nrst->block_mi.is_interintra_used   = 0;
            nrst_nrst->block_mi.mode                 = NEAREST_NEARESTMV;
            nrst_nrst->block_mi.motion_mode          = SIMPLE_TRANSLATION;
            nrst_nrst->block_mi.ref_frame[0]         = rf[0];
            nrst_nrst->block_mi.ref_frame[1]         = rf[1];
            nrst_nrst->block_mi.mv[0].as_int         = mv_l0.as_int;
            nrst_nrst->block_mi.mv[1].as_int         = mv_l1.as_int;
            nrst_nrst->block_mi.comp_group_idx       = 0;
            nrst_nrst->block_mi.compound_idx         = 1;
            nrst_nrst->block_mi.interinter_comp.type = COMPOUND_AVERAGE;
            nrst_nrst->drl_index                     = 0;
            nrst_nrst->skip_mode_allowed             = skip_mode_ok;

            INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv_l0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv_l1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
            ++ctx->injected_mv_count;

            // NEAR_NEAR: nothing is injected unless the near-count controls are enabled
            const uint8_t drl_limit = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEARMV);
            uint8_t       near_near_cnt = 0;
            if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
                near_near_cnt = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_near_count, drl_limit);
            }
            for (uint8_t drli = 0; drli < near_near_cnt; drli++) {
                mv_l0.as_int = ctx->ref_mv_stack[ref_pair][1 + drli].this_mv.as_int;
                mv_l1.as_int = ctx->ref_mv_stack[ref_pair][1 + drli].comp_mv.as_int;
                if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv_l0, mv_l1, ref_pair)) {
                    continue;
                }

                ModeDecisionCandidate* near_near         = &cands[next_cand];
                near_near->block_mi.use_intrabc          = 0;
                near_near->block_mi.is_interintra_used   = 0;
                near_near->block_mi.mode                 = NEAR_NEARMV;
                near_near->block_mi.motion_mode          = SIMPLE_TRANSLATION;
                near_near->block_mi.ref_frame[0]         = rf[0];
                near_near->block_mi.ref_frame[1]         = rf[1];
                near_near->block_mi.mv[0].as_int         = mv_l0.as_int;
                near_near->block_mi.mv[1].as_int         = mv_l1.as_int;
                near_near->block_mi.comp_group_idx       = 0;
                near_near->block_mi.compound_idx         = 1;
                near_near->block_mi.interinter_comp.type = COMPOUND_AVERAGE;
                near_near->drl_index                     = drli;
                near_near->skip_mode_allowed             = false;

                INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

                ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv_l0.as_int;
                ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv_l1.as_int;
                ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
                ++ctx->injected_mv_count;
            }
            continue;
        }

        /* ---------------- single reference ---------------- */
        const MvReferenceFrame frame_type = rf[0];
        const uint8_t          list_idx   = get_list_idx(rf[0]);

        if (ctx->cand_reduction_ctrls.lpd1_mvp_best_me_list) {
            // When ME candidates exist, keep only the list matching the direction of the first one
            const MeSbResults* me_results   = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
            const uint8_t      me_cand_cnt  = me_results->total_me_candidate_index[ctx->me_block_offset];
            const MeCandidate* first_me     = &me_results->me_candidate_array[ctx->me_cand_offset];
            const uint8_t      me_direction = first_me->direction;
            if (me_cand_cnt && list_idx != me_direction) {
                continue;
            }
        }

        // NEAREST
        // No duplicate check: NEAREST is the first INTER MV injected
        Mv mv = {.as_int = ctx->ref_mv_stack[frame_type][0].this_mv.as_int};

        ModeDecisionCandidate* nearest       = &cands[next_cand];
        nearest->block_mi.use_intrabc        = 0;
        nearest->block_mi.is_interintra_used = 0;
        nearest->block_mi.mode               = NEARESTMV;
        nearest->block_mi.motion_mode        = SIMPLE_TRANSLATION;
        nearest->block_mi.ref_frame[0]       = rf[0];
        nearest->block_mi.ref_frame[1]       = rf[1];
        nearest->block_mi.mv[0].as_int       = mv.as_int;
        nearest->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
        nearest->drl_index                   = 0;
        nearest->skip_mode_allowed           = false;
        INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
        ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
        ++ctx->injected_mv_count;

        // NEAR: nothing is injected unless the near-count controls are enabled
        const uint8_t drl_limit = svt_aom_get_max_drl_index(xd->ref_mv_count[frame_type], NEARMV);
        uint8_t       near_cnt  = 0;
        if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
            near_cnt = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_count, drl_limit);
        }
        for (uint8_t drli = 0; drli < near_cnt; drli++) {
            mv.as_int = ctx->ref_mv_stack[frame_type][1 + drli].this_mv.as_int;
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv, mv, frame_type)) {
                continue;
            }

            ModeDecisionCandidate* near       = &cands[next_cand];
            near->block_mi.use_intrabc        = 0;
            near->block_mi.is_interintra_used = 0;
            near->block_mi.mode               = NEARMV;
            near->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            near->block_mi.ref_frame[0]       = rf[0];
            near->block_mi.ref_frame[1]       = rf[1];
            near->block_mi.mv[0].as_int       = mv.as_int;
            near->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
            near->drl_index                   = drli;
            near->skip_mode_allowed           = false;
            INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
            ++ctx->injected_mv_count;
        }
    }

    // Publish the updated candidate count
    *candTotCnt = next_cand;
}
1496
1497
/*********************************************************************
1498
**********************************************************************
1499
        Up to 12 inter candidates injected
1500
        Min 6 inter candidates injected
1501
UniPred L0 : NEAREST          + up to 3x NEAR
1502
UniPred L1 : NEAREST          + up to 3x NEAR
1503
BIPred     : NEAREST_NEAREST  + up to 3x NEAR_NEAR
1504
**********************************************************************
1505
**********************************************************************/
1506
/*
 * MVP candidate injection (regular path).
 *
 * Iterates over ctx->ref_frame_type_arr and injects, per reference type:
 *   - single reference   : NEARESTMV, then NEARMV per permitted DRL index; each injected
 *                          candidate is followed by inj_non_simple_modes()
 *   - compound reference : NEAREST_NEARESTMV, then NEAR_NEARMV per permitted DRL index; each
 *                          injected candidate may be followed by inj_comp_modes()
 *                          (only when allow_bipred is true)
 * A candidate is skipped when its MV(s) are already recorded in ctx->injected_mvs for the same
 * reference type.
 *
 *   pcs            : picture control set (frame header, max candidate count)
 *   ctx            : mode-decision context (candidate array, MV stacks, controls, bookkeeping)
 *   cand_total_cnt : in/out total candidate count; updated on exit
 *   allow_bipred   : when false, compound reference types are skipped
 */
static void inject_mvp_candidates_ii(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* cand_total_cnt,
                                     const bool allow_bipred) {
    BlkStruct*             blk       = ctx->blk_ptr;
    const FrameHeader*     hdr       = &pcs->ppcs->frm_hdr;
    ModeDecisionCandidate* cands     = ctx->fast_cand_array;
    MacroBlockD*           xd        = blk->av1xd;
    uint32_t               next_cand = *cand_total_cnt;
    Mv                     nearestmv[2], nearmv[2], ref_mv[2];

    // All ref pairs: (1) single-ref List0  (2) single-ref List1  (3) compound Bi-Dir List0-List1
    // (4) compound Uni-Dir List0-List0  (5) compound Uni-Dir List1-List1
    for (uint32_t type_it = 0; type_it < ctx->tot_ref_frame_types; ++type_it) {
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[type_it];
        MvReferenceFrame rf[2];
        av1_set_ref_frame(rf, ref_pair);

        if (rf[1] != NONE_FRAME) {
            /* ---------------- compound reference ---------------- */
            if (!allow_bipred) {
                continue;
            }
            const uint8_t ref_idx_0  = get_ref_frame_idx(rf[0]);
            const uint8_t ref_idx_1  = get_ref_frame_idx(rf[1]);
            const uint8_t list_idx_0 = get_list_idx(rf[0]);
            const uint8_t list_idx_1 = get_list_idx(rf[1]);

            // The compound-store counters are reset once per reference pair, before validation
            ctx->cmp_store.pred0_cnt = 0;
            ctx->cmp_store.pred1_cnt = 0;

            // Always consider the 2 closest ref frames (i.e. ref_idx=0) @ MVP cand generation
            if (!is_valid_bipred_ref(ctx, NRST_NEAR_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) {
                continue;
            }

            // NEAREST_NEAREST
            Mv mv_l0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int};
            Mv mv_l1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int};
            if (ctx->injected_mv_count == 0 || !mv_is_already_injected(ctx, mv_l0, mv_l1, ref_pair)) {
                const bool skip_mode_ok = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) &&
                    hdr->skip_mode_params.skip_mode_flag && (rf[0] == hdr->skip_mode_params.ref_frame_idx_0) &&
                    (rf[1] == hdr->skip_mode_params.ref_frame_idx_1);

                ModeDecisionCandidate* nrst_nrst       = &cands[next_cand];
                nrst_nrst->block_mi.use_intrabc        = 0;
                nrst_nrst->block_mi.is_interintra_used = 0;
                nrst_nrst->block_mi.mode               = NEAREST_NEARESTMV;
                nrst_nrst->block_mi.motion_mode        = SIMPLE_TRANSLATION;
                nrst_nrst->block_mi.ref_frame[0]       = rf[0];
                nrst_nrst->block_mi.ref_frame[1]       = rf[1];
                nrst_nrst->block_mi.mv[0].as_int       = mv_l0.as_int;
                nrst_nrst->block_mi.mv[1].as_int       = mv_l1.as_int;
                nrst_nrst->drl_index                   = 0;
                nrst_nrst->skip_mode_allowed           = skip_mode_ok;
                determine_compound_mode(pcs, ctx, nrst_nrst, MD_COMP_AVG);
                INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

                if (ctx->inter_comp_ctrls.do_nearest_nearest) {
                    // Don't reset ctx->cmp_store.pred0_cnt for MVP
                    inj_comp_modes(pcs, ctx, &next_cand);
                }

                ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv_l0.as_int;
                ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv_l1.as_int;
                ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
                ++ctx->injected_mv_count;
            }

            // NEAR_NEAR: nothing is injected unless the near-count controls are enabled
            const uint8_t drl_limit     = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEARMV);
            uint8_t       near_near_cnt = 0;
            if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
                near_near_cnt = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_near_count, drl_limit);
            }
            for (uint8_t drli = 0; drli < near_near_cnt; drli++) {
                svt_aom_get_av1_mv_pred_drl(ctx, blk, ref_pair, 1, NEAR_NEARMV, drli, nearestmv, nearmv, ref_mv);

                mv_l0.as_int = nearmv[0].as_int;
                mv_l1.as_int = nearmv[1].as_int;
                if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv_l0, mv_l1, ref_pair)) {
                    continue;
                }

                ModeDecisionCandidate* near_near       = &cands[next_cand];
                near_near->block_mi.use_intrabc        = 0;
                near_near->block_mi.is_interintra_used = 0;
                near_near->block_mi.mode               = NEAR_NEARMV;
                near_near->block_mi.motion_mode        = SIMPLE_TRANSLATION;
                near_near->block_mi.ref_frame[0]       = rf[0];
                near_near->block_mi.ref_frame[1]       = rf[1];
                near_near->block_mi.mv[0].as_int       = mv_l0.as_int;
                near_near->block_mi.mv[1].as_int       = mv_l1.as_int;
                near_near->drl_index                   = drli;
                near_near->skip_mode_allowed           = false;
                determine_compound_mode(pcs, ctx, near_near, MD_COMP_AVG);
                INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

                if (ctx->inter_comp_ctrls.do_near_near) {
                    // Don't reset ctx->cmp_store.pred0_cnt for MVP
                    inj_comp_modes(pcs, ctx, &next_cand);
                }

                ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv_l0.as_int;
                ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv_l1.as_int;
                ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
                ++ctx->injected_mv_count;
            }
            continue;
        }

        /* ---------------- single reference ---------------- */
        const MvReferenceFrame frame_type = rf[0];
        const uint8_t          list_idx   = get_list_idx(rf[0]);
        const uint8_t          ref_idx    = get_ref_frame_idx(rf[0]);

        // Always consider the 2 closest ref frames (i.e. ref_idx=0) @ MVP cand generation
        if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEAR_GROUP), list_idx, ref_idx)) {
            continue;
        }

        // NEAREST
        Mv mv = {.as_int = ctx->ref_mv_stack[frame_type][0].this_mv.as_int};
        if (ctx->injected_mv_count == 0 || !mv_is_already_injected(ctx, mv, mv, frame_type)) {
            assert(list_idx == 0 || list_idx == 1);
            ModeDecisionCandidate* nearest       = &cands[next_cand];
            nearest->block_mi.use_intrabc        = 0;
            nearest->block_mi.is_interintra_used = 0;
            nearest->block_mi.mode               = NEARESTMV;
            nearest->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            nearest->block_mi.ref_frame[0]       = rf[0];
            nearest->block_mi.ref_frame[1]       = rf[1];
            nearest->block_mi.mv[0].as_int       = mv.as_int;
            nearest->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
            nearest->drl_index                   = 0;
            nearest->skip_mode_allowed           = false;
            INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

            // Inter-intra and OBMC variants are always requested; warp follows use_wm_for_mvp
            const bool try_warp = ctx->wm_ctrls.use_wm_for_mvp ? true : false;
            inj_non_simple_modes(pcs, ctx, &next_cand, true, try_warp, true);

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
            ++ctx->injected_mv_count;
        }

        // NEAR: nothing is injected unless the near-count controls are enabled
        const uint8_t drl_limit = svt_aom_get_max_drl_index(xd->ref_mv_count[frame_type], NEARMV);
        uint8_t       near_cnt  = 0;
        if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) {
            near_cnt = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_count, drl_limit);
        }
        for (uint8_t drli = 0; drli < near_cnt; drli++) {
            svt_aom_get_av1_mv_pred_drl(ctx, blk, frame_type, 0, NEARMV, drli, nearestmv, nearmv, ref_mv);

            mv.as_int = nearmv[0].as_int;
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv, mv, frame_type)) {
                continue;
            }

            assert(list_idx == 0 || list_idx == 1);
            ModeDecisionCandidate* near       = &cands[next_cand];
            near->block_mi.use_intrabc        = 0;
            near->block_mi.is_interintra_used = 0;
            near->block_mi.mode               = NEARMV;
            near->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            near->block_mi.ref_frame[0]       = rf[0];
            near->block_mi.ref_frame[1]       = rf[1];
            near->block_mi.mv[0].as_int       = mv.as_int;
            near->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
            near->drl_index                   = drli;
            near->skip_mode_allowed           = false;
            INC_MD_CAND_CNT(next_cand, pcs->ppcs->max_can_count);

            // Inter-intra and OBMC variants are always requested; warp follows use_wm_for_mvp
            const bool try_warp = ctx->wm_ctrls.use_wm_for_mvp ? true : false;
            inj_non_simple_modes(pcs, ctx, &next_cand, true, try_warp, true);

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
            ++ctx->injected_mv_count;
        }
    }

    // Publish the updated candidate count
    *cand_total_cnt = next_cand;
}
1680
1681
static void inject_new_nearest_new_comb_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx,
1682
0
                                                   uint32_t* cand_tot_cnt) {
1683
0
    uint32_t               cand_idx   = *cand_tot_cnt;
1684
0
    ModeDecisionCandidate* cand_array = ctx->fast_cand_array;
1685
0
    MacroBlockD*           xd         = ctx->blk_ptr->av1xd;
1686
0
    Mv                     nearestmv[2], nearmv[2], ref_mv[2];
1687
1688
    //all of ref pairs: (1)single-ref List0  (2)single-ref List1  (3)compound Bi-Dir List0-List1  (4)compound Uni-Dir List0-List0  (5)compound Uni-Dir List1-List1
1689
0
    for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) {
1690
0
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it];
1691
0
        MvReferenceFrame rf[2];
1692
0
        av1_set_ref_frame(rf, ref_pair);
1693
0
        if (rf[1] != NONE_FRAME) {
1694
0
            const uint8_t ref_idx_0  = get_ref_frame_idx(rf[0]);
1695
0
            const uint8_t ref_idx_1  = get_ref_frame_idx(rf[1]);
1696
0
            const uint8_t list_idx_0 = get_list_idx(rf[0]);
1697
0
            const uint8_t list_idx_1 = get_list_idx(rf[1]);
1698
0
            if (!svt_aom_is_valid_unipred_ref(
1699
0
                    ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEW_NEAR_GROUP), list_idx_0, ref_idx_0) ||
1700
0
                !svt_aom_is_valid_unipred_ref(
1701
0
                    ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEW_NEAR_GROUP), list_idx_1, ref_idx_1)) {
1702
0
                continue;
1703
0
            }
1704
1705
0
            {
1706
                //NEAREST_NEWMV
1707
0
                const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1708
0
                Mv                 to_inj_mv0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int};
1709
0
                Mv                 to_inj_mv1 = ctx->sb_me_mv[list_idx_1][ref_idx_1];
1710
0
                bool               inj_mv =
1711
0
                    (ctx->injected_mv_count == 0 || !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1712
0
                    svt_aom_is_me_data_present(
1713
0
                        ctx->me_block_offset, ctx->me_cand_offset, me_results, get_list_idx(rf[1]), ref_idx_1);
1714
0
                if (inj_mv) {
1715
0
                    svt_aom_get_av1_mv_pred_drl(ctx,
1716
0
                                                ctx->blk_ptr,
1717
0
                                                ref_pair,
1718
0
                                                1, // is_compound
1719
0
                                                NEAREST_NEWMV,
1720
0
                                                0, //not needed drli,
1721
0
                                                nearestmv,
1722
0
                                                nearmv,
1723
0
                                                ref_mv);
1724
1725
0
                    ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1726
0
                    cand->block_mi.mode               = NEAREST_NEWMV;
1727
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1728
0
                    cand->block_mi.is_interintra_used = 0;
1729
0
                    cand->block_mi.use_intrabc        = 0;
1730
0
                    cand->skip_mode_allowed           = false;
1731
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1732
0
                    cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1733
0
                    cand->drl_index                   = 0;
1734
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1735
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1736
0
                    cand->pred_mv[1].as_int           = ref_mv[1].as_int;
1737
0
                    determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1738
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1739
1740
0
                    if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1741
0
                        ctx->cmp_store.pred0_cnt = 0;
1742
0
                        ctx->cmp_store.pred1_cnt = 0;
1743
0
                        inj_comp_modes(pcs, ctx, &cand_idx);
1744
0
                    }
1745
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1746
0
                    ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1747
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1748
0
                    ++ctx->injected_mv_count;
1749
0
                }
1750
0
            }
1751
1752
0
            {
1753
                //NEW_NEARESTMV
1754
0
                const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1755
0
                Mv                 to_inj_mv0 = ctx->sb_me_mv[list_idx_0][ref_idx_0];
1756
0
                Mv                 to_inj_mv1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int};
1757
0
                bool               inj_mv     = (ctx->injected_mv_count == 0 ||
1758
0
                               !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1759
0
                    svt_aom_is_me_data_present(ctx->me_block_offset, ctx->me_cand_offset, me_results, 0, ref_idx_0);
1760
0
                if (inj_mv) {
1761
0
                    svt_aom_get_av1_mv_pred_drl(ctx,
1762
0
                                                ctx->blk_ptr,
1763
0
                                                ref_pair,
1764
0
                                                1, // is_compound
1765
0
                                                NEW_NEARESTMV,
1766
0
                                                0, //not needed drli,
1767
0
                                                nearestmv,
1768
0
                                                nearmv,
1769
0
                                                ref_mv);
1770
1771
0
                    ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1772
0
                    cand->block_mi.mode               = NEW_NEARESTMV;
1773
0
                    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1774
0
                    cand->block_mi.is_interintra_used = 0;
1775
0
                    cand->block_mi.use_intrabc        = 0;
1776
0
                    cand->skip_mode_allowed           = false;
1777
0
                    cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1778
0
                    cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1779
0
                    cand->drl_index                   = 0;
1780
0
                    cand->block_mi.ref_frame[0]       = rf[0];
1781
0
                    cand->block_mi.ref_frame[1]       = rf[1];
1782
0
                    cand->pred_mv[0].as_int           = ref_mv[0].as_int;
1783
0
                    determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1784
0
                    INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1785
1786
0
                    if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1787
0
                        ctx->cmp_store.pred0_cnt = 0;
1788
0
                        ctx->cmp_store.pred1_cnt = 0;
1789
0
                        inj_comp_modes(pcs, ctx, &cand_idx);
1790
0
                    }
1791
0
                    ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1792
0
                    ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1793
0
                    ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1794
0
                    ++ctx->injected_mv_count;
1795
0
                }
1796
0
            }
1797
            // For level 2, only inject NEAREST_NEW/NEW_NEAREST candidates
1798
0
            if (ctx->new_nearest_near_comb_injection >= 2) {
1799
0
                continue;
1800
0
            }
1801
1802
            //NEW_NEARMV
1803
0
            {
1804
0
                const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEW_NEARMV);
1805
1806
0
                for (uint8_t drli = 0; drli < max_drl_index; drli++) {
1807
0
                    svt_aom_get_av1_mv_pred_drl(
1808
0
                        ctx, ctx->blk_ptr, ref_pair, 1, NEW_NEARMV, drli, nearestmv, nearmv, ref_mv);
1809
1810
                    //NEW_NEARMV
1811
0
                    const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1812
0
                    Mv                 to_inj_mv0 = ctx->sb_me_mv[list_idx_0][ref_idx_0];
1813
0
                    Mv                 to_inj_mv1 = {.as_int = nearmv[1].as_int};
1814
0
                    bool               inj_mv     = (ctx->injected_mv_count == 0 ||
1815
0
                                   !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1816
0
                        svt_aom_is_me_data_present(ctx->me_block_offset, ctx->me_cand_offset, me_results, 0, ref_idx_0);
1817
0
                    if (inj_mv) {
1818
0
                        ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1819
0
                        cand->block_mi.mode               = NEW_NEARMV;
1820
0
                        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1821
0
                        cand->block_mi.is_interintra_used = 0;
1822
0
                        cand->block_mi.use_intrabc        = 0;
1823
0
                        cand->skip_mode_allowed           = false;
1824
0
                        cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1825
0
                        cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1826
0
                        cand->drl_index                   = drli;
1827
0
                        cand->block_mi.ref_frame[0]       = rf[0];
1828
0
                        cand->block_mi.ref_frame[1]       = rf[1];
1829
0
                        cand->pred_mv[0].as_int           = ref_mv[0].as_int;
1830
0
                        determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1831
0
                        INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1832
1833
0
                        if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1834
0
                            ctx->cmp_store.pred0_cnt = 0;
1835
0
                            ctx->cmp_store.pred1_cnt = 0;
1836
0
                            inj_comp_modes(pcs, ctx, &cand_idx);
1837
0
                        }
1838
0
                        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1839
0
                        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1840
0
                        ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1841
0
                        ++ctx->injected_mv_count;
1842
0
                    }
1843
0
                }
1844
0
            }
1845
            //NEAR_NEWMV
1846
0
            {
1847
0
                uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEWMV);
1848
1849
0
                for (uint8_t drli = 0; drli < max_drl_index; drli++) {
1850
0
                    svt_aom_get_av1_mv_pred_drl(
1851
0
                        ctx, ctx->blk_ptr, ref_pair, 1, NEAR_NEWMV, drli, nearestmv, nearmv, ref_mv);
1852
1853
                    //NEAR_NEWMV
1854
0
                    const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
1855
0
                    Mv                 to_inj_mv0 = {.as_int = nearmv[0].as_int};
1856
0
                    Mv                 to_inj_mv1 = ctx->sb_me_mv[list_idx_1][ref_idx_1];
1857
0
                    bool               inj_mv     = (ctx->injected_mv_count == 0 ||
1858
0
                                   !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) &&
1859
0
                        svt_aom_is_me_data_present(
1860
0
                                      ctx->me_block_offset, ctx->me_cand_offset, me_results, list_idx_1, ref_idx_1);
1861
1862
0
                    if (inj_mv) {
1863
0
                        ModeDecisionCandidate* cand       = &cand_array[cand_idx];
1864
0
                        cand->block_mi.mode               = NEAR_NEWMV;
1865
0
                        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
1866
0
                        cand->block_mi.is_interintra_used = 0;
1867
0
                        cand->block_mi.use_intrabc        = 0;
1868
0
                        cand->skip_mode_allowed           = false;
1869
0
                        cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
1870
0
                        cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
1871
0
                        cand->drl_index                   = drli;
1872
0
                        cand->block_mi.ref_frame[0]       = rf[0];
1873
0
                        cand->block_mi.ref_frame[1]       = rf[1];
1874
0
                        cand->pred_mv[1].as_int           = ref_mv[1].as_int;
1875
0
                        determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
1876
0
                        INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count);
1877
1878
0
                        if (ctx->inter_comp_ctrls.do_nearest_near_new) {
1879
0
                            ctx->cmp_store.pred0_cnt = 0;
1880
0
                            ctx->cmp_store.pred1_cnt = 0;
1881
0
                            inj_comp_modes(pcs, ctx, &cand_idx);
1882
0
                        }
1883
0
                        ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
1884
0
                        ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
1885
0
                        ctx->injected_ref_types[ctx->injected_mv_count]     = ref_pair;
1886
0
                        ++ctx->injected_mv_count;
1887
0
                    }
1888
0
                }
1889
0
            }
1890
0
        }
1891
0
    }
1892
    //update tot Candidate count
1893
0
    *cand_tot_cnt = cand_idx;
1894
0
}
1895
1896
// Refine the WM MV (8 bit search).  Return true if search found a valid MV; false otherwise
1897
uint8_t svt_aom_wm_motion_refinement(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
1898
0
                                     const bool shut_approx) {
1899
0
    PictureParentControlSet* ppcs         = pcs->ppcs;
1900
0
    const Mv                 neighbors[9] = {
1901
0
        {{0, 0}}, {{-1, 0}}, {{0, 1}}, {{1, 0}}, {{0, -1}}, {{1, -1}}, {{1, 1}}, {{-1, 1}}, {{-1, -1}}};
1902
1903
    // Set info used to get MV cost
1904
0
    int*        mvjcost       = ctx->md_rate_est_ctx->nmv_vec_cost;
1905
0
    const int** mvcost        = ctx->md_rate_est_ctx->nmvcoststack;
1906
0
    uint32_t    full_lambda   = ctx->full_lambda_md[EB_8_BIT_MD]; // 8bit only
1907
0
    int         error_per_bit = full_lambda >> RD_EPB_SHIFT;
1908
0
    error_per_bit += (error_per_bit == 0);
1909
0
    EbPictureBufferDesc*    input_pic          = ppcs->enhanced_pic; // 10BIT not supported
1910
0
    uint32_t                input_origin_index = (ctx->blk_org_y) * input_pic->y_stride + (ctx->blk_org_x);
1911
0
    const AomVarianceFnPtr* fn_ptr             = &svt_aom_mefn_ptr[ctx->blk_geom->bsize];
1912
0
    unsigned int            sse;
1913
0
    uint8_t*                src_y = input_pic->y_buffer + input_origin_index;
1914
1915
0
    int mv_prec_shift = ppcs->frm_hdr.allow_high_precision_mv ? 0 : 1;
1916
0
    int best_cost     = INT_MAX;
1917
    // local WM always uses one ref - MV for ref0 stored in idx0
1918
0
    assert(cand->block_mi.ref_frame[1] == NONE_FRAME);
1919
0
    Mv       search_centre_mv = {.as_int = cand->block_mi.mv[0].as_int};
1920
0
    Mv       best_mv          = {.as_int = cand->block_mi.mv[0].as_int};
1921
0
    Mv       prev_mv          = {.as_int = cand->block_mi.mv[0].as_int};
1922
0
    const Mv ref_mv           = {.as_int = cand->pred_mv[0].as_int};
1923
1924
0
    int      max_iterations  = ctx->wm_ctrls.refinement_iterations;
1925
0
    int      tot_checked_pos = 0;
1926
0
    uint32_t mv_record[256];
1927
0
    for (int iter = 0; iter < max_iterations; iter++) {
1928
        // search the (0,0) offset position only for the first search iteration
1929
0
        for (int i = (iter ? 1 : 0); i < (ctx->wm_ctrls.refine_diag ? 9 : 5); i++) {
1930
0
            const Mv test_mv = (Mv){{search_centre_mv.x + (neighbors[i].x << mv_prec_shift),
1931
0
                                     search_centre_mv.y + (neighbors[i].y << mv_prec_shift)}};
1932
1933
            // Don't re-test previously tested positions
1934
0
            if (iter) {
1935
0
                if (prev_mv.as_int == test_mv.as_int) {
1936
0
                    continue;
1937
0
                }
1938
0
                int match_found = 0;
1939
0
                for (int j = 0; j < tot_checked_pos; j++) {
1940
0
                    if (test_mv.as_int == mv_record[j]) {
1941
0
                        match_found = 1;
1942
0
                    }
1943
0
                }
1944
0
                if (match_found) {
1945
0
                    continue;
1946
0
                }
1947
0
            }
1948
0
            mv_record[tot_checked_pos++] = test_mv.as_int;
1949
0
            uint8_t local_warp_valid     = svt_aom_warped_motion_parameters(ctx,
1950
0
                                                                        test_mv,
1951
0
                                                                        ctx->blk_geom,
1952
0
                                                                        cand->block_mi.ref_frame[0],
1953
0
                                                                        &cand->wm_params_l0,
1954
0
                                                                        &cand->block_mi.num_proj_ref,
1955
0
                                                                        ctx->wm_ctrls.lower_band_th,
1956
0
                                                                        ctx->wm_ctrls.upper_band_th,
1957
0
                                                                        shut_approx);
1958
0
            if (!local_warp_valid) {
1959
0
                continue;
1960
0
            }
1961
0
            assert(cand->block_mi.ref_frame[1] == NONE_FRAME);
1962
0
            EbPictureBufferDesc* ref_pic_0 = svt_aom_get_ref_pic_buffer(pcs, cand->block_mi.ref_frame[0]);
1963
0
            EbPictureBufferDesc* ref_pic_1 = NULL; // will stay NULL b/c this is unipred candidate
1964
1965
            // update MV to be testing MV before calling prediction function
1966
0
            cand->block_mi.mv[0].as_int = test_mv.as_int;
1967
0
            svt_aom_inter_prediction(pcs->scs,
1968
0
                                     pcs,
1969
0
                                     &cand->block_mi,
1970
0
                                     &cand->wm_params_l0,
1971
0
                                     &cand->wm_params_l1,
1972
0
                                     ctx->blk_ptr,
1973
0
                                     ctx->blk_geom->bsize,
1974
0
                                     ctx->shape,
1975
                                     // If using 8bit MD for HBD content, can't use pre-computed OBMC/II to
1976
                                     // generate conformant recon
1977
0
                                     true, //use_precomputed_obmc - not used here
1978
0
                                     true, //use_precomputed_ii - not used here
1979
0
                                     ctx,
1980
0
                                     ctx->recon_neigh_y,
1981
0
                                     ctx->recon_neigh_cb,
1982
0
                                     ctx->recon_neigh_cr,
1983
0
                                     ref_pic_0,
1984
0
                                     ref_pic_1, // this is NULL
1985
0
                                     ctx->blk_org_x,
1986
0
                                     ctx->blk_org_y,
1987
0
                                     ctx->scratch_prediction_ptr,
1988
0
                                     0,
1989
0
                                     0,
1990
0
                                     PICTURE_BUFFER_DESC_LUMA_MASK,
1991
0
                                     EB_EIGHT_BIT,
1992
0
                                     0); // is_16bit_pipeline
1993
1994
0
            int var = fn_ptr->vf(ctx->scratch_prediction_ptr->y_buffer,
1995
0
                                 ctx->scratch_prediction_ptr->y_stride,
1996
0
                                 src_y,
1997
0
                                 input_pic->y_stride,
1998
0
                                 &sse);
1999
0
            if (ctx->approx_inter_rate) {
2000
0
                var += svt_aom_mv_err_cost_light(&test_mv, &ref_mv);
2001
0
            } else {
2002
0
                var += svt_aom_mv_err_cost(&test_mv, &ref_mv, mvjcost, mvcost, error_per_bit);
2003
0
            }
2004
2005
0
            if (var < best_cost) {
2006
0
                best_mv.as_int = test_mv.as_int;
2007
0
                best_cost      = var;
2008
0
            }
2009
0
        }
2010
0
        prev_mv.as_int          = search_centre_mv.as_int;
2011
0
        search_centre_mv.as_int = best_mv.as_int;
2012
0
        if (prev_mv.as_int == best_mv.as_int) {
2013
0
            break;
2014
0
        }
2015
0
    }
2016
0
    cand->block_mi.mv[0].as_int = best_mv.as_int;
2017
2018
    // Derive pred MV for best WM position
2019
0
    Mv best_pred_mv[2] = {{{0}}, {{0}}};
2020
0
    svt_aom_choose_best_av1_mv_pred(ctx,
2021
0
                                    cand->block_mi.ref_frame[0], // WM only allowed for unipred cands
2022
0
                                    cand->block_mi.mode,
2023
0
                                    cand->block_mi.mv[0],
2024
0
                                    (Mv){{0}},
2025
0
                                    &cand->drl_index,
2026
0
                                    best_pred_mv);
2027
0
    cand->pred_mv[0].as_int = best_pred_mv[0].as_int;
2028
2029
    // Check that final chosen MV is valid
2030
0
    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, best_mv, best_mv, 0)) {
2031
0
        return 1;
2032
0
    }
2033
2034
0
    return 0;
2035
0
}
2036
2037
// Fill a Buf2D descriptor for one plane of a prediction source.
//   dst                 : descriptor to fill (buf, buf0, width, height, stride)
//   src                 : start of the plane
//   width/height/stride : plane dimensions, stored as given
//   mi_row / mi_col     : block position in mode-info units
//   subsampling_x / _y  : plane subsampling shifts
// dst->buf points at the block position inside the plane; dst->buf0 at the plane start.
static INLINE void setup_pred_plane(Buf2D* dst, BlockSize bsize, uint8_t* src, int width, int height, int stride,
                                    int mi_row, int mi_col, int subsampling_x, int subsampling_y) {
    // For a subsampled plane, a block that is one mi unit tall/wide and sits on an odd
    // row/column is anchored one mi unit earlier.
    const int step_back_row = subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1);
    const int step_back_col = subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1);
    const int anchor_row    = step_back_row ? mi_row - 1 : mi_row;
    const int anchor_col    = step_back_col ? mi_col - 1 : mi_col;

    // Convert the mi position to a sample position in this plane
    const int px_x = (MI_SIZE * anchor_col) >> subsampling_x;
    const int px_y = (MI_SIZE * anchor_row) >> subsampling_y;

    dst->buf    = src + (px_y * stride + px_x);
    dst->buf0   = src;
    dst->width  = width;
    dst->height = height;
    dst->stride = stride;
}
2055
2056
// Initialise the per-plane prediction descriptors in dst from the frame buffer src.
// Chroma planes (dst[1], dst[2]) only get their base pointer and stride; the luma plane
// (dst[0]) is fully set up by setup_pred_plane() for the block at (mi_row, mi_col),
// using the cropped luma dimensions and no subsampling.
void svt_av1_setup_pred_block(BlockSize bsize, Buf2D dst[MAX_PLANES], const Yv12BufferConfig* src, int mi_row,
                              int mi_col) {
    // Chroma: base pointers and the shared chroma stride
    dst[1].buf    = src->u_buffer;
    dst[2].buf    = src->v_buffer;
    dst[2].stride = src->uv_stride;
    dst[1].stride = dst[2].stride;

    // Luma: seed base pointer and stride, then let setup_pred_plane() position the block
    dst[0].buf    = src->y_buffer;
    dst[0].stride = src->y_stride;
    setup_pred_plane(
        &dst[0], bsize, dst[0].buf, src->y_crop_width, src->y_crop_height, dst[0].stride, mi_row, mi_col, 0, 0);
}
2067
2068
static int sad_per_bit_lut_8[QINDEX_RANGE];
2069
static int sad_per_bit_lut_10[QINDEX_RANGE];
2070
2071
// Get the sad per bit for the relevant qindex and bit depth
2072
0
int svt_aom_get_sad_per_bit(int qidx, EbBitDepth is_hbd) {
2073
0
    return is_hbd ? sad_per_bit_lut_10[qidx] : sad_per_bit_lut_8[qidx];
2074
0
}
2075
2076
2
// Fill bit16lut[0 .. range-1] with the SAD-per-bit value for each qindex at the given bit depth.
// Each entry is a linear function of the quantizer step returned by
// svt_av1_convert_qindex_to_q(), truncated to int.
static void init_me_luts_bd(int* bit16lut, int range, EbBitDepth bit_depth) {
    // Initialize the sad lut tables using a formulaic calculation for now.
    // This is to make it easier to resolve the impact of experimental changes
    // to the quantizer tables.
    for (int qindex = 0; qindex < range; ++qindex) {
        const double q_step = svt_av1_convert_qindex_to_q(qindex, bit_depth);

        bit16lut[qindex] = (int)(0.0418 * q_step + 2.4107);
    }
}
2086
2087
1
void svt_av1_init_me_luts(void) {
2088
1
    init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, EB_EIGHT_BIT);
2089
1
    init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, EB_TEN_BIT);
2090
1
}
2091
2092
#if CONFIG_ENABLE_OBMC
2093
// Motion search for a single candidate (8-bit path only); only OBMC_CAUSAL candidates are handled.
//   best_pred_mv : starting MV (1/8-pel units, converted to full-pel with >> 3)
//   x            : search context; its xd, MV cost tables, MV limits, sadperbit16, errorperbit
//                  and best_mv are written here
//   ref_mv       : MV predictor used for the search range and the rate computation
//   rate_mv      : receives the bit cost of x->best_mv relative to ref_mv
//   refine_level : 0, 1, 3 -> full-pel search followed by sub-pel search
//                  2, 4    -> sub-pel search only
//                  other   -> no search; the start MV is truncated to full-pel precision
// The result is left in x->best_mv.
static void single_motion_search(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
                                 Mv best_pred_mv, IntraBcContext* x, BlockSize bsize, Mv* ref_mv, int* rate_mv,
                                 int refine_level) {
    const bool full_then_frac = (refine_level == 0) || (refine_level == 1) || (refine_level == 3);
    const bool frac_only      = (refine_level == 2) || (refine_level == 4);
    const bool do_full_refine = full_then_frac;
    const bool do_frac_refine = full_then_frac || frac_only;

    const Av1Common* const cm      = pcs->ppcs->av1_cm;
    FrameHeader*           frm_hdr = &pcs->ppcs->frm_hdr;
    // single_motion_search supports 8bit path only
    const uint32_t lambda_8b = ctx->full_lambda_md[EB_8_BIT_MD];

    // Block position in mi units, derived from the distance to the top/left frame edges
    x->xd            = ctx->blk_ptr->av1xd;
    const int mi_row = -x->xd->mb_to_top_edge / (8 * MI_SIZE);
    const int mi_col = -x->xd->mb_to_left_edge / (8 * MI_SIZE);

    x->nmv_vec_cost  = ctx->md_rate_est_ctx->nmv_vec_cost;
    x->mv_cost_stack = ctx->md_rate_est_ctx->nmvcoststack;

    // Set up limit values for MV components.
    // Mv beyond the range do not produce new/different prediction block.
    const int blk_mi_w   = mi_size_wide[bsize];
    const int blk_mi_h   = mi_size_high[bsize];
    x->mv_limits.row_min = -(((mi_row + blk_mi_h) * MI_SIZE) + AOM_INTERP_EXTEND);
    x->mv_limits.col_min = -(((mi_col + blk_mi_w) * MI_SIZE) + AOM_INTERP_EXTEND);
    x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
    x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;

    // Set search parameters
    x->sadperbit16 = svt_aom_get_sad_per_bit(frm_hdr->quantization_params.base_q_idx, 0);
    x->errorperbit = lambda_8b >> RD_EPB_SHIFT;
    x->errorperbit += (x->errorperbit == 0);

    if (do_full_refine) {
        int      sad_per_bit  = x->sadperbit16;
        MvLimits saved_limits = x->mv_limits;

        // Note: MV limits are modified here. Always restore the original values
        // after full-pixel motion search.
        svt_av1_set_mv_search_range(&x->mv_limits, ref_mv);

        // TODO: should use get_fullmv_from_mv instead of shifting
        Mv full_pel_start = best_pred_mv;
        full_pel_start.x >>= 3;
        full_pel_start.y >>= 3;

        x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;

        if (cand->block_mi.motion_mode == OBMC_CAUSAL) {
            svt_av1_obmc_full_pixel_search(
                ctx, x, &full_pel_start, sad_per_bit, &svt_aom_mefn_ptr[bsize], ref_mv, &(x->best_mv), 0);
        } else {
            assert(0 && "Invalid motion mode!\n");
        }

        x->mv_limits = saved_limits;
    } else {
        // No full-pel search: start from the predictor truncated to full-pel units
        x->best_mv.x = best_pred_mv.x >> 3;
        x->best_mv.y = best_pred_mv.y >> 3;
    }

    if (do_frac_refine) {
        int          dis; /* TODO: use dis in distortion calculation later. */
        unsigned int sse1; //unused
        if (cand->block_mi.motion_mode == OBMC_CAUSAL) {
            svt_av1_find_best_obmc_sub_pixel_tree_up(ctx,
                                                     x,
                                                     cm,
                                                     mi_row,
                                                     mi_col,
                                                     &x->best_mv,
                                                     ref_mv,
                                                     frm_hdr->allow_high_precision_mv,
                                                     x->errorperbit,
                                                     &svt_aom_mefn_ptr[bsize],
                                                     0, // mv.subpel_force_stop
                                                     2, //  mv.subpel_iters_per_step
                                                     x->nmv_vec_cost,
                                                     x->mv_cost_stack,
                                                     &dis,
                                                     &sse1,
                                                     0,
                                                     USE_8_TAPS);
        } else {
            assert(0 && "Invalid motion mode!\n");
        }
    } else {
        // No sub-pel search: scale the full-pel MV back to 1/8-pel units
        x->best_mv.x *= 8;
        x->best_mv.y *= 8;
    }

    // Rate of the selected MV relative to the predictor
    if (ctx->approx_inter_rate) {
        *rate_mv = svt_av1_mv_bit_cost_light(&x->best_mv, ref_mv);
    } else {
        *rate_mv = svt_av1_mv_bit_cost(&x->best_mv, ref_mv, x->nmv_vec_cost, x->mv_cost_stack, MV_COST_WEIGHT);
    }
}
2205
2206
// Refine the OBMC MV (8 bit search). Return true if search found a valid MV; false otherwise
2207
uint8_t svt_aom_obmc_motion_refinement(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand,
2208
0
                                       int refine_level) {
2209
0
    if (block_size_wide[ctx->blk_geom->bsize] > ctx->obmc_ctrls.max_blk_size_to_refine ||
2210
0
        block_size_high[ctx->blk_geom->bsize] > ctx->obmc_ctrls.max_blk_size_to_refine) {
2211
0
        return 1;
2212
0
    }
2213
2214
0
    if (ctx->obmc_weighted_pred_ready == false) {
2215
0
        int mi_row = ctx->blk_org_y >> 2;
2216
0
        int mi_col = ctx->blk_org_x >> 2;
2217
2218
0
        DECLARE_ALIGNED(16, uint8_t, dst_buf1_8b[4 * MAX_PLANES * MAX_SB_SQUARE]);
2219
2220
0
        uint8_t* dst_buf2_8b = dst_buf1_8b + 2 * MAX_PLANES * MAX_SB_SQUARE;
2221
0
        if (ctx->obmc_is_luma_neigh_10bit) {
2222
0
            svt_aom_un_pack2d((uint16_t*)ctx->obmc_buff_0,
2223
0
                              ctx->blk_geom->bwidth,
2224
0
                              dst_buf1_8b,
2225
0
                              ctx->blk_geom->bwidth,
2226
0
                              NULL,
2227
0
                              ctx->blk_geom->bwidth,
2228
0
                              ctx->blk_geom->bwidth,
2229
0
                              ctx->blk_geom->bheight);
2230
2231
0
            svt_aom_un_pack2d((uint16_t*)ctx->obmc_buff_1,
2232
0
                              ctx->blk_geom->bwidth,
2233
0
                              dst_buf2_8b,
2234
0
                              ctx->blk_geom->bwidth,
2235
0
                              NULL,
2236
0
                              ctx->blk_geom->bwidth,
2237
0
                              ctx->blk_geom->bwidth,
2238
0
                              ctx->blk_geom->bheight);
2239
0
        }
2240
2241
0
        calc_target_weighted_pred(pcs,
2242
0
                                  ctx,
2243
0
                                  pcs->ppcs->av1_cm,
2244
0
                                  ctx->blk_ptr->av1xd,
2245
0
                                  mi_row,
2246
0
                                  mi_col,
2247
0
                                  ctx->obmc_is_luma_neigh_10bit ? dst_buf1_8b : ctx->obmc_buff_0,
2248
0
                                  ctx->blk_geom->bwidth,
2249
0
                                  ctx->obmc_is_luma_neigh_10bit ? dst_buf2_8b : ctx->obmc_buff_1,
2250
0
                                  ctx->blk_geom->bwidth);
2251
2252
0
        ctx->obmc_weighted_pred_ready = true;
2253
0
    }
2254
0
    Mv              best_pred_mv[2] = {{{0}}, {{0}}};
2255
0
    IntraBcContext  x_st;
2256
0
    IntraBcContext* x = &x_st;
2257
2258
0
    MacroBlockD* xd;
2259
0
    xd = x->xd       = ctx->blk_ptr->av1xd;
2260
0
    const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
2261
0
    const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
2262
2263
0
    {
2264
0
        assert(cand->block_mi.ref_frame[1] == NONE_FRAME); // OBMC only allowed for unipred cands
2265
0
        uint8_t ref_idx  = get_ref_frame_idx(cand->block_mi.ref_frame[0]);
2266
0
        uint8_t list_idx = get_list_idx(cand->block_mi.ref_frame[0]);
2267
2268
0
        assert(list_idx < MAX_NUM_OF_REF_PIC_LIST);
2269
0
        EbPictureBufferDesc* reference_picture =
2270
0
            ((EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr)->reference_picture;
2271
2272
0
        svt_aom_use_scaled_rec_refs_if_needed(pcs,
2273
0
                                              pcs->ppcs->enhanced_pic,
2274
0
                                              (EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr,
2275
0
                                              &reference_picture,
2276
0
                                              EB_8_BIT_MD);
2277
0
        Yv12BufferConfig ref_buf;
2278
0
        svt_aom_link_eb_to_aom_buffer_desc_8bit(reference_picture, &ref_buf);
2279
2280
0
        Buf2D yv12_mb[MAX_PLANES];
2281
0
        svt_av1_setup_pred_block(ctx->blk_geom->bsize, yv12_mb, &ref_buf, mi_row, mi_col);
2282
0
        for (int i = 0; i < 1; ++i) {
2283
0
            x->xdplane[i].pre[0] = yv12_mb[i]; //ref in ME
2284
0
        }
2285
2286
0
        x->plane[0].src.buf  = 0; // x->xdplane[0].pre[0];
2287
0
        x->plane[0].src.buf0 = 0;
2288
0
    }
2289
2290
0
    Mv  best_mv = {.as_int = cand->block_mi.mv[0].as_int};
2291
0
    int tmp_rate_mv;
2292
2293
0
    Mv ref_mv = {.as_int = cand->pred_mv[0].as_int};
2294
2295
0
    single_motion_search(pcs, ctx, cand, best_mv, x, ctx->blk_geom->bsize, &ref_mv, &tmp_rate_mv, refine_level);
2296
0
    cand->block_mi.mv[0].as_int = x->best_mv.as_int;
2297
0
    svt_aom_choose_best_av1_mv_pred(ctx,
2298
0
                                    cand->block_mi.ref_frame[0], // OBMC only allowed for unipred candidtes
2299
0
                                    cand->block_mi.mode,
2300
0
                                    cand->block_mi.mv[0],
2301
0
                                    (Mv){{0}},
2302
0
                                    &cand->drl_index,
2303
0
                                    best_pred_mv);
2304
0
    cand->pred_mv[0].as_int = best_pred_mv[0].as_int;
2305
    // Check that final chosen MV is valid
2306
0
    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, cand->block_mi.mv[0], cand->block_mi.mv[0], 0)) {
2307
0
        return 1;
2308
0
    }
2309
2310
0
    return 0;
2311
0
}
2312
#endif // CONFIG_ENABLE_OBMC
2313
2314
/*
2315
   inject ME candidates for Light PD0
2316
*/
2317
static void inject_new_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx,
2318
0
                                            uint32_t* candidate_total_cnt, const bool allow_bipred) {
2319
0
    const uint32_t         me_sb_addr       = ctx->me_sb_addr;
2320
0
    const uint32_t         me_block_offset  = ctx->me_block_offset;
2321
0
    ModeDecisionCandidate* cand_array       = ctx->fast_cand_array;
2322
0
    uint32_t               cand_total_cnt   = (*candidate_total_cnt);
2323
0
    const MeSbResults*     me_results       = pcs->ppcs->pa_me_data->me_results[me_sb_addr];
2324
0
    const uint8_t          total_me_cnt     = me_results->total_me_candidate_index[me_block_offset];
2325
0
    const MeCandidate*     me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset];
2326
2327
0
    const uint8_t max_refs = pcs->ppcs->pa_me_data->max_refs;
2328
0
    const uint8_t max_l0   = pcs->ppcs->pa_me_data->max_l0;
2329
2330
0
    for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) {
2331
0
        const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index];
2332
0
        const uint8_t      inter_direction      = me_block_results_ptr->direction;
2333
0
        const uint8_t      list0_ref_index      = me_block_results_ptr->ref_idx_l0;
2334
0
        const uint8_t      list1_ref_index      = me_block_results_ptr->ref_idx_l1;
2335
2336
0
        if (ctx->lpd0_ctrls.pd0_level == VERY_LIGHT_PD0 && inter_direction == BI_PRED) {
2337
0
            continue;
2338
0
        }
2339
2340
        /**************
2341
            NEWMV
2342
        ************* */
2343
0
        if (inter_direction < BI_PRED) {
2344
0
            const uint8_t list_idx = inter_direction;
2345
0
            const uint8_t ref_idx  = inter_direction ? list1_ref_index : list0_ref_index;
2346
0
            const int16_t to_inject_mv_x =
2347
0
                (me_results->me_mv_array[me_block_offset * max_refs + (inter_direction ? max_l0 : 0) + ref_idx].x) * 8;
2348
0
            const int16_t to_inject_mv_y =
2349
0
                (me_results->me_mv_array[me_block_offset * max_refs + (inter_direction ? max_l0 : 0) + ref_idx].y) * 8;
2350
0
            const uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx);
2351
2352
0
            ModeDecisionCandidate* cand = &cand_array[cand_total_cnt];
2353
0
            cand->block_mi.mode         = NEWMV;
2354
0
            cand->block_mi.mv[0]        = (Mv){{to_inject_mv_x, to_inject_mv_y}};
2355
0
            cand->block_mi.ref_frame[0] = to_inject_ref_type;
2356
0
            cand->block_mi.ref_frame[1] = NONE_FRAME;
2357
0
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
2358
0
            if (cand_total_cnt > 2) {
2359
0
                break;
2360
0
            }
2361
0
        } else if (allow_bipred) {
2362
0
            assert(inter_direction == BI_PRED);
2363
            /**************
2364
               NEW_NEWMV
2365
            ************* */
2366
0
            const uint32_t ref0_offset = me_block_offset * max_refs +
2367
0
                (me_block_results_ptr->ref0_list > 0 ? max_l0 : 0) + list0_ref_index;
2368
0
            const uint32_t ref1_offset = me_block_offset * max_refs +
2369
0
                (me_block_results_ptr->ref1_list > 0 ? max_l0 : 0) + list1_ref_index;
2370
0
            const int16_t to_inject_mv_x_l0 = (me_results->me_mv_array[ref0_offset].x) * 8;
2371
0
            const int16_t to_inject_mv_y_l0 = (me_results->me_mv_array[ref0_offset].y) * 8;
2372
0
            const int16_t to_inject_mv_x_l1 = (me_results->me_mv_array[ref1_offset].x) * 8;
2373
0
            const int16_t to_inject_mv_y_l1 = (me_results->me_mv_array[ref1_offset].y) * 8;
2374
2375
0
            MvReferenceFrame rf[2] = {svt_get_ref_frame_type(me_block_results_ptr->ref0_list, list0_ref_index),
2376
0
                                      svt_get_ref_frame_type(me_block_results_ptr->ref1_list, list1_ref_index)};
2377
2378
            // Inject AVG candidate only
2379
0
            ModeDecisionCandidate* cand   = &cand_array[cand_total_cnt];
2380
0
            cand->block_mi.mv[REF_LIST_0] = (Mv){{to_inject_mv_x_l0, to_inject_mv_y_l0}};
2381
0
            cand->block_mi.mv[REF_LIST_1] = (Mv){{to_inject_mv_x_l1, to_inject_mv_y_l1}};
2382
0
            cand->block_mi.mode           = NEW_NEWMV;
2383
0
            cand->block_mi.ref_frame[0]   = rf[0];
2384
0
            cand->block_mi.ref_frame[1]   = rf[1];
2385
0
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
2386
0
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
2387
0
            if (cand_total_cnt > 2) {
2388
0
                break;
2389
0
            }
2390
0
        }
2391
0
    }
2392
    // update the total number of candidates injected
2393
0
    (*candidate_total_cnt) = cand_total_cnt;
2394
0
}
2395
2396
/*
 * Inject ME candidates for the light PD1 path.
 *
 * For each ME result of the current block, appends a NEWMV (uni-pred) or NEW_NEWMV (bi-pred,
 * COMPOUND_AVERAGE) candidate to ctx->fast_cand_array, using the MVs stored in ctx->sb_me_mv.
 * A candidate is only added when its MV/reference combination is not already in
 * ctx->injected_mvs and, if ctx->corrupted_mv_check is on, when is_valid_mv_diff() accepts it.
 * Every injected MV is recorded in ctx->injected_mvs / ctx->injected_ref_types.
 *
 * Uni-pred results are dropped according to ctx->cand_reduction_ctrls.reduce_unipred_candidates:
 * level >= 2 drops them when the block has more than 1 ME result, any other non-zero level when
 * it has more than 3. The updated count is written back through candidate_total_cnt.
 */
static void inject_new_candidates_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                            uint32_t* candidate_total_cnt, const bool allow_bipred) {
    const MeSbResults*     me_results   = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const uint8_t          num_me_cands = me_results->total_me_candidate_index[ctx->me_block_offset];
    const MeCandidate*     me_cands     = &me_results->me_candidate_array[ctx->me_cand_offset];
    ModeDecisionCandidate* cand_array   = ctx->fast_cand_array;
    uint32_t               cand_total_cnt = *candidate_total_cnt;
    Mv                     best_pred_mv[2] = {{{0}}, {{0}}};

    for (uint8_t cand_idx = 0; cand_idx < num_me_cands; ++cand_idx) {
        const MeCandidate* me_cand    = me_cands + cand_idx;
        const uint8_t      direction  = me_cand->direction;
        const uint8_t      l0_ref_idx = me_cand->ref_idx_l0;
        const uint8_t      l1_ref_idx = me_cand->ref_idx_l1;

        // Uni-pred pruning. The literal 2 is the direction value compared against here
        // (presumably BI_PRED - confirm against its definition).
        if (direction != 2) {
            const bool drop_unipred = ctx->cand_reduction_ctrls.reduce_unipred_candidates >= 2
                ? (num_me_cands > 1)
                : (ctx->cand_reduction_ctrls.reduce_unipred_candidates && num_me_cands > 3);
            if (drop_unipred) {
                continue;
            }
        }

        if (direction < BI_PRED) {
            /* NEWMV */
            const uint8_t list_idx = direction;
            const uint8_t ref_idx  = direction ? l1_ref_idx : l0_ref_idx;
            const Mv      mv       = ctx->sb_me_mv[list_idx][ref_idx];
            const uint8_t ref_type = svt_get_ref_frame_type(list_idx, ref_idx);

            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv, mv, ref_type) != false) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEWMV, mv, (Mv){{0}}, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv, mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->skip_mode_allowed           = false;
            cand->drl_index                   = drl_index;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.mv[0].as_int       = mv.as_int;
            cand->block_mi.ref_frame[0]       = ref_type;
            cand->block_mi.ref_frame[1]       = NONE_FRAME;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[ref_type].num;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Add the injected MV to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred && direction == 2 &&
                   (!ctx->is_intra_bordered || !ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) {
            /* NEW_NEWMV */
            const Mv         mv0   = ctx->sb_me_mv[me_cand->ref0_list][l0_ref_idx];
            const Mv         mv1   = ctx->sb_me_mv[me_cand->ref1_list][l1_ref_idx];
            MvReferenceFrame rf[2] = {svt_get_ref_frame_type(me_cand->ref0_list, l0_ref_idx),
                                      svt_get_ref_frame_type(me_cand->ref1_list, l1_ref_idx)};
            const uint8_t    ref_type = av1_ref_frame_type(rf);

            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv0, mv1, ref_type) != false) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEW_NEWMV, mv0, mv1, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv0, mv1, 1)) {
                continue;
            }

            ModeDecisionCandidate* cand         = &cand_array[cand_total_cnt];
            cand->skip_mode_allowed             = false;
            cand->drl_index                     = drl_index;
            cand->pred_mv[0].as_int             = best_pred_mv[0].as_int;
            cand->pred_mv[1].as_int             = best_pred_mv[1].as_int;
            cand->block_mi.use_intrabc          = 0;
            cand->block_mi.is_interintra_used   = 0;
            cand->block_mi.mode                 = NEW_NEWMV;
            cand->block_mi.motion_mode          = SIMPLE_TRANSLATION;
            cand->block_mi.mv[0].as_int         = mv0.as_int;
            cand->block_mi.mv[1].as_int         = mv1.as_int;
            cand->block_mi.ref_frame[0]         = rf[0];
            cand->block_mi.ref_frame[1]         = rf[1];
            cand->block_mi.comp_group_idx       = 0;
            cand->block_mi.compound_idx         = 1;
            cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Add the injected MV to the list of injected MVs
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        }
    }

    // update the total number of candidates injected
    *candidate_total_cnt = cand_total_cnt;
}
2502
2503
/*
 * Inject ME-based NEWMV / NEW_NEWMV candidates (regular path).
 *
 * For each ME result of the current block:
 *  - uni-pred: appends a NEWMV candidate and then calls inj_non_simple_modes() with inter-intra,
 *    warp and OBMC all enabled;
 *  - bi-pred (only when allow_bipred is set and the block is not excluded by the
 *    is_intra_bordered / use_neighbouring_mode_ctrls check): appends a NEW_NEWMV candidate whose
 *    compound mode is set through determine_compound_mode(MD_COMP_AVG), then calls
 *    inj_comp_modes() when ctx->inter_comp_ctrls.do_me is set.
 * References rejected by svt_aom_is_valid_unipred_ref() / is_valid_bipred_ref(), MVs already
 * present in ctx->injected_mvs, and (with ctx->corrupted_mv_check on) MVs rejected by
 * is_valid_mv_diff() are skipped. Each injected MV is recorded in ctx->injected_mvs.
 *
 * The updated count is written back through candidate_total_cnt.
 */
static void inject_new_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt,
                                  const bool allow_bipred) {
    const MeSbResults*     me_results   = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr];
    const uint8_t          num_me_cands = me_results->total_me_candidate_index[ctx->me_block_offset];
    const MeCandidate*     me_cands     = &me_results->me_candidate_array[ctx->me_cand_offset];
    ModeDecisionCandidate* cand_array   = ctx->fast_cand_array;
    uint32_t               cand_total_cnt = *candidate_total_cnt;
    Mv                     best_pred_mv[2] = {{{0}}, {{0}}};

    for (uint8_t cand_idx = 0; cand_idx < num_me_cands; ++cand_idx) {
        const MeCandidate* me_cand    = me_cands + cand_idx;
        const uint8_t      direction  = me_cand->direction;
        const uint8_t      l0_ref_idx = me_cand->ref_idx_l0;
        const uint8_t      l1_ref_idx = me_cand->ref_idx_l1;

        // With uni-pred reduction on, results whose direction is not 2 are dropped when the block
        // has more than 3 ME results.
        if (ctx->cand_reduction_ctrls.reduce_unipred_candidates && num_me_cands > 3 && direction != 2) {
            continue;
        }

        if (direction < BI_PRED) {
            /* NEWMV unipred */
            const uint8_t list_idx = direction;
            const uint8_t ref_idx  = list_idx == REF_LIST_0 ? l0_ref_idx : l1_ref_idx;
            if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, PA_ME_GROUP), list_idx, ref_idx)) {
                continue;
            }

            const Mv      mv       = ctx->sb_me_mv[list_idx][ref_idx];
            const uint8_t ref_type = svt_get_ref_frame_type(list_idx, ref_idx);
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv, mv, ref_type) != false) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEWMV, mv, (Mv){{0}}, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv, mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->skip_mode_allowed           = false;
            cand->drl_index                   = drl_index;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.mv[0].as_int       = mv.as_int;
            cand->block_mi.ref_frame[0]       = ref_type;
            cand->block_mi.ref_frame[1]       = NONE_FRAME;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[ref_type].num;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            // Derive the non-simple variants of the candidate just added.
            inj_non_simple_modes(pcs,
                                 ctx,
                                 &cand_total_cnt,
                                 true /* enable_ii */,
                                 true /* enable_warp */,
                                 true /* enable_obmc */);

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred &&
                   (!ctx->is_intra_bordered || !ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) {
            /* NEW_NEWMV */
            assert(direction == BI_PRED);
            if (!is_valid_bipred_ref(
                    ctx, PA_ME_GROUP, me_cand->ref0_list, l0_ref_idx, me_cand->ref1_list, l1_ref_idx)) {
                continue;
            }

            const Mv mv0 = ctx->sb_me_mv[me_cand->ref0_list][l0_ref_idx];
            const Mv mv1 = ctx->sb_me_mv[me_cand->ref1_list][l1_ref_idx];

            // The reference pair is built once and reused for the pair type and the candidate.
            MvReferenceFrame rf[2]    = {svt_get_ref_frame_type(me_cand->ref0_list, l0_ref_idx),
                                         svt_get_ref_frame_type(me_cand->ref1_list, l1_ref_idx)};
            const uint8_t    ref_type = av1_ref_frame_type(rf);
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, mv0, mv1, ref_type) != false) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, ref_type, NEW_NEWMV, mv0, mv1, &drl_index, best_pred_mv);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(best_pred_mv, mv0, mv1, 1)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->skip_mode_allowed           = false;
            cand->drl_index                   = drl_index;
            cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
            cand->pred_mv[1].as_int           = best_pred_mv[1].as_int;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.mode               = NEW_NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.mv[0].as_int       = mv0.as_int;
            cand->block_mi.mv[1].as_int       = mv1.as_int;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            if (ctx->inter_comp_ctrls.do_me) {
                // Reset the compound prediction store counters before deriving further compound modes.
                ctx->cmp_store.pred0_cnt = 0;
                ctx->cmp_store.pred1_cnt = 0;
                inj_comp_modes(pcs, ctx, &cand_total_cnt);
            }

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = ref_type;
            ++ctx->injected_mv_count;
        }
    }

    // update the total number of candidates injected
    *candidate_total_cnt = cand_total_cnt;
}
2626
2627
/*
 * Inject global-motion candidates (GLOBALMV / GLOBAL_GLOBALMV).
 *
 * Iterates over ctx->ref_frame_type_arr. For every single-reference entry accepted by
 * svt_aom_is_valid_unipred_ref() a GLOBALMV candidate is appended and inj_non_simple_modes() is
 * called with inter-intra enabled and warp/OBMC disabled. For every reference pair (only when
 * allow_bipred is set) accepted by is_valid_bipred_ref() a GLOBAL_GLOBALMV candidate is appended,
 * followed by inj_comp_modes() when ctx->inter_comp_ctrls.do_global is set. Entries whose global
 * motion type is IDENTITY are skipped when pcs->ppcs->gm_ctrls.skip_identity is set. The MVs are
 * derived from the per-reference global motion parameters and recorded in ctx->injected_mvs.
 *
 * The updated count is written back through candidate_total_cnt.
 */
static void inject_global_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt,
                                     const bool allow_bipred) {
    ModeDecisionCandidate* cand_array     = ctx->fast_cand_array;
    uint32_t               cand_total_cnt = (*candidate_total_cnt);
    uint32_t               mi_row         = ctx->blk_org_y >> MI_SIZE_LOG2;
    uint32_t               mi_col         = ctx->blk_org_x >> MI_SIZE_LOG2;

    for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) {
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it];
        MvReferenceFrame rf[2];
        av1_set_ref_frame(rf, ref_pair);

        //single ref/list
        if (rf[1] == NONE_FRAME) {
            MvReferenceFrame frame_type = rf[0];
            uint8_t          list_idx   = get_list_idx(rf[0]);
            uint8_t          ref_idx    = get_ref_frame_idx(rf[0]);

            if (!svt_aom_is_valid_unipred_ref(ctx, GLOBAL_GROUP, list_idx, ref_idx)) {
                continue;
            }
            // Get gm params
            WarpedMotionParams* gm_params = &pcs->ppcs->global_motion[frame_type];
            if (pcs->ppcs->gm_ctrls.skip_identity && gm_params->wmtype == IDENTITY) {
                continue;
            }
            Mv to_inj_mv = svt_aom_gm_get_motion_vector_enc(gm_params,
                                                            pcs->ppcs->frm_hdr.allow_high_precision_mv,
                                                            ctx->blk_geom->bsize,
                                                            mi_col,
                                                            mi_row,
                                                            0 /* force_integer_mv */);

            assert(list_idx == 0 || list_idx == 1);
            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.mode               = GLOBALMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            // Both warp parameter slots receive the same global motion parameters.
            cand->wm_params_l0                = *gm_params;
            cand->wm_params_l1                = *gm_params;
            cand->block_mi.use_intrabc        = 0;
            cand->skip_mode_allowed           = false;
            cand->block_mi.mv[0].as_int       = to_inj_mv.as_int;
            cand->drl_index                   = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            const bool enable_ii   = true;
            const bool enable_obmc = false;
            const bool enable_warp = false;
            inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc);
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred) {
            uint8_t ref_idx_0  = get_ref_frame_idx(rf[0]);
            uint8_t ref_idx_1  = get_ref_frame_idx(rf[1]);
            uint8_t list_idx_0 = get_list_idx(rf[0]);
            uint8_t list_idx_1 = get_list_idx(rf[1]);

            // Skip only this pair. Leaving the function here would bypass the final write-back of
            // cand_total_cnt and silently drop every candidate injected by earlier iterations
            // while their MVs remain recorded in ctx->injected_mvs.
            if (!is_valid_bipred_ref(ctx, GLOBAL_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) {
                continue;
            }
            // Get gm params
            WarpedMotionParams* gm_params_0 = &pcs->ppcs->global_motion[svt_get_ref_frame_type(list_idx_0, ref_idx_0)];

            WarpedMotionParams* gm_params_1 = &pcs->ppcs->global_motion[svt_get_ref_frame_type(list_idx_1, ref_idx_1)];

            if (pcs->ppcs->gm_ctrls.skip_identity &&
                (gm_params_0->wmtype == IDENTITY || gm_params_1->wmtype == IDENTITY)) {
                continue;
            }
            Mv to_inj_mv0 = svt_aom_gm_get_motion_vector_enc(gm_params_0,
                                                             pcs->ppcs->frm_hdr.allow_high_precision_mv,
                                                             ctx->blk_geom->bsize,
                                                             mi_col,
                                                             mi_row,
                                                             0 /* force_integer_mv */);

            Mv      to_inj_mv1         = svt_aom_gm_get_motion_vector_enc(gm_params_1,
                                                             pcs->ppcs->frm_hdr.allow_high_precision_mv,
                                                             ctx->blk_geom->bsize,
                                                             mi_col,
                                                             mi_row,
                                                             0 /* force_integer_mv */);
            uint8_t to_inject_ref_type = av1_ref_frame_type(rf);

            ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
            cand->block_mi.use_intrabc        = 0;
            cand->skip_mode_allowed           = false;
            cand->block_mi.mode               = GLOBAL_GLOBALMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->wm_params_l0                = *gm_params_0;
            cand->wm_params_l1                = *gm_params_1;
            cand->block_mi.is_interintra_used = 0;
            cand->drl_index                   = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.mv[0].as_int       = to_inj_mv0.as_int;
            cand->block_mi.mv[1].as_int       = to_inj_mv1.as_int;
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
            INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);

            if (ctx->inter_comp_ctrls.do_global) {
                // Reset the compound prediction store counters before deriving further compound modes.
                ctx->cmp_store.pred0_cnt = 0;
                ctx->cmp_store.pred1_cnt = 0;
                inj_comp_modes(pcs, ctx, &cand_total_cnt);
            }
            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = to_inject_ref_type;
            ++ctx->injected_mv_count;
        }
    }
    // update the total number of candidates injected
    (*candidate_total_cnt) = cand_total_cnt;
}
2746
2747
// Injects candidates built from the PME motion vectors held in ctx->best_pme_mv.
//
// For every reference type listed in ctx->ref_frame_type_arr:
//   - single reference  : a NEWMV candidate (followed by inj_non_simple_modes());
//   - compound reference: a NEW_NEWMV candidate, only when allow_bipred is true
//                         (followed by inj_comp_modes() when inter_comp_ctrls.do_pme is set).
// A candidate is added only if the matching ctx->valid_pme_mv flag(s) are set, the MV(s)
// are not reported by mv_is_already_injected(), and - when ctx->corrupted_mv_check is on -
// is_valid_mv_diff() accepts them. Every injected MV is recorded in ctx->injected_mvs /
// ctx->injected_ref_types.
//
// candidate_total_cnt: in/out running number of candidates in ctx->fast_cand_array.
static void inject_pme_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt,
                                  const bool allow_bipred) {
    ModeDecisionCandidate* const fast_cands = ctx->fast_cand_array;
    uint32_t                     total      = *candidate_total_cnt;
    Mv                           mvp[2]     = {{{0}}, {{0}}};

    for (uint32_t type_idx = 0; type_idx < ctx->tot_ref_frame_types; ++type_idx) {
        MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[type_idx];
        MvReferenceFrame rf[2];
        av1_set_ref_frame(rf, ref_pair);

        if (rf[1] == NONE_FRAME) {
            // Single reference
            const MvReferenceFrame frame_type = rf[0];
            const uint8_t          list       = get_list_idx(rf[0]);
            const uint8_t          ref        = get_ref_frame_idx(rf[0]);

            if (!ctx->valid_pme_mv[list][ref]) {
                continue;
            }
            const Mv pme_mv = ctx->best_pme_mv[list][ref];
            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, pme_mv, pme_mv, frame_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, frame_type, NEWMV, pme_mv, (Mv){{0}}, &drl_index, mvp);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(mvp, pme_mv, pme_mv, 0)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &fast_cands[total];
            cand->skip_mode_allowed           = false;
            cand->drl_index                   = drl_index;
            cand->pred_mv[0].as_int           = mvp[0].as_int;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.mode               = NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.mv[0].as_int       = pme_mv.as_int;
            cand->block_mi.num_proj_ref       = ctx->wm_sample_info[frame_type].num;
            INC_MD_CAND_CNT(total, pcs->ppcs->max_can_count);

            // Inter-intra, warp and OBMC variants are all requested for the PME candidate.
            inj_non_simple_modes(pcs, ctx, &total, /*enable_ii*/ true, /*enable_warp*/ true, /*enable_obmc*/ true);

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = pme_mv.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = frame_type;
            ++ctx->injected_mv_count;
        } else if (allow_bipred) {
            // Compound reference
            const uint8_t ref0  = get_ref_frame_idx(rf[0]);
            const uint8_t ref1  = get_ref_frame_idx(rf[1]);
            const uint8_t list0 = get_list_idx(rf[0]);
            const uint8_t list1 = get_list_idx(rf[1]);

            if (!(ctx->valid_pme_mv[list0][ref0] && ctx->valid_pme_mv[list1][ref1])) {
                continue;
            }
            const Mv               pme_mv0       = ctx->best_pme_mv[list0][ref0];
            const Mv               pme_mv1       = ctx->best_pme_mv[list1][ref1];
            const MvReferenceFrame pair[2]       = {svt_get_ref_frame_type(list0, ref0),
                                                    svt_get_ref_frame_type(list1, ref1)};
            const uint8_t          compound_type = av1_ref_frame_type(pair);

            if (ctx->injected_mv_count != 0 && mv_is_already_injected(ctx, pme_mv0, pme_mv1, compound_type)) {
                continue;
            }

            uint8_t drl_index = 0;
            svt_aom_choose_best_av1_mv_pred(ctx, compound_type, NEW_NEWMV, pme_mv0, pme_mv1, &drl_index, mvp);
            if (ctx->corrupted_mv_check && !is_valid_mv_diff(mvp, pme_mv0, pme_mv1, 1)) {
                continue;
            }

            ModeDecisionCandidate* cand       = &fast_cands[total];
            cand->skip_mode_allowed           = false;
            cand->drl_index                   = drl_index;
            cand->pred_mv[0].as_int           = mvp[0].as_int;
            cand->pred_mv[1].as_int           = mvp[1].as_int;
            cand->block_mi.use_intrabc        = 0;
            cand->block_mi.mode               = NEW_NEWMV;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            cand->block_mi.ref_frame[0]       = rf[0];
            cand->block_mi.ref_frame[1]       = rf[1];
            cand->block_mi.mv[0].as_int       = pme_mv0.as_int;
            cand->block_mi.mv[1].as_int       = pme_mv1.as_int;
            // Must run after the candidate fields above have been filled in.
            determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG);
            INC_MD_CAND_CNT(total, pcs->ppcs->max_can_count);

            if (ctx->inter_comp_ctrls.do_pme) {
                ctx->cmp_store.pred0_cnt = 0;
                ctx->cmp_store.pred1_cnt = 0;
                inj_comp_modes(pcs, ctx, &total);
            }

            ctx->injected_mvs[ctx->injected_mv_count][0].as_int = pme_mv0.as_int;
            ctx->injected_mvs[ctx->injected_mv_count][1].as_int = pme_mv1.as_int;
            ctx->injected_ref_types[ctx->injected_mv_count]     = compound_type;
            ++ctx->injected_mv_count;
        }
    }

    // Report the updated number of injected candidates
    *candidate_total_cnt = total;
}
2846
2847
// Inter candidate injection for the light-PD0 path: only inject_new_candidates_light_pd0()
// is called. candidate_total_cnt is forwarded unchanged to that routine.
static void inject_inter_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              uint32_t* candidate_total_cnt) {
    // Bipred prediction is only allowed when both dimensions are > 4 and the frame-header reference mode allows it.
    // See AV1 spec 5.11.25
    const bool allow_bipred = pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE && ctx->blk_geom->bwidth != 4 &&
        ctx->blk_geom->bheight != 4;

    inject_new_candidates_light_pd0(pcs, ctx, candidate_total_cnt, allow_bipred);
}
2859
2860
// Inter candidate injection for the light-PD1 path.
// Prepares the overlappable-neighbour count and the warped-motion samples, then injects
// MVP candidates and ME candidates according to the ctx flags checked below.
// cand_total_cnt is the in/out running candidate count passed on to the injectors.
static void inject_inter_candidates_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              uint32_t* cand_total_cnt) {
    const FrameHeader* const hdr = &pcs->ppcs->frm_hdr;

    // Bipred prediction is only allowed when both dimensions are > 4 and the frame-header reference mode allows it.
    // See AV1 spec 5.11.25
    const bool allow_bipred = hdr->reference_mode != SINGLE_REFERENCE && ctx->blk_geom->bwidth != 4 &&
        ctx->blk_geom->bheight != 4;

    if (!hdr->is_motion_mode_switchable) {
        // Overlappable neighbours only needed for non-"SIMPLE_TRANSLATION" candidates
        ctx->blk_ptr->overlappable_neighbors = 0;
    } else {
        // Needed in case WM/OBMC is on at the frame level (even though not used in light-PD1 path)
        const uint16_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2;
        const uint16_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2;
        svt_av1_count_overlappable_neighbors(pcs, ctx->blk_ptr, ctx->blk_geom->bsize, mi_row, mi_col);
    }
    svt_aom_init_wm_samples(pcs, ctx);

    // MVP candidates: skipped when the block is intra-bordered and the neighbouring-mode controls are enabled
    const bool skip_mvp = ctx->is_intra_bordered && ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled;
    if (ctx->new_nearest_injection && !skip_mvp) {
        inject_mvp_candidates_ii_light_pd1(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // ME candidates
    if (ctx->inject_new_me) {
        inject_new_candidates_light_pd1(pcs, ctx, cand_total_cnt, allow_bipred);
    }
}
2890
2891
// Full inter candidate injection.
// After preparing the overlappable-neighbour count, the warped-motion samples and
// (optionally) the OBMC data, the individual injectors are called in this order, each one
// gated by its ctx control: MVP, NEW/NEAREST/NEAR compound combinations, ME, global motion,
// bipred 3x3, unipred 3x3 and PME.
// cand_total_cnt is the in/out running candidate count shared by all injectors.
static void svt_aom_inject_inter_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                            uint32_t* cand_total_cnt) {
    // Bipred prediction is only allowed when both dimensions are > 4 and the frame-header reference mode allows it.
    // See AV1 spec 5.11.25
    const bool allow_bipred = pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE && ctx->blk_geom->bwidth != 4 &&
        ctx->blk_geom->bheight != 4;

    const uint32_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2;
    const uint32_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2;

    svt_av1_count_overlappable_neighbors(pcs, ctx->blk_ptr, ctx->blk_geom->bsize, mi_row, mi_col);
    svt_aom_init_wm_samples(pcs, ctx);
#if CONFIG_ENABLE_OBMC
    if (ctx->obmc_ctrls.enabled && ctx->obmc_ctrls.refine_level == 0) {
        // Precompute the luma OBMC data only when OBMC is reported as allowed for this block
        if (svt_aom_obmc_motion_mode_allowed(pcs, ctx, ctx->blk_geom->bsize, 1, LAST_FRAME, -1, NEWMV) ==
            OBMC_CAUSAL) {
            svt_aom_precompute_obmc_data(pcs, ctx, PICTURE_BUFFER_DESC_LUMA_MASK);
        }
    }
#endif

    // MVP: skipped when the block is intra-bordered and the neighbouring-mode controls are enabled
    const bool skip_mvp = ctx->is_intra_bordered && ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled;
    if (ctx->new_nearest_injection && !skip_mvp) {
        inject_mvp_candidates_ii(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV, NEW_NEARMV
    if (allow_bipred && ctx->new_nearest_near_comb_injection) {
        inject_new_nearest_new_comb_candidates(pcs, ctx, cand_total_cnt);
    }

    // ME
    if (ctx->inject_new_me) {
        inject_new_candidates(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // Global motion
    if (ctx->global_mv_injection) {
        inject_global_candidates(pcs, ctx, cand_total_cnt, allow_bipred);
    }

    // 3x3 refinement around the bipred / unipred MVs
    if (allow_bipred && ctx->bipred3x3_ctrls.enabled) {
        bipred_3x3_candidates_injection(pcs, ctx, cand_total_cnt);
    }
    if (ctx->unipred3x3_injection) {
        unipred_3x3_candidates_injection(pcs, ctx, cand_total_cnt);
    }

    // determine when to inject pme candidates based on size and resolution of block
    if (ctx->inject_new_pme && ctx->updated_enable_pme) {
        inject_pme_candidates(pcs, ctx, cand_total_cnt, allow_bipred);
    }
}
2947
2948
static const TxType g_intra_mode_to_tx_type[INTRA_MODES] = {
2949
    DCT_DCT, // DC
2950
    ADST_DCT, // V
2951
    DCT_ADST, // H
2952
    DCT_DCT, // D45
2953
    ADST_ADST, // D135
2954
    ADST_DCT, // D117
2955
    DCT_ADST, // D153
2956
    DCT_ADST, // D207
2957
    ADST_DCT, // D63
2958
    ADST_ADST, // SMOOTH
2959
    ADST_DCT, // SMOOTH_V
2960
    DCT_ADST, // SMOOTH_H
2961
    ADST_ADST, // PAETH
2962
};
2963
2964
// Returns the g_intra_mode_to_tx_type entry for the prediction mode of the requested plane:
// pred_mode for luma (PLANE_TYPE_Y), otherwise the mode obtained from
// get_uv_mode(pred_mode_uv).
static INLINE TxType intra_mode_to_tx_type(PredictionMode pred_mode, UvPredictionMode pred_mode_uv,
                                           PlaneType plane_type) {
    PredictionMode lookup_mode;
    if (plane_type == PLANE_TYPE_Y) {
        lookup_mode = pred_mode;
    } else {
        lookup_mode = get_uv_mode(pred_mode_uv);
    }
    assert(lookup_mode < INTRA_MODES);
    return g_intra_mode_to_tx_type[lookup_mode];
}
2970
2971
/* For intra prediction, the chroma transform type may not follow the luma type.
2972
This function will return the intra chroma TX type to be used, which is based on TX size and chroma mode.
2973
Refer to section 5.11.40 of the AV1 spec (compute_tx_type). */
2974
246k
TxType svt_aom_get_intra_uv_tx_type(UvPredictionMode pred_mode_uv, TxSize tx_size, int32_t reduced_tx_set) {
2975
246k
    if (txsize_sqr_up_map[tx_size] > TX_32X32) {
2976
0
        return DCT_DCT;
2977
0
    }
2978
2979
    // In intra mode, uv planes don't share the same prediction mode as y
2980
    // plane, so the tx_type should not be shared. Pass DC_PRED as luma mode because the argument
2981
    // will not be used.
2982
246k
    TxType tx_type = intra_mode_to_tx_type(DC_PRED, pred_mode_uv, PLANE_TYPE_UV);
2983
246k
    assert(tx_type < TX_TYPES);
2984
246k
    const TxSetType tx_set_type = get_ext_tx_set_type(tx_size, /*is_inter*/ 0, reduced_tx_set);
2985
246k
    return !av1_ext_tx_used[tx_set_type][tx_type] ? DCT_DCT : tx_type;
2986
246k
}
2987
2988
// Values are now correlated to quantizer.
2989
0
static INLINE int mv_check_bounds(const MvLimits* mv_limits, const Mv* mv) {
2990
0
    return (mv->y >> 3) < mv_limits->row_min || (mv->y >> 3) > mv_limits->row_max ||
2991
0
        (mv->x >> 3) < mv_limits->col_min || (mv->x >> 3) > mv_limits->col_max;
2992
0
}
2993
2994
0
// Non-fatal assertion: logs a message when `statement` is zero and otherwise does nothing.
// Execution always continues.
static void assert_release(int statement) {
    if (statement != 0) {
        return;
    }
    SVT_LOG("ASSERT_ERRRR\n");
}
2999
3000
static void intra_bc_search(PictureControlSet* pcs, ModeDecisionContext* ctx, const SequenceControlSet* scs,
3001
0
                            BlkStruct* blk_ptr, Mv* dv_cand, uint8_t* num_dv_cand) {
3002
0
    IntraBcContext  x_st;
3003
0
    IntraBcContext* x           = &x_st;
3004
0
    uint32_t        full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];
3005
3006
0
    svt_memcpy(&x->crc_calculator, &pcs->crc_calculator, sizeof(pcs->crc_calculator));
3007
0
    x->approx_inter_rate = ctx->approx_inter_rate;
3008
0
    x->xd                = blk_ptr->av1xd;
3009
0
    x->nmv_vec_cost      = ctx->md_rate_est_ctx->nmv_vec_cost;
3010
0
    x->mv_cost_stack     = ctx->md_rate_est_ctx->nmvcoststack;
3011
0
    BlockSize bsize      = ctx->blk_geom->bsize;
3012
0
    assert(bsize < BLOCK_SIZES_ALL);
3013
0
    FrameHeader*           frm_hdr    = &pcs->ppcs->frm_hdr;
3014
0
    const Av1Common* const cm         = pcs->ppcs->av1_cm;
3015
0
    MvReferenceFrame       ref_frame  = INTRA_FRAME;
3016
0
    const int              num_planes = 3;
3017
0
    MacroBlockD*           xd         = blk_ptr->av1xd;
3018
0
    const TileInfo*        tile       = &xd->tile;
3019
0
    const int              mi_row     = -xd->mb_to_top_edge / (8 * MI_SIZE);
3020
0
    const int              mi_col     = -xd->mb_to_left_edge / (8 * MI_SIZE);
3021
0
    const int              w          = block_size_wide[bsize];
3022
0
    const int              h          = block_size_high[bsize];
3023
0
    const int              sb_row     = mi_row >> scs->seq_header.sb_size_log2;
3024
0
    const int              sb_col     = mi_col >> scs->seq_header.sb_size_log2;
3025
3026
    // Set up limit values for MV components.
3027
    // Mv beyond the range do not produce new/different prediction block.
3028
0
    const int mi_width   = mi_size_wide[bsize];
3029
0
    const int mi_height  = mi_size_high[bsize];
3030
0
    x->mv_limits.row_min = -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
3031
0
    x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
3032
0
    x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
3033
0
    x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;
3034
    //set search paramters
3035
0
    x->sadperbit16 = svt_aom_get_sad_per_bit(frm_hdr->quantization_params.base_q_idx, 0);
3036
0
    x->errorperbit = full_lambda >> RD_EPB_SHIFT;
3037
0
    x->errorperbit += (x->errorperbit == 0);
3038
    //temp buffer for hash me
3039
0
    for (int i = 0; i < 2; i++) {
3040
0
        EB_MALLOC_ARRAY_NO_CHECK(x->hash_value_buffer[i], AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
3041
0
    }
3042
3043
0
    Mv nearestmv, nearmv;
3044
0
    svt_av1_find_best_ref_mvs_from_stack(0, ctx->ref_mv_stack /*mbmi_ext*/, xd, ref_frame, &nearestmv, &nearmv, 0);
3045
0
    if (nearestmv.as_int == INVALID_MV) {
3046
0
        nearestmv.as_int = 0;
3047
0
    }
3048
0
    if (nearmv.as_int == INVALID_MV) {
3049
0
        nearmv.as_int = 0;
3050
0
    }
3051
0
    Mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3052
0
    if (dv_ref.as_int == 0) {
3053
0
        svt_aom_find_ref_dv(&dv_ref, tile, scs->seq_header.sb_mi_size, mi_row, mi_col);
3054
0
    }
3055
    // Ref DV should not have sub-pel.
3056
0
    assert((dv_ref.x & 7) == 0);
3057
0
    assert((dv_ref.y & 7) == 0);
3058
0
    ctx->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3059
3060
    /* pointer to current frame */
3061
0
    Yv12BufferConfig cur_buf;
3062
0
    svt_aom_link_eb_to_aom_buffer_desc_8bit(pcs->ppcs->enhanced_pic, &cur_buf);
3063
0
    struct Buf2D yv12_mb[MAX_PLANES];
3064
0
    svt_av1_setup_pred_block(bsize, yv12_mb, &cur_buf, mi_row, mi_col);
3065
0
    for (int i = 0; i < num_planes; ++i) {
3066
0
        x->xdplane[i].pre[0] = yv12_mb[i]; // ref in ME
3067
0
    }
3068
    // setup src for DV search same as ref
3069
0
    x->plane[0].src = x->xdplane[0].pre[0];
3070
3071
0
    enum IntrabcMotionDirection max_dir = pcs->ppcs->intrabc_ctrls.search_dir ? IBC_MOTION_LEFT : IBC_MOTION_DIRECTIONS;
3072
3073
0
    for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir; ++dir) {
3074
0
        const MvLimits tmp_mv_limits = x->mv_limits;
3075
3076
0
        switch (dir) {
3077
0
        case IBC_MOTION_ABOVE:
3078
0
            x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
3079
0
            x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
3080
0
            x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
3081
0
            x->mv_limits.row_max = (sb_row * scs->seq_header.sb_mi_size - mi_row) * MI_SIZE - h;
3082
0
            break;
3083
0
        case IBC_MOTION_LEFT:
3084
0
            x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
3085
0
            x->mv_limits.col_max = (sb_col * scs->seq_header.sb_mi_size - mi_col) * MI_SIZE - w;
3086
            // TODO: Minimize the overlap between above and
3087
            // left areas.
3088
0
            x->mv_limits.row_min     = (tile->mi_row_start - mi_row) * MI_SIZE;
3089
0
            int bottom_coded_mi_edge = AOMMIN((sb_row + 1) * scs->seq_header.sb_mi_size, tile->mi_row_end);
3090
0
            x->mv_limits.row_max     = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3091
0
            break;
3092
0
        default:
3093
0
            assert(0);
3094
0
        }
3095
0
        assert_release(x->mv_limits.col_min >= tmp_mv_limits.col_min);
3096
0
        assert_release(x->mv_limits.col_max <= tmp_mv_limits.col_max);
3097
0
        assert_release(x->mv_limits.row_min >= tmp_mv_limits.row_min);
3098
0
        assert_release(x->mv_limits.row_max <= tmp_mv_limits.row_max);
3099
3100
0
        svt_av1_set_mv_search_range(&x->mv_limits, &dv_ref);
3101
3102
0
        if (x->mv_limits.col_max < x->mv_limits.col_min || x->mv_limits.row_max < x->mv_limits.row_min) {
3103
0
            x->mv_limits = tmp_mv_limits;
3104
0
            continue;
3105
0
        }
3106
0
        Mv mvp_full = dv_ref;
3107
0
        mvp_full.x >>= 3;
3108
0
        mvp_full.y >>= 3;
3109
0
        x->best_mv.as_int = 0;
3110
3111
        // Hash Search
3112
0
        const AomVarianceFnPtr* fn_ptr = &svt_aom_mefn_ptr[bsize];
3113
3114
0
        int best_hash_cost = INT_MAX;
3115
0
        Mv  best_hash_mv   = {{0, 0}};
3116
3117
0
        svt_av1_intrabc_hash_search(
3118
0
            pcs, x, bsize, mi_col * MI_SIZE, mi_row * MI_SIZE, &dv_ref, 1, fn_ptr, &best_hash_cost, &best_hash_mv);
3119
3120
        // Hash produced a candidate
3121
0
        if (best_hash_cost < INT_MAX) {
3122
0
            Mv dv;
3123
0
            dv.x = best_hash_mv.x * 8;
3124
0
            dv.y = best_hash_mv.y * 8;
3125
3126
0
            dv_cand[*num_dv_cand] = dv;
3127
0
            (*num_dv_cand)++;
3128
3129
0
            x->best_mv = best_hash_mv;
3130
0
        }
3131
        // Full-pixel fallback if hash didn't produce a candidate
3132
0
        else {
3133
0
            svt_av1_full_pixel_search(pcs, x, bsize, &mvp_full, 0, x->sadperbit16, NULL, &dv_ref);
3134
3135
0
            Mv dv = {{x->best_mv.x * 8, x->best_mv.y * 8}};
3136
3137
0
            if (!mv_check_bounds(&x->mv_limits, &dv) &&
3138
0
                svt_aom_is_dv_valid(dv, xd, mi_row, mi_col, bsize, scs->seq_header.sb_size_log2)) {
3139
0
                dv_cand[*num_dv_cand] = dv;
3140
0
                (*num_dv_cand)++;
3141
0
            }
3142
0
        }
3143
3144
0
        x->mv_limits = tmp_mv_limits;
3145
0
    }
3146
3147
0
    for (int i = 0; i < 2; i++) {
3148
0
        EB_FREE_ARRAY(x->hash_value_buffer[i]);
3149
0
    }
3150
0
}
3151
3152
static void inject_intra_bc_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, const SequenceControlSet* scs,
3153
0
                                       BlkStruct* blk_ptr, uint32_t* cand_cnt) {
3154
0
    Mv      dv_cand[2];
3155
0
    uint8_t num_dv_cand = 0;
3156
3157
    //perform dv-pred + search up to 2 dv(s)
3158
0
    intra_bc_search(pcs, ctx, scs, blk_ptr, dv_cand, &num_dv_cand);
3159
3160
0
    ModeDecisionCandidate* cand_array = ctx->fast_cand_array;
3161
3162
0
    for (uint32_t dv_i = 0; dv_i < num_dv_cand; dv_i++) {
3163
0
        ModeDecisionCandidate* cand               = &cand_array[*cand_cnt];
3164
0
        cand->palette_info                        = NULL;
3165
0
        cand->block_mi.use_intrabc                = 1;
3166
0
        cand->block_mi.angle_delta[PLANE_TYPE_Y]  = 0;
3167
0
        cand->block_mi.angle_delta[PLANE_TYPE_UV] = 0;
3168
0
        cand->block_mi.uv_mode                    = UV_DC_PRED;
3169
0
        cand->block_mi.cfl_alpha_signs            = 0;
3170
0
        cand->block_mi.cfl_alpha_idx              = 0;
3171
0
        cand->transform_type[0]                   = DCT_DCT;
3172
0
        cand->transform_type_uv                   = DCT_DCT;
3173
0
        cand->block_mi.ref_frame[0]               = INTRA_FRAME;
3174
0
        cand->block_mi.ref_frame[1]               = NONE_FRAME;
3175
0
        cand->block_mi.mode                       = DC_PRED;
3176
0
        cand->block_mi.filter_intra_mode          = FILTER_INTRA_MODES;
3177
        //inter ralated
3178
0
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
3179
0
        cand->block_mi.is_interintra_used = 0;
3180
0
        cand->skip_mode_allowed           = false;
3181
0
        cand->block_mi.mv[0].as_int       = dv_cand[dv_i].as_int;
3182
0
        cand->pred_mv[0].as_int           = ctx->ref_mv_stack[INTRA_FRAME][0].this_mv.as_int;
3183
0
        cand->drl_index                   = 0;
3184
0
        cand->block_mi.interp_filters     = av1_broadcast_interp_filter(BILINEAR);
3185
0
        INC_MD_CAND_CNT((*cand_cnt), pcs->ppcs->max_can_count);
3186
0
    }
3187
0
}
3188
3189
// Light-PD0 intra injection: writes a single DC_PRED candidate (UV_DC_PRED chroma, DCT_DCT
// transforms, no angle delta, no filter-intra mode) into slot 0 of ctx->fast_cand_array.
// The count starts from zero here, so the value held by *candidate_total_cnt on entry is
// ignored; on exit it holds the count after advancing past that one candidate.
static void inject_intra_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              uint32_t* candidate_total_cnt) {
    uint32_t               total = 0;
    ModeDecisionCandidate* dc    = &ctx->fast_cand_array[total];

    dc->skip_mode_allowed = false;
    dc->palette_info      = NULL;
    dc->transform_type[0] = DCT_DCT;
    dc->transform_type_uv = DCT_DCT;

    dc->block_mi.mode                       = DC_PRED;
    dc->block_mi.uv_mode                    = UV_DC_PRED;
    dc->block_mi.use_intrabc                = 0;
    dc->block_mi.filter_intra_mode          = FILTER_INTRA_MODES;
    dc->block_mi.angle_delta[PLANE_TYPE_Y]  = 0;
    dc->block_mi.angle_delta[PLANE_TYPE_UV] = 0;
    dc->block_mi.cfl_alpha_signs            = 0;
    dc->block_mi.cfl_alpha_idx              = 0;
    dc->block_mi.ref_frame[0]               = INTRA_FRAME;
    dc->block_mi.ref_frame[1]               = NONE_FRAME;
    dc->block_mi.motion_mode                = SIMPLE_TRANSLATION;
    dc->block_mi.is_interintra_used         = 0;

    INC_MD_CAND_CNT(total, pcs->ppcs->max_can_count);

    // update the total number of candidates injected
    *candidate_total_cnt = total;
}
3214
3215
// Injects regular intra candidates for modes DC_PRED..ctx->intra_ctrls.intra_mode_end
// (DC_PRED only when dc_cand_only_flag is set).
//
// ctx->intra_ctrls.angular_pred_level drives the pruning of directional modes:
//   0    : every directional mode is skipped;
//   >= 4 : modes from D45_PRED up to (but excluding) INTRA_MODE_END are skipped;
//   <= 2 : directional modes try 7 angle deltas when av1_use_angle_delta() allows it,
//          otherwise a single delta of 0;
//   >= 2 : deltas of +/-1 and +/-2 are dropped;  >= 3 : every non-zero delta is dropped.
// The chroma mode/angle come from ctx->best_uv_mode / ctx->best_uv_angle when
// ctx->ind_uv_avail is set, else they follow luma. A candidate whose chroma transform type is
// not DCT_DCT is dropped for lossless segments.
//
// candidate_total_cnt: in/out running number of candidates in ctx->fast_cand_array.
static void inject_intra_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, const bool dc_cand_only_flag,
                                    uint32_t* candidate_total_cnt) {
    const FrameHeader* const     hdr        = &pcs->ppcs->frm_hdr;
    const PredictionMode         last_mode  = dc_cand_only_flag ? DC_PRED : ctx->intra_ctrls.intra_mode_end;
    uint32_t                     total      = *candidate_total_cnt;
    ModeDecisionCandidate* const fast_cands = ctx->fast_cand_array;

    const bool angle_delta_allowed = ctx->intra_ctrls.angular_pred_level != 0 &&
        av1_use_angle_delta(ctx->blk_geom->bsize);
    const bool angular_off = ctx->intra_ctrls.angular_pred_level == 0;

    // Per-mode skip flags; only populated for the most aggressive angular levels
    bool skip_mode[INTRA_MODES] = {false};
    if (ctx->intra_ctrls.angular_pred_level >= 4) {
        for (uint8_t m = D45_PRED; m < INTRA_MODE_END; m++) {
            skip_mode[m] = true;
        }
    }
    const TxSize uv_tx_size = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);

    for (PredictionMode mode = DC_PRED; mode <= last_mode; ++mode) {
        const bool directional = av1_is_directional_mode(mode);
        if (directional && (angular_off || skip_mode[mode])) {
            continue;
        }

        const bool    sweep_deltas = directional && ctx->intra_ctrls.angular_pred_level <= 2 && angle_delta_allowed;
        const uint8_t delta_cnt    = sweep_deltas ? 7 : 1;

        for (uint8_t step = 0; step < delta_cnt; ++step) {
            const int32_t angle_delta = CLIP(
                (delta_cnt == 1 ? 0 : step - MAX_ANGLE_DELTA), -MAX_ANGLE_DELTA, MAX_ANGLE_DELTA);

            const bool small_delta = angle_delta == -1 || angle_delta == 1 || angle_delta == -2 || angle_delta == 2;
            if ((ctx->intra_ctrls.angular_pred_level >= 2 && small_delta) ||
                (ctx->intra_ctrls.angular_pred_level >= 3 && angle_delta != 0)) {
                continue;
            }

            ModeDecisionCandidate* cand              = &fast_cands[total];
            cand->skip_mode_allowed                  = false;
            cand->palette_info                       = NULL;
            cand->block_mi.mode                      = mode;
            cand->block_mi.use_intrabc               = 0;
            cand->block_mi.filter_intra_mode         = FILTER_INTRA_MODES;
            cand->block_mi.angle_delta[PLANE_TYPE_Y] = angle_delta;
            // Chroma follows the independent-UV search result when available, luma otherwise
            cand->block_mi.uv_mode = ctx->ind_uv_avail ? ctx->best_uv_mode[mode] : intra_luma_to_chroma[mode];
            cand->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->ind_uv_avail ? ctx->best_uv_angle[mode]
                                                                          : cand->block_mi.angle_delta[PLANE_TYPE_Y];
            cand->block_mi.cfl_alpha_signs            = 0;
            cand->block_mi.cfl_alpha_idx              = 0;
            cand->transform_type[0]                   = DCT_DCT;
            cand->transform_type_uv                   = svt_aom_get_intra_uv_tx_type(
                cand->block_mi.uv_mode, uv_tx_size, hdr->reduced_tx_set);

            // Lossless segments only keep candidates whose chroma transform type is DCT_DCT;
            // the slot is not counted and gets reused by the next candidate.
            if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) {
                continue;
            }

            cand->block_mi.ref_frame[0]       = INTRA_FRAME;
            cand->block_mi.ref_frame[1]       = NONE_FRAME;
            cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
            cand->block_mi.is_interintra_used = 0;
            INC_MD_CAND_CNT(total, pcs->ppcs->max_can_count);
        }
    }

    // update the total number of candidates injected
    *candidate_total_cnt = total;
}
3285
3286
/*
 * Append one fast-loop candidate per allowed filter-intra mode.
 *
 * The last filter-intra mode to test is derived from the last regular intra mode allowed by
 * ctx->intra_ctrls.intra_mode_end and is then capped by ctx->filter_intra_ctrls.max_filter_intra_mode.
 * For a lossless segment, a candidate whose chroma transform type is not DCT_DCT is not counted
 * (its slot is simply reused by the next candidate).
 *
 * pcs                 : picture control set (frame header, candidate count limit)
 * ctx                 : mode decision context; candidates are written to ctx->fast_cand_array
 * candidate_total_cnt : in/out, running number of candidates in the fast candidate array
 */
static void inject_filter_intra_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                           uint32_t* candidate_total_cnt) {
    // Translate the last allowed regular intra mode into the last filter-intra mode to test
    FilterIntraMode last_mode;
    if (ctx->intra_ctrls.intra_mode_end == PAETH_PRED) {
        last_mode = FILTER_PAETH_PRED;
    } else if (ctx->intra_ctrls.intra_mode_end >= D157_PRED) {
        last_mode = FILTER_D157_PRED;
    } else if (ctx->intra_ctrls.intra_mode_end >= H_PRED) {
        last_mode = FILTER_H_PRED;
    } else if (ctx->intra_ctrls.intra_mode_end >= V_PRED) {
        last_mode = FILTER_V_PRED;
    } else {
        last_mode = FILTER_DC_PRED;
    }
    last_mode = MIN(last_mode, ctx->filter_intra_ctrls.max_filter_intra_mode);

    const TxSize uv_tx_size = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
    uint32_t     count      = *candidate_total_cnt;

    for (FilterIntraMode fi_mode = FILTER_DC_PRED; fi_mode <= last_mode; fi_mode++) {
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[count];
        BlockModeInfo*         mi   = &cand->block_mi;
        // Regular intra mode used to look up the chroma settings for this filter-intra mode
        const uint32_t equiv_intra_mode = fimode_to_intramode[fi_mode];

        cand->skip_mode_allowed = false;
        cand->palette_info      = NULL;

        // Luma: filter-intra is signalled on top of DC_PRED with no angle delta
        mi->mode                      = DC_PRED;
        mi->use_intrabc               = 0;
        mi->filter_intra_mode         = fi_mode;
        mi->angle_delta[PLANE_TYPE_Y] = 0;

        // Chroma: use the independent chroma search result when available, otherwise follow luma
        if (ctx->ind_uv_avail) {
            mi->uv_mode                    = ctx->best_uv_mode[equiv_intra_mode];
            mi->angle_delta[PLANE_TYPE_UV] = ctx->best_uv_angle[equiv_intra_mode];
        } else {
            mi->uv_mode                    = intra_luma_to_chroma[equiv_intra_mode];
            mi->angle_delta[PLANE_TYPE_UV] = mi->angle_delta[PLANE_TYPE_Y];
        }
        mi->cfl_alpha_signs = 0;
        mi->cfl_alpha_idx   = 0;

        cand->transform_type[0] = DCT_DCT;
        cand->transform_type_uv = svt_aom_get_intra_uv_tx_type(
            mi->uv_mode, uv_tx_size, pcs->ppcs->frm_hdr.reduced_tx_set);

        // Lossless segments only keep candidates whose chroma transform is DCT_DCT
        if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) {
            continue;
        }

        mi->ref_frame[0]       = INTRA_FRAME;
        mi->ref_frame[1]       = NONE_FRAME;
        mi->motion_mode        = SIMPLE_TRANSLATION;
        mi->is_interintra_used = 0;
        INC_MD_CAND_CNT(count, pcs->ppcs->max_can_count);
    }

    // Report the updated number of injected candidates
    *candidate_total_cnt = count;
}
3337
3338
/*
 * Inject a single zero-motion NEWMV candidate that references the first list-0 reference frame.
 *
 * The candidate-generation routines in this file call it for non-I slices when no other
 * candidate was injected. The candidate is only added when MV checking is disabled
 * (ctx->corrupted_mv_check == 0) or when the zero MV is valid against the selected MV predictor.
 *
 * pcs                 : picture control set (candidate count limit)
 * ctx                 : mode decision context; the candidate is written to ctx->fast_cand_array
 * candidate_total_cnt : in/out, running number of candidates; only updated when a candidate is added
 */
static void inject_zz_backup_candidate(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                       uint32_t* candidate_total_cnt) {
    const MvReferenceFrame ref_frame  = svt_get_ref_frame_type(REF_LIST_0, 0);
    const Mv               zero_mv    = {{0, 0}};
    uint32_t               count      = *candidate_total_cnt;
    ModeDecisionCandidate* cand       = &ctx->fast_cand_array[count];
    Mv                     pred_mv[2] = {{{0}}, {{0}}};

    // Pick the MV predictor (and its DRL index) for the zero MV
    cand->drl_index = 0;
    svt_aom_choose_best_av1_mv_pred(ctx, ref_frame, NEWMV, zero_mv, zero_mv, &cand->drl_index, pred_mv);

    // Nothing is injected when MV checking is on and the MV difference is rejected
    if (ctx->corrupted_mv_check && !is_valid_mv_diff(pred_mv, zero_mv, zero_mv, 0)) {
        return;
    }

    cand->skip_mode_allowed = false;
    cand->transform_type[0] = DCT_DCT;
    cand->transform_type_uv = DCT_DCT;
    cand->pred_mv[0].as_int = pred_mv[0].as_int;

    cand->block_mi.use_intrabc        = 0;
    cand->block_mi.mode               = NEWMV;
    cand->block_mi.mv[0]              = zero_mv;
    cand->block_mi.ref_frame[0]       = ref_frame;
    cand->block_mi.ref_frame[1]       = NONE_FRAME;
    cand->block_mi.is_interintra_used = 0;
    cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
    cand->block_mi.num_proj_ref       = ctx->wm_sample_info[ref_frame].num;

    INC_MD_CAND_CNT(count, pcs->ppcs->max_can_count);
    // Report the updated number of injected candidates
    *candidate_total_cnt = count;
}
3371
3372
1.01M
/*
 * Tell whether palette mode may be used for a block.
 *
 * allow_palette : non-zero when palette is enabled by the caller
 * bsize         : block size, must be below BLOCK_SIZES_ALL
 *
 * Returns 1 when palette is enabled and the block is at least BLOCK_8X8 and at most 64 wide
 * and 64 high; 0 otherwise.
 */
int svt_av1_allow_palette(int allow_palette, BlockSize bsize) {
    assert(bsize < BLOCK_SIZES_ALL);
    if (!allow_palette || bsize < BLOCK_8X8) {
        return 0;
    }
    return block_size_wide[bsize] <= 64 && block_size_high[bsize] <= 64;
}
3376
3377
void search_palette_luma(PictureControlSet* pcs, ModeDecisionContext* ctx, PaletteInfo* palette_cand,
3378
                         uint8_t* palette_size_array, uint32_t* tot_palette_cands);
3379
3380
0
/*
 * Run the luma palette search and append one fast-loop candidate per palette found.
 *
 * Only luma palettes are produced (palette_size[1] is always 0). For a lossless segment, a
 * candidate whose chroma transform type is not DCT_DCT is not counted.
 *
 * pcs                 : picture control set (frame header, candidate count limit)
 * ctx                 : mode decision context; palettes come from ctx->palette_cand_array and
 *                       candidates are written to ctx->fast_cand_array
 * candidate_total_cnt : in/out, running number of candidates in the fast candidate array
 */
static void inject_palette_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt) {
    const TxSize uv_tx_size    = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
    PaletteInfo* palettes      = ctx->palette_cand_array;
    uint8_t*     palette_sizes = ctx->palette_size_array_0;
    uint32_t     num_palettes  = 0;
    uint32_t     count         = *candidate_total_cnt;

    // MD palette search
    search_palette_luma(pcs, ctx, palettes, palette_sizes, &num_palettes);

    for (uint32_t pal_idx = 0; pal_idx < num_palettes; ++pal_idx) {
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[count];
        BlockModeInfo*         mi   = &cand->block_mi;

        assert(palette_sizes[pal_idx] < 9);
        cand->palette_info    = &palettes[pal_idx];
        cand->palette_size[0] = palette_sizes[pal_idx];
        // Palette is not supported for chroma
        cand->palette_size[1] = 0;

        cand->skip_mode_allowed       = false;
        mi->is_interintra_used        = 0;
        mi->mode                      = DC_PRED;
        mi->use_intrabc               = 0;
        mi->filter_intra_mode         = FILTER_INTRA_MODES;
        mi->angle_delta[PLANE_TYPE_Y] = 0;

        // With no chroma palette the intra chroma mode is free; a chroma palette would require
        // the intra chroma mode to be forced to DC_PRED
        assert(cand->palette_size[1] == 0);
        if (ctx->ind_uv_avail) {
            mi->uv_mode                    = ctx->best_uv_mode[DC_PRED];
            mi->angle_delta[PLANE_TYPE_UV] = ctx->best_uv_angle[DC_PRED];
        } else {
            mi->uv_mode                    = intra_luma_to_chroma[DC_PRED];
            mi->angle_delta[PLANE_TYPE_UV] = mi->angle_delta[PLANE_TYPE_Y];
        }
        mi->cfl_alpha_signs = 0;
        mi->cfl_alpha_idx   = 0;

        cand->transform_type[0] = DCT_DCT;
        cand->transform_type_uv = svt_aom_get_intra_uv_tx_type(
            mi->uv_mode, uv_tx_size, pcs->ppcs->frm_hdr.reduced_tx_set);

        // Lossless segments only keep candidates whose chroma transform is DCT_DCT
        if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) {
            continue;
        }

        mi->ref_frame[0] = INTRA_FRAME;
        mi->ref_frame[1] = NONE_FRAME;
        mi->motion_mode  = SIMPLE_TRANSLATION;
        INC_MD_CAND_CNT(count, pcs->ppcs->max_can_count);
    }

    // Report the updated number of injected candidates
    *candidate_total_cnt = count;
}
3431
3432
0
/*
 * Force DC-only intra injection when the best of the ME / PME distortions is small.
 *
 * The threshold is cand_elimination_ctrls.dc_only_th scaled by the block area. Nothing is
 * done when both distortions still hold the "not available" value (uint32_t)~0.
 *
 * ctx               : mode decision context (distortions, block geometry, thresholds)
 * dc_cand_only_flag : in/out, set to 1 when the best distortion is below the threshold;
 *                     never cleared here
 */
static INLINE void eliminate_candidate_based_on_pme_me_results(ModeDecisionContext* ctx, uint8_t* dc_cand_only_flag) {
    // Neither PME nor ME produced a distortion: nothing to base the decision on
    if (ctx->md_pme_dist == (uint32_t)~0 && ctx->md_me_dist == (uint32_t)~0) {
        return;
    }

    uint32_t area_scaled_th = ctx->cand_reduction_ctrls.cand_elimination_ctrls.dc_only_th;
    area_scaled_th *= ctx->blk_geom->bheight * ctx->blk_geom->bwidth;

    const uint32_t best_dist = MIN(ctx->md_pme_dist, ctx->md_me_dist);
    if (best_dist < area_scaled_th) {
        *dc_cand_only_flag = 1;
    }
}
3442
3443
/*
 * Check whether a candidate's reference frame pair is among the available reference frame types.
 *
 * rf                  : reference frame pair of the candidate
 * ref_frame_type_arr  : available reference frame types
 * tot_ref_frame_types : number of entries in ref_frame_type_arr
 *
 * Returns true when rf[0] is INTRA_FRAME (intra candidates are not validated) or when the pair
 * matches the pair derived from one of the available types; false otherwise.
 */
static bool valid_ref_frame_type(MvReferenceFrame rf[2], const MvReferenceFrame ref_frame_type_arr[],
                                 uint8_t tot_ref_frame_types) {
    // INTRA_FRAME candidates can be present in the array; they are always accepted
    if (rf[0] == INTRA_FRAME) {
        return true;
    }

    bool found = false;
    for (uint8_t type_idx = 0; type_idx < tot_ref_frame_types && !found; type_idx++) {
        MvReferenceFrame pair[2];
        av1_set_ref_frame(pair, ref_frame_type_arr[type_idx]);
        found = pair[0] == rf[0] && pair[1] == rf[1];
    }
    return found;
}
3459
3460
// refer to inject_zz_backup_candidate, but use BWD ref instead of LAST
3461
static void inject_sframe_backup_candidate(PictureControlSet* pcs, ModeDecisionContext* ctx,
3462
0
                                           uint32_t* candidate_total_cnt) {
3463
0
    ModeDecisionCandidate* cand_array      = ctx->fast_cand_array;
3464
0
    Mv                     best_pred_mv[2] = {{{0}}, {{0}}};
3465
0
    uint32_t               cand_total_cnt  = (*candidate_total_cnt);
3466
0
    cand_array[cand_total_cnt].drl_index   = 0;
3467
0
    svt_aom_choose_best_av1_mv_pred(ctx,
3468
0
                                    svt_get_ref_frame_type(REF_LIST_1, 0),
3469
0
                                    NEWMV,
3470
0
                                    (Mv){{0}},
3471
0
                                    (Mv){{0}},
3472
0
                                    &cand_array[cand_total_cnt].drl_index,
3473
0
                                    best_pred_mv);
3474
0
    if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, (Mv){{0, 0}}, (Mv){{0, 0}}, 0)) {
3475
0
        ModeDecisionCandidate* cand       = &cand_array[cand_total_cnt];
3476
0
        cand->block_mi.use_intrabc        = 0;
3477
0
        cand->skip_mode_allowed           = false;
3478
0
        cand->block_mi.mode               = NEWMV;
3479
0
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
3480
0
        cand->block_mi.mv[0]              = (Mv){{0, 0}};
3481
0
        cand->block_mi.ref_frame[0]       = svt_get_ref_frame_type(REF_LIST_1, 0);
3482
0
        cand->block_mi.ref_frame[1]       = NONE_FRAME;
3483
0
        cand->transform_type[0]           = DCT_DCT;
3484
0
        cand->transform_type_uv           = DCT_DCT;
3485
0
        cand->pred_mv[0].as_int           = best_pred_mv[0].as_int;
3486
0
        cand->block_mi.is_interintra_used = 0;
3487
0
        cand->block_mi.motion_mode        = SIMPLE_TRANSLATION;
3488
0
        cand->block_mi.num_proj_ref       = ctx->wm_sample_info[svt_get_ref_frame_type(REF_LIST_1, 0)].num;
3489
0
        INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count);
3490
        // update the total number of candidates injected
3491
0
        (*candidate_total_cnt) = cand_total_cnt;
3492
0
    }
3493
0
}
3494
3495
// in MD stage 0, candidates are injected by different tools, but for S-Frame in RA mode
3496
// the ref frame types in ref_list0 has be pruned in PD for the reversed direction of ref MVs
3497
// here to check and reject the candidates if mismatches the available frame types array
3498
0
static uint32_t reject_candidate_sframe(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t cand_total_cnt) {
3499
0
    for (uint32_t i = 0; i < cand_total_cnt;) {
3500
0
        if (!valid_ref_frame_type(
3501
0
                ctx->fast_cand_array[i].block_mi.ref_frame, ctx->ref_frame_type_arr, ctx->tot_ref_frame_types)) {
3502
0
            for (uint32_t j = i; j < cand_total_cnt; j++) {
3503
0
                memcpy(&ctx->fast_cand_array[j], &ctx->fast_cand_array[j + 1], sizeof(ModeDecisionCandidate));
3504
0
            }
3505
0
            cand_total_cnt--;
3506
0
            continue;
3507
0
        }
3508
0
        i++;
3509
0
    }
3510
    // zero candidate in fast cand array risks in md stage 0, add a candidate from ref list1 as backup
3511
0
    if (cand_total_cnt == 0) {
3512
0
        inject_sframe_backup_candidate(pcs, ctx, &cand_total_cnt);
3513
0
    }
3514
0
    assert(cand_total_cnt > 0);
3515
0
    return cand_total_cnt;
3516
0
}
3517
3518
EbErrorType generate_md_stage_0_cand_light_pd0(ModeDecisionContext* ctx, uint32_t* candidate_total_count_ptr,
3519
7.53k
                                               PictureControlSet* pcs) {
3520
7.53k
    const SliceType slice_type     = pcs->slice_type;
3521
7.53k
    uint32_t        cand_total_cnt = 0;
3522
    //----------------------
3523
    // Intra
3524
7.53k
    if (ctx->blk_geom->sq_size < 128 && ctx->intra_ctrls.enable_intra) {
3525
7.53k
        inject_intra_candidates_light_pd0(pcs, ctx, &cand_total_cnt);
3526
7.53k
    }
3527
3528
7.53k
    if (slice_type != I_SLICE) {
3529
0
        inject_inter_candidates_light_pd0(pcs, ctx, &cand_total_cnt);
3530
0
    }
3531
3532
    // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_stage_0()
3533
    // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques
3534
7.53k
    if (slice_type != I_SLICE && cand_total_cnt == 0) {
3535
0
        inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt);
3536
0
    }
3537
3538
7.53k
    if (pcs->ppcs->sframe_ref_pruned) {
3539
0
        cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt);
3540
0
    }
3541
3542
7.53k
    *candidate_total_count_ptr = cand_total_cnt;
3543
3544
7.53k
    return EB_ErrorNone;
3545
7.53k
}
3546
3547
/*
3548
   generate candidates for light pd1
3549
*/
3550
void generate_md_stage_0_cand_light_pd1(ModeDecisionContext* ctx, uint32_t* candidate_total_count_ptr,
3551
0
                                        PictureControlSet* pcs) {
3552
0
    const SliceType slice_type     = pcs->slice_type;
3553
0
    uint32_t        cand_total_cnt = 0;
3554
    // Reset duplicates variables
3555
0
    ctx->injected_mv_count = 0;
3556
0
    ctx->inject_new_me     = 1;
3557
0
    if (slice_type != I_SLICE) {
3558
0
        inject_inter_candidates_light_pd1(pcs, ctx, &cand_total_cnt);
3559
0
    }
3560
    //----------------------
3561
    // Intra
3562
0
    if (ctx->intra_ctrls.enable_intra && ctx->blk_geom->sq_size < 128) {
3563
0
        uint8_t dc_cand_only_flag = ctx->intra_ctrls.intra_mode_end == DC_PRED || is_dc_only_safe(pcs, ctx);
3564
0
        if (ctx->cand_reduction_ctrls.cand_elimination_ctrls.enabled && !dc_cand_only_flag &&
3565
0
            ctx->md_me_dist != (uint32_t)~0) {
3566
0
            uint32_t th = ctx->cand_reduction_ctrls.cand_elimination_ctrls.dc_only_th;
3567
0
            th *= (ctx->blk_geom->bheight * ctx->blk_geom->bwidth);
3568
0
            if (ctx->md_me_dist < th) {
3569
0
                dc_cand_only_flag = 1;
3570
0
            }
3571
0
        }
3572
0
        inject_intra_candidates(pcs, ctx, dc_cand_only_flag, &cand_total_cnt);
3573
0
    }
3574
3575
    // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_syage_0()
3576
    // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques
3577
0
    if (slice_type != I_SLICE && cand_total_cnt == 0) {
3578
0
        inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt);
3579
0
    }
3580
3581
0
    if (pcs->ppcs->sframe_ref_pruned) {
3582
0
        cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt);
3583
0
    }
3584
3585
0
    *candidate_total_count_ptr = cand_total_cnt;
3586
0
}
3587
3588
/*
 * Generate the MD stage 0 candidates for the regular path and assign each one to a class.
 *
 * Injection order: regular intra, filter-intra, palette, intra block copy, then inter
 * (non-I slices only). Afterwards every candidate gets a cand_class and the per-class
 * counters in ctx->md_stage_0_count are filled:
 *   CAND_CLASS_0 : intra (no palette, no intrabc)
 *   CAND_CLASS_1 : inter MVP modes (when inter classes are not merged)
 *   CAND_CLASS_2 : NEWMV / NEW_NEWMV, or every inter candidate when inter classes are merged
 *   CAND_CLASS_3 : palette
 *   CAND_CLASS_4 : intra block copy
 *
 * pcs                       : picture control set
 * ctx                       : mode decision context; candidates are written to ctx->fast_cand_array
 * pc_tree                   : partition tree node of the block, used for the intrabc parent gating
 * candidate_total_count_ptr : out, number of candidates generated
 *
 * Always returns EB_ErrorNone.
 */
EbErrorType generate_md_stage_0_cand(PictureControlSet* pcs, ModeDecisionContext* ctx, const PC_TREE* const pc_tree,
                                     uint32_t* candidate_total_count_ptr) {
    const SequenceControlSet* scs            = pcs->scs;
    const bool                is_inter_slice = pcs->slice_type != I_SLICE;
    uint32_t                  num_cands      = 0;

    // Reset the duplicate-detection state
    ctx->injected_mv_count = 0;
    ctx->inject_new_me     = 1;
    ctx->inject_new_pme    = 1;

    //----------------------
    // Intra
    if (ctx->intra_ctrls.enable_intra) {
        uint8_t dc_cand_only_flag = ctx->intra_ctrls.intra_mode_end == DC_PRED || is_dc_only_safe(pcs, ctx);
        if (ctx->cand_reduction_ctrls.cand_elimination_ctrls.enabled) {
            eliminate_candidate_based_on_pme_me_results(ctx, &dc_cand_only_flag);
        }

        // Regular intra is not injected for 128x128 blocks
        if (ctx->blk_geom->sq_size < 128) {
            inject_intra_candidates(pcs, ctx, dc_cand_only_flag, &num_cands);
        }

        if (ctx->filter_intra_ctrls.enabled && svt_aom_filter_intra_allowed_bsize(ctx->blk_geom->bsize)) {
            inject_filter_intra_candidates(pcs, ctx, &num_cands);
        }

        // Stays true when palette is not evaluated; otherwise tells whether palette added candidates
        bool eval_intrabc = true;
        if (svt_av1_allow_palette(ctx->md_palette_level, ctx->blk_geom->bsize)) {
            const uint32_t cnt_before_palette = num_cands;
            inject_palette_candidates(pcs, ctx, &num_cands);
            eval_intrabc = num_cands > cnt_before_palette;
        }

        if (ctx->md_allow_intrabc && (!pcs->ppcs->intrabc_ctrls.palette_hint || eval_intrabc)) {
            // Parent gating: skip intrabc when the already-tested PART_N block used for gating
            // did not select intrabc
            bool gated_by_parent;
            if (ctx->shape == PART_N) {
                gated_by_parent = pcs->ppcs->intrabc_ctrls.b4_parent_gating && ctx->blk_geom->sq_size == 4 &&
                    pc_tree->parent->tested_blk[PART_N][0] &&
                    pc_tree->parent->block_data[PART_N][0]->block_mi.use_intrabc == 0;
            } else {
                gated_by_parent = pcs->ppcs->intrabc_ctrls.nsq_parent_gating && pc_tree->tested_blk[PART_N][0] &&
                    pc_tree->block_data[PART_N][0]->block_mi.use_intrabc == 0;
            }

            if (!gated_by_parent) {
                inject_intra_bc_candidates(pcs, ctx, scs, ctx->blk_ptr, &num_cands);
            }
        }
    }

    if (is_inter_slice) {
        svt_aom_inject_inter_candidates(pcs, ctx, &num_cands);

        // For I_SLICE, DC is always injected, so md_stage_0() always has a candidate.
        // For non I_SLICE, the INTER candidate pruning techniques can leave the list empty,
        // in which case a zero-motion backup candidate is injected.
        if (num_cands == 0) {
            inject_zz_backup_candidate(pcs, ctx, &num_cands);
        }
    }

    if (pcs->ppcs->sframe_ref_pruned) {
        num_cands = reject_candidate_sframe(pcs, ctx, num_cands);
    }

    *candidate_total_count_ptr = num_cands;

    memset(ctx->md_stage_0_count, 0, CAND_CLASS_TOTAL * sizeof(uint32_t));

    // Decide whether all inter candidates share one class: enabled when the best ME/PME
    // distortion per pixel is below a QP-dependent threshold
    bool merge_inter_cands = false;
    if (ctx->nic_ctrls.pruning_ctrls.merge_inter_cands_mult != (uint8_t)~0) {
        uint16_t th = (ctx->nic_ctrls.pruning_ctrls.merge_inter_cands_mult * (63 - pcs->scs->static_config.qp)) >> 1;
        if ((MIN(ctx->md_me_dist, ctx->md_pme_dist) / (ctx->blk_geom->bwidth * ctx->blk_geom->bheight)) < th) {
            merge_inter_cands = true;
        }
    }

    for (uint32_t idx = 0; idx < num_cands; idx++) {
        ModeDecisionCandidate* cand = &ctx->fast_cand_array[idx];
        uint32_t               cls;

        if (is_intra_mode(cand->block_mi.mode)) {
            if (cand->block_mi.use_intrabc != 0) {
                // Intra-BC Prediction
                cls = CAND_CLASS_4;
            } else if (cand->palette_info == NULL || cand->palette_size[0] == 0) {
                // Intra prediction
                cls = CAND_CLASS_0;
            } else {
                // Palette Prediction
                cls = CAND_CLASS_3;
            }
        } else if (cand->block_mi.mode == NEWMV || cand->block_mi.mode == NEW_NEWMV || merge_inter_cands) {
            // MV Prediction
            cls = CAND_CLASS_2;
        } else {
            // MVP Prediction
            cls = CAND_CLASS_1;
        }

        cand->cand_class = cls;
        ctx->md_stage_0_count[cls]++;
    }
    return EB_ErrorNone;
}
3700
3701
uint8_t av1_drl_ctx(const CandidateMv* ref_mv_stack, int32_t ref_idx);
3702
3703
/***************************************
3704
* Update symbols for light-PD1 path
3705
***************************************/
3706
void svt_aom_product_full_mode_decision_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx,
3707
0
                                                  ModeDecisionCandidateBuffer* cand_bf) {
3708
0
    BlkStruct*             blk_ptr = ctx->blk_ptr;
3709
0
    ModeDecisionCandidate* cand    = cand_bf->cand;
3710
0
    blk_ptr->total_rate            = cand_bf->total_rate;
3711
3712
    // Set common signals (INTER/INTRA)
3713
0
    svt_memcpy(&blk_ptr->block_mi, &cand->block_mi, sizeof(BlockModeInfo));
3714
0
    blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
3715
3716
    // Set INTER mode signals
3717
0
    if (is_inter_mode(cand->block_mi.mode)) {
3718
0
        blk_ptr->drl_index = cand->drl_index;
3719
0
        assert(IMPLIES(
3720
0
            is_inter_compound_mode(cand->block_mi.mode) && blk_ptr->block_mi.interinter_comp.type == COMPOUND_AVERAGE,
3721
0
            (blk_ptr->block_mi.comp_group_idx == 0 && blk_ptr->block_mi.compound_idx == 1)));
3722
3723
        // Set MVs
3724
0
        blk_ptr->predmv[0].as_int = cand->pred_mv[0].as_int;
3725
0
        if (has_second_ref(&blk_ptr->block_mi)) {
3726
0
            blk_ptr->predmv[1].as_int = cand->pred_mv[1].as_int;
3727
0
        }
3728
3729
0
        const int8_t ref_frame_type = av1_ref_frame_type(blk_ptr->block_mi.ref_frame);
3730
        // Store winning inter_mode_ctx in blk to avoid storing for all ref frames for EC
3731
0
        blk_ptr->inter_mode_ctx = ctx->inter_mode_ctx[ref_frame_type];
3732
        // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
3733
0
        if (blk_ptr->block_mi.mode == NEWMV || blk_ptr->block_mi.mode == NEW_NEWMV) {
3734
0
            for (uint8_t idx = 0; idx < 2; ++idx) {
3735
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3736
0
                    blk_ptr->drl_ctx[idx] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3737
0
                } else {
3738
0
                    blk_ptr->drl_ctx[idx] = -1;
3739
0
                }
3740
0
            }
3741
0
        }
3742
3743
0
        if (have_nearmv_in_inter_mode(blk_ptr->block_mi.mode)) {
3744
            // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
3745
0
            for (uint8_t idx = 1; idx < 3; ++idx) {
3746
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3747
0
                    blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3748
0
                } else {
3749
0
                    blk_ptr->drl_ctx_near[idx - 1] = -1;
3750
0
                }
3751
0
            }
3752
0
        }
3753
0
    } else { // Set INTRA mode signals
3754
0
        cand->skip_mode_allowed = false;
3755
0
    }
3756
    // Set TX and coeff-related data
3757
0
    blk_ptr->block_has_coeff   = ((cand_bf->block_has_coeff) > 0) ? true : false;
3758
0
    ctx->blk_ptr->cnt_nz_coeff = cand_bf->cnt_nz_coeff;
3759
3760
    // If skip_mode is allowed, and block has no coeffs, use skip_mode
3761
0
    if (cand->skip_mode_allowed == true) {
3762
0
        blk_ptr->block_mi.skip_mode |= !blk_ptr->block_has_coeff;
3763
0
    }
3764
3765
0
    assert(IMPLIES(pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE && blk_ptr->block_mi.skip_mode,
3766
0
                   cand->block_mi.interp_filters == 0));
3767
0
    if (blk_ptr->block_mi.skip_mode) {
3768
0
        blk_ptr->block_has_coeff = 0;
3769
0
        cand_bf->y_has_coeff     = 0;
3770
0
        cand_bf->u_has_coeff     = 0;
3771
0
        cand_bf->v_has_coeff     = 0;
3772
0
    }
3773
0
    blk_ptr->block_mi.skip = !blk_ptr->block_has_coeff;
3774
3775
0
    const uint16_t txb_itr       = 0;
3776
0
    const int32_t  txb_1d_offset = 0, txb_1d_offset_uv = 0;
3777
0
    blk_ptr->y_has_coeff         = cand_bf->y_has_coeff;
3778
0
    blk_ptr->u_has_coeff         = cand_bf->u_has_coeff;
3779
0
    blk_ptr->v_has_coeff         = cand_bf->v_has_coeff;
3780
0
    blk_ptr->tx_type[txb_itr]    = cand->transform_type[txb_itr];
3781
0
    blk_ptr->tx_type_uv          = cand->transform_type_uv;
3782
0
    blk_ptr->quant_dc.y[txb_itr] = cand_bf->quant_dc.y[txb_itr];
3783
0
    blk_ptr->quant_dc.u[txb_itr] = cand_bf->quant_dc.u[txb_itr];
3784
0
    blk_ptr->quant_dc.v[txb_itr] = cand_bf->quant_dc.v[txb_itr];
3785
3786
0
    if (ctx->bypass_encdec) {
3787
0
        blk_ptr->eob.y[txb_itr] = cand_bf->eob.y[txb_itr];
3788
0
        blk_ptr->eob.u[txb_itr] = cand_bf->eob.u[txb_itr];
3789
0
        blk_ptr->eob.v[txb_itr] = cand_bf->eob.v[txb_itr];
3790
0
        int32_t* src_ptr;
3791
0
        int32_t* dst_ptr;
3792
3793
0
        const TxSize tx_size   = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ctx->blk_geom->bsize];
3794
0
        const int    tx_width  = tx_size_wide[tx_size];
3795
0
        const int    tx_height = tx_size_high[tx_size];
3796
3797
        // only one TX unit, so no need to bitmask
3798
0
        if (blk_ptr->y_has_coeff) {
3799
0
            src_ptr = &(((int32_t*)cand_bf->quant->y_buffer)[txb_1d_offset]);
3800
0
            dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->y_buffer) + ctx->coded_area_sb;
3801
0
            svt_memcpy(dst_ptr, src_ptr, tx_width * tx_height * sizeof(int32_t));
3802
0
        }
3803
0
        ctx->coded_area_sb += tx_width * tx_height;
3804
3805
0
        const TxSize tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
3806
0
        const int    tx_width_uv  = tx_size_wide[tx_size_uv];
3807
0
        const int    tx_height_uv = tx_size_high[tx_size_uv];
3808
        // Cb
3809
        // only one TX unit, so no need to bitmask
3810
0
        if (blk_ptr->u_has_coeff) {
3811
0
            src_ptr = &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset_uv]);
3812
0
            dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->u_buffer) +
3813
0
                ctx->coded_area_sb_uv;
3814
0
            svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
3815
0
        }
3816
3817
        // Cr
3818
        // only one TX unit, so no need to bitmask
3819
0
        if (blk_ptr->v_has_coeff) {
3820
0
            src_ptr = &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset_uv]);
3821
0
            dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->v_buffer) +
3822
0
                ctx->coded_area_sb_uv;
3823
0
            svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
3824
0
        }
3825
0
        ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;
3826
0
    }
3827
0
}
3828
3829
0
/*
 * Multiplier applied to the lowest SSD cost to form the SSD-cost ceiling used by the
 * SSIM-based full mode decision: 1.02 when the input resolution is at or above
 * INPUT_SIZE_1080p_RANGE, 1.03 otherwise.
 */
static INLINE double derive_ssim_threshold_factor_for_full_md(SequenceControlSet* scs) {
    if (scs->input_resolution >= INPUT_SIZE_1080p_RANGE) {
        return 1.02;
    }
    return 1.03;
}
3832
3833
/***************************************
3834
* Full Mode Decision
3835
***************************************/
3836
uint32_t svt_aom_product_full_mode_decision(PictureControlSet* pcs, ModeDecisionContext* ctx,
3837
                                            ModeDecisionCandidateBuffer** buffer_ptr_array,
3838
247k
                                            uint32_t candidate_total_count, uint32_t* best_candidate_index_array) {
3839
247k
    SequenceControlSet* scs                = pcs->scs;
3840
247k
    BlkStruct*          blk_ptr            = ctx->blk_ptr;
3841
247k
    uint32_t            lowest_cost_index  = best_candidate_index_array[0];
3842
247k
    const bool          use_ssim_full_cost = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false;
3843
3844
    // Find the candidate with the lowest cost
3845
    // Only need to sort if have multiple candidates
3846
247k
    if (ctx->md_stage_3_total_count > 1) {
3847
0
        if (use_ssim_full_cost) {
3848
            // Pass one: find candidate with the lowest SSD cost
3849
0
            uint64_t ssd_lowest_cost = 0xFFFFFFFFFFFFFFFFull;
3850
0
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
3851
0
                uint32_t cand_index = best_candidate_index_array[i];
3852
0
                uint64_t cost       = *(buffer_ptr_array[cand_index]->full_cost);
3853
0
                if (cost < ssd_lowest_cost) {
3854
0
                    lowest_cost_index = cand_index;
3855
0
                    ssd_lowest_cost   = cost;
3856
0
                }
3857
0
            }
3858
3859
            // Pass two: among the candidates with SSD cost not greater than the threshold, find the one with the lowest SSIM cost
3860
0
            const double   threshold_factor   = derive_ssim_threshold_factor_for_full_md(scs);
3861
0
            const uint64_t ssd_cost_threshold = (uint64_t)(threshold_factor * ssd_lowest_cost);
3862
0
            uint64_t       ssim_lowest_cost   = 0xFFFFFFFFFFFFFFFFull;
3863
0
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
3864
0
                uint32_t cand_index = best_candidate_index_array[i];
3865
3866
0
                uint64_t ssim_cost = *(buffer_ptr_array[cand_index]->full_cost_ssim);
3867
0
                uint64_t ssd_cost  = *(buffer_ptr_array[cand_index]->full_cost);
3868
0
                if (ssim_cost < ssim_lowest_cost) {
3869
0
                    if (ssd_cost <= ssd_cost_threshold) {
3870
0
                        lowest_cost_index = cand_index;
3871
0
                        ssim_lowest_cost  = ssim_cost;
3872
0
                        ssd_lowest_cost   = ssd_cost;
3873
0
                    }
3874
0
                } else if (ssim_cost == ssim_lowest_cost) {
3875
                    // if two candidates have the same ssim cost, choose the one with lower ssd cost
3876
0
                    if (ssd_cost < ssd_lowest_cost) {
3877
0
                        lowest_cost_index = cand_index;
3878
0
                        ssd_lowest_cost   = ssd_cost;
3879
0
                    }
3880
0
                }
3881
0
            }
3882
0
        } else { // fallback to SSD based RD cost
3883
0
            uint64_t lowest_cost = 0xFFFFFFFFFFFFFFFFull;
3884
0
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
3885
0
                uint32_t cand_index = best_candidate_index_array[i];
3886
3887
0
                uint64_t cost = *(buffer_ptr_array[cand_index]->full_cost);
3888
0
                if (scs->vq_ctrls.sharpness_ctrls.unipred_bias && pcs->ppcs->is_noise_level &&
3889
0
                    is_inter_singleref_mode(buffer_ptr_array[cand_index]->cand->block_mi.mode)) {
3890
0
                    cost = (cost * uni_psy_bias[pcs->ppcs->picture_qp]) / 100;
3891
0
                }
3892
3893
0
                if (cost < lowest_cost) {
3894
0
                    lowest_cost_index = cand_index;
3895
0
                    lowest_cost       = cost;
3896
0
                }
3897
0
            }
3898
0
        }
3899
0
    }
3900
247k
    ModeDecisionCandidateBuffer* cand_bf = buffer_ptr_array[lowest_cost_index];
3901
247k
    ModeDecisionCandidate*       cand    = cand_bf->cand;
3902
247k
    blk_ptr->total_rate                  = cand_bf->total_rate;
3903
247k
    if (!(ctx->pd_pass == PD_PASS_1 && ctx->fixed_partition)) {
3904
        // When lambda tuning is on, lambda of each block is set separately, however at interdepth decision the sb lambda is used
3905
239k
        uint32_t full_lambda = ctx->hbd_md ? ctx->full_sb_lambda_md[EB_10_BIT_MD] : ctx->full_sb_lambda_md[EB_8_BIT_MD];
3906
239k
        ctx->blk_ptr->cost   = RDCOST(full_lambda, cand_bf->total_rate, cand_bf->full_dist);
3907
239k
        ctx->blk_ptr->full_dist = cand_bf->full_dist;
3908
239k
    }
3909
3910
    // Set common signals (INTER/INTRA)
3911
247k
    svt_memcpy(&blk_ptr->block_mi, &cand->block_mi, sizeof(BlockModeInfo));
3912
    // Set INTER mode signals
3913
    // INTER signals set first b/c INTER shuts Palette, so INTRA must overwrite if Palette + intrabc is used
3914
247k
    if (is_inter_block(&blk_ptr->block_mi)) {
3915
0
        blk_ptr->drl_index = cand->drl_index;
3916
0
        assert(IMPLIES(
3917
0
            is_inter_compound_mode(cand->block_mi.mode) && blk_ptr->block_mi.interinter_comp.type == COMPOUND_AVERAGE,
3918
0
            (blk_ptr->block_mi.comp_group_idx == 0 && blk_ptr->block_mi.compound_idx == 1)));
3919
3920
0
        blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
3921
        // Set MVs
3922
0
        blk_ptr->predmv[0].as_int = cand->pred_mv[0].as_int;
3923
0
        if (has_second_ref(&blk_ptr->block_mi)) {
3924
0
            blk_ptr->predmv[1].as_int = cand->pred_mv[1].as_int;
3925
0
        }
3926
0
        if (blk_ptr->block_mi.motion_mode == WARPED_CAUSAL ||
3927
0
            (cand->block_mi.mode == GLOBALMV || cand->block_mi.mode == GLOBAL_GLOBALMV)) {
3928
0
            svt_memcpy(&ctx->blk_ptr->wm_params_l0, &cand->wm_params_l0, sizeof(WarpedMotionParams));
3929
0
            svt_memcpy(&ctx->blk_ptr->wm_params_l1, &cand->wm_params_l1, sizeof(WarpedMotionParams));
3930
0
        }
3931
3932
0
        if (ctx->pd_pass == PD_PASS_1) {
3933
0
            const int8_t ref_frame_type = av1_ref_frame_type(blk_ptr->block_mi.ref_frame);
3934
            // Store winning inter_mode_ctx in blk to avoid storing for all ref frames for EC
3935
0
            blk_ptr->inter_mode_ctx = ctx->inter_mode_ctx[ref_frame_type];
3936
            // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
3937
0
            if (blk_ptr->block_mi.mode == NEWMV || blk_ptr->block_mi.mode == NEW_NEWMV) {
3938
0
                for (uint8_t idx = 0; idx < 2; ++idx) {
3939
0
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3940
0
                        blk_ptr->drl_ctx[idx] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3941
0
                    } else {
3942
0
                        blk_ptr->drl_ctx[idx] = -1;
3943
0
                    }
3944
0
                }
3945
0
            }
3946
3947
0
            if (have_nearmv_in_inter_mode(blk_ptr->block_mi.mode)) {
3948
                // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
3949
0
                for (uint8_t idx = 1; idx < 3; ++idx) {
3950
0
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
3951
0
                        blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
3952
0
                    } else {
3953
0
                        blk_ptr->drl_ctx_near[idx - 1] = -1;
3954
0
                    }
3955
0
                }
3956
0
            }
3957
0
        }
3958
0
    }
3959
3960
    // Set INTRA mode signals
3961
247k
    if (is_intra_mode(blk_ptr->block_mi.mode)) {
3962
246k
        if (!cand->palette_info) {
3963
246k
            blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
3964
18.4E
        } else if (svt_av1_allow_palette(ctx->md_palette_level, ctx->blk_geom->bsize)) {
3965
0
            memcpy(&blk_ptr->palette_info->pmi, &cand->palette_info->pmi, sizeof(PaletteModeInfo));
3966
0
            memcpy(blk_ptr->palette_info->color_idx_map, cand->palette_info->color_idx_map, MAX_PALETTE_SQUARE);
3967
0
            blk_ptr->palette_size[0] = cand->palette_size[0];
3968
0
            blk_ptr->palette_size[1] = cand->palette_size[1];
3969
0
        }
3970
3971
246k
        if (blk_ptr->block_mi.use_intrabc == 0) {
3972
246k
            cand->skip_mode_allowed = false;
3973
246k
        }
3974
246k
    }
3975
3976
    // Set TX and coeff-related data
3977
247k
    blk_ptr->block_has_coeff   = ((cand_bf->block_has_coeff) > 0) ? true : false;
3978
247k
    ctx->blk_ptr->cnt_nz_coeff = cand_bf->cnt_nz_coeff;
3979
3980
    // If skip_mode is allowed, and block has no coeffs, use skip_mode
3981
247k
    if (cand->skip_mode_allowed == true) {
3982
0
        blk_ptr->block_mi.skip_mode |= !blk_ptr->block_has_coeff;
3983
0
    }
3984
3985
247k
    assert(IMPLIES(pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE && blk_ptr->block_mi.skip_mode,
3986
247k
                   cand->block_mi.interp_filters == 0));
3987
247k
    if (blk_ptr->block_mi.skip_mode) {
3988
0
        blk_ptr->block_has_coeff = 0;
3989
0
        cand_bf->y_has_coeff     = 0;
3990
0
        cand_bf->u_has_coeff     = 0;
3991
0
        cand_bf->v_has_coeff     = 0;
3992
0
    }
3993
3994
247k
    blk_ptr->block_mi.skip = !blk_ptr->block_has_coeff;
3995
247k
    blk_ptr->y_has_coeff   = cand_bf->y_has_coeff;
3996
247k
    blk_ptr->u_has_coeff   = cand_bf->u_has_coeff;
3997
247k
    blk_ptr->v_has_coeff   = cand_bf->v_has_coeff;
3998
247k
    svt_memcpy(blk_ptr->tx_type, cand->transform_type, sizeof(TxType) * MAX_TXB_COUNT);
3999
247k
    blk_ptr->tx_type_uv = cand->transform_type_uv;
4000
247k
    svt_memcpy(&blk_ptr->quant_dc, &cand_bf->quant_dc, sizeof(QuantDcData));
4001
247k
    svt_memcpy(&blk_ptr->eob, &cand_bf->eob, sizeof(EobData));
4002
4003
    // If bypassing EncDec, save recon/coeff
4004
247k
    if (ctx->bypass_encdec && ctx->pd_pass == PD_PASS_1) {
4005
127k
        const uint16_t tu_total_count = tx_blocks_per_depth[ctx->blk_geom->bsize][blk_ptr->block_mi.tx_depth];
4006
127k
        int32_t        txb_1d_offset = 0, txb_1d_offset_uv = 0;
4007
127k
        const TxSize   tx_size      = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ctx->blk_geom->bsize];
4008
127k
        const int      tx_width     = tx_size_wide[tx_size];
4009
127k
        const int      tx_height    = tx_size_high[tx_size];
4010
127k
        const TxSize   tx_size_uv   = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
4011
127k
        const int      tx_width_uv  = tx_size_wide[tx_size_uv];
4012
127k
        const int      tx_height_uv = tx_size_high[tx_size_uv];
4013
605k
        for (uint16_t txb_itr = 0; txb_itr < tu_total_count; txb_itr++) {
4014
478k
            const bool uv_pass = (blk_ptr->block_mi.tx_depth == 0 || txb_itr == 0);
4015
4016
478k
            int32_t* src_ptr = &(((int32_t*)cand_bf->quant->y_buffer)[txb_1d_offset]);
4017
478k
            int32_t* dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->y_buffer)[txb_1d_offset]);
4018
4019
478k
            if (ctx->fixed_partition) {
4020
7.54k
                dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->y_buffer) +
4021
7.54k
                    ctx->coded_area_sb;
4022
7.54k
                ctx->coded_area_sb += tx_width * tx_height;
4023
7.54k
            }
4024
4025
478k
            if (blk_ptr->y_has_coeff & (1 << txb_itr)) {
4026
5.61k
                svt_memcpy(dst_ptr, src_ptr, tx_width * tx_height * sizeof(int32_t));
4027
5.61k
            }
4028
4029
478k
            txb_1d_offset += tx_width * tx_height;
4030
4031
478k
            if (ctx->has_uv && uv_pass) {
4032
                // Cb
4033
126k
                src_ptr = &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset_uv]);
4034
126k
                dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->u_buffer)[txb_1d_offset_uv]);
4035
4036
126k
                if (ctx->fixed_partition) {
4037
7.54k
                    dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->u_buffer) +
4038
7.54k
                        ctx->coded_area_sb_uv;
4039
7.54k
                }
4040
4041
126k
                if (blk_ptr->u_has_coeff & (1 << txb_itr)) {
4042
5.46k
                    svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
4043
5.46k
                }
4044
4045
                // Cr
4046
126k
                src_ptr = &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset_uv]);
4047
126k
                dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->v_buffer)[txb_1d_offset_uv]);
4048
4049
126k
                if (ctx->fixed_partition) {
4050
7.54k
                    dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->v_buffer) +
4051
7.54k
                        ctx->coded_area_sb_uv;
4052
7.54k
                    ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;
4053
7.54k
                }
4054
4055
126k
                if (blk_ptr->v_has_coeff & (1 << txb_itr)) {
4056
5.46k
                    svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
4057
5.46k
                }
4058
4059
126k
                txb_1d_offset_uv += tx_width_uv * tx_height_uv;
4060
126k
            }
4061
478k
        }
4062
127k
    }
4063
4064
247k
    return lowest_cost_index;
4065
247k
}
4066
4067
// Return the end column for the current superblock, in unit of TPL blocks.
4068
0
/*
 * ppcs      parent picture control set (superblock size and superres denominator are read from it)
 * mi_col    column of the current block, in mi units
 * num_mi_w  width of one TPL block, in mi units
 *
 * Returns the (exclusive) end column of the superblock containing mi_col, in TPL-block units.
 */
static int get_superblock_tpl_column_end(PictureParentControlSet* ppcs, int mi_col, int num_mi_w) {
    // log2 of the superblock width in mi units: 5 for 128x128 superblocks, 4 otherwise.
    int sb_mi_log2 = 4;
    if (ppcs->scs->seq_header.sb_size == BLOCK_128X128) {
        sb_mi_log2 = 5;
    }

    // Start column of this superblock, converted with coded_to_superres_mi().
    const int sb_start_sr = coded_to_superres_mi((mi_col >> sb_mi_log2) << sb_mi_log2, ppcs->superres_denom);
    // Width of this superblock in mi units, converted the same way.
    const int sb_width_sr = coded_to_superres_mi(mi_size_wide[ppcs->scs->seq_header.sb_size], ppcs->superres_denom);

    // Superblock end in mi units, rounded up to a whole number of TPL blocks.
    return (sb_start_sr + sb_width_sr + num_mi_w - 1) / num_mi_w;
}
4083
4084
0
/*
 * Scales the MD lambdas of the current block by the geometric mean of the per-16x16
 * ssim_rdmult_scaling_factors that the block covers.
 *
 * ctx     mode decision context; full_lambda_md / fast_lambda_md are rewritten for both bit depths
 * pcs     picture control set
 * mi_row  block row origin, in mi units
 * mi_col  block column origin, in mi units
 *
 * When block lambda tuning is off the picture-level lambdas (ctx->ed_ctx->pic_*_lambda) are the
 * base values; when it is on, the lambdas already stored in ctx are scaled in place.
 */
void aom_av1_set_ssim_rdmult(ModeDecisionContext* ctx, PictureControlSet* pcs, const int mi_row, const int mi_col) {
    const Av1Common* const cm    = pcs->ppcs->av1_cm;
    const BlockSize        bsize = ctx->blk_geom->bsize;

    const int bsize_base = BLOCK_16X16;
    const int num_mi_w   = mi_size_wide[bsize_base];
    const int num_mi_h   = mi_size_high[bsize_base];
    const int num_cols   = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
    const int num_rows   = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
    const int num_bcols  = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
    const int num_brows  = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;

    // Rows are measured in units of the base-block height and columns in units of its width.
    // (The two divisors used to be swapped, which only worked because the base block is square.)
    const int first_row = mi_row / num_mi_h;
    const int first_col = mi_col / num_mi_w;

    double num_of_mi          = 0.0;
    double geom_mean_of_scale = 1.0;
    for (int row = first_row; row < num_rows && row < first_row + num_brows; ++row) {
        for (int col = first_col; col < num_cols && col < first_col + num_bcols; ++col) {
            const int index = row * num_cols + col;
            geom_mean_of_scale *= pcs->ppcs->pa_me_data->ssim_rdmult_scaling_factors[index];
            num_of_mi += 1.0;
        }
    }
    // If no cell was visited the product is still 1.0 and pow(1.0, +inf) is 1.0, i.e. no scaling.
    geom_mean_of_scale = pow(geom_mean_of_scale, (1.0 / num_of_mi));

    const bool use_pic_lambda   = !pcs->ppcs->blk_lambda_tuning;
    const int  bit_depth_idx[2] = {EB_8_BIT_MD, EB_10_BIT_MD};
    for (int k = 0; k < 2; ++k) {
        const int    bd        = bit_depth_idx[k];
        const double full_base = use_pic_lambda ? (double)ctx->ed_ctx->pic_full_lambda[bd]
                                                : (double)ctx->full_lambda_md[bd];
        const double fast_base = use_pic_lambda ? (double)ctx->ed_ctx->pic_fast_lambda[bd]
                                                : (double)ctx->fast_lambda_md[bd];

        // + 0.5 rounds to nearest before the truncating conversion
        ctx->full_lambda_md[bd] = (uint32_t)(full_base * geom_mean_of_scale + 0.5);
        ctx->fast_lambda_md[bd] = (uint32_t)(fast_base * geom_mean_of_scale + 0.5);
    }
}
4128
4129
0
/*
 * Derives the block-level MD lambdas from the picture-level lambdas and the TPL rdmult scaling
 * factors covered by the current block (geometric mean, accumulated in the log domain).
 *
 * ctx  mode decision context; block origin and size are read from it, and full_lambda_md /
 *      fast_lambda_md are written for both bit depths
 * pcs  picture control set
 *
 * If the block covers no TPL cell (possible with superres), all four lambdas are set to
 * SUPERRES_INVALID_STATE and the function returns early. For TUNE_SSIM, TUNE_IQ and TUNE_MS_SSIM
 * the result is further adjusted by aom_av1_set_ssim_rdmult().
 */
void svt_aom_set_tuned_blk_lambda(ModeDecisionContext* ctx, PictureControlSet* pcs) {
    PictureParentControlSet* ppcs = pcs->ppcs;
    Av1Common*               cm   = ppcs->av1_cm;

    BlockSize bsize  = ctx->blk_geom->bsize;
    int       mi_row = ctx->blk_org_y / 4;
    int       mi_col = ctx->blk_org_x / 4;

    const int mi_col_sr         = coded_to_superres_mi(mi_col, ppcs->superres_denom);
    const int mi_cols_sr        = ((ppcs->enhanced_unscaled_pic->width + 15) / 16) << 2; // picture column boundary
    const int block_mi_width_sr = coded_to_superres_mi(mi_size_wide[bsize], ppcs->superres_denom);
    const int bsize_base        = ppcs->tpl_ctrls.synth_blk_size == 32 ? BLOCK_32X32 : BLOCK_16X16;
    const int num_mi_w          = mi_size_wide[bsize_base];
    const int num_mi_h          = mi_size_high[bsize_base];
    const int num_cols          = (mi_cols_sr + num_mi_w - 1) / num_mi_w;
    const int num_rows          = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
    const int num_bcols         = (block_mi_width_sr + num_mi_w - 1) / num_mi_w;
    const int num_brows         = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;

    // This is required because the end col of superblock may be off by 1 in case
    // of superres.
    const int sb_bcol_end = get_superblock_tpl_column_end(ppcs, mi_col, num_mi_w);

    // Rows are measured in units of the base-block height and columns in units of its width.
    // (The two divisors used to be swapped, which only worked because the base blocks are square.)
    const int first_row = mi_row / num_mi_h;
    const int first_col = mi_col_sr / num_mi_w;

    int32_t base_block_count   = 0;
    double  geom_mean_of_scale = 0.0;
    for (int row = first_row; row < num_rows && row < first_row + num_brows; ++row) {
        for (int col = first_col; col < num_cols && col < first_col + num_bcols && col < sb_bcol_end; ++col) {
            const int index = row * num_cols + col;
            geom_mean_of_scale += log(ppcs->pa_me_data->tpl_sb_rdmult_scaling_factors[index]);
            ++base_block_count;
        }
    }
    // When superres is on, base_block_count could be zero.
    // This function's counterpart in AOM, av1_get_hier_tpl_rdmult, will encounter division by zero
    if (base_block_count == 0) {
        // return a large number to indicate invalid state
        ctx->full_lambda_md[EB_8_BIT_MD]  = SUPERRES_INVALID_STATE;
        ctx->full_lambda_md[EB_10_BIT_MD] = SUPERRES_INVALID_STATE;

        ctx->fast_lambda_md[EB_8_BIT_MD]  = SUPERRES_INVALID_STATE;
        ctx->fast_lambda_md[EB_10_BIT_MD] = SUPERRES_INVALID_STATE;
        return;
    }

    // exp(mean(log(x))) == geometric mean of the visited scaling factors
    geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count);

    // + 0.5 rounds to nearest before the truncating conversion
    ctx->full_lambda_md[EB_8_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5);
    ctx->full_lambda_md[EB_10_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5);

    ctx->fast_lambda_md[EB_8_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5);
    ctx->fast_lambda_md[EB_10_BIT_MD] =
        (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5);

    if (ppcs->scs->static_config.tune == TUNE_SSIM || ppcs->scs->static_config.tune == TUNE_IQ ||
        ppcs->scs->static_config.tune == TUNE_MS_SSIM) {
        aom_av1_set_ssim_rdmult(ctx, pcs, mi_row, mi_col);
    }
}
4190
4191
double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, uint32_t sum_sq_r, uint32_t sum_sxr, int count,
4192
                  uint32_t bd);
4193
4194
0
/*
 * SSIM score of one 4x4 block of 8-bit samples.
 *
 * s, sp  first sample and stride (in samples) of the first block
 * r, rp  first sample and stride (in samples) of the second block
 *
 * Accumulates the sums, squared sums and cross sum of the 16 sample pairs and hands them to
 * similarity() with a bit depth of 8.
 */
double svt_ssim_4x4_c(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp) {
    const int32_t count = 4 * 4;

    uint32_t sum_s    = 0;
    uint32_t sum_r    = 0;
    uint32_t sum_sq_s = 0;
    uint32_t sum_sq_r = 0;
    uint32_t sum_sxr  = 0;

    for (uint32_t y = 0; y < 4; ++y, s += sp, r += rp) {
        for (uint32_t x = 0; x < 4; ++x) {
            const uint32_t sv = s[x];
            const uint32_t rv = r[x];

            sum_s += sv;
            sum_r += rv;
            sum_sq_s += sv * sv;
            sum_sq_r += rv * rv;
            sum_sxr += sv * rv;
        }
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 8);
}
4218
4219
0
/*
 * SSIM score of one 8x8 block of 8-bit samples.
 *
 * s, sp  first sample and stride (in samples) of the first block
 * r, rp  first sample and stride (in samples) of the second block
 *
 * Accumulates the sums, squared sums and cross sum of the 64 sample pairs and hands them to
 * similarity() with a bit depth of 8.
 */
double svt_ssim_8x8_c(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp) {
    const int32_t count = 8 * 8;

    uint32_t sum_s    = 0;
    uint32_t sum_r    = 0;
    uint32_t sum_sq_s = 0;
    uint32_t sum_sq_r = 0;
    uint32_t sum_sxr  = 0;

    for (uint32_t y = 0; y < 8; ++y, s += sp, r += rp) {
        for (uint32_t x = 0; x < 8; ++x) {
            const uint32_t sv = s[x];
            const uint32_t rv = r[x];

            sum_s += sv;
            sum_r += rv;
            sum_sq_s += sv * sv;
            sum_sq_r += rv * rv;
            sum_sxr += sv * rv;
        }
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 8);
}
4246
4247
0
/*
 * SSIM score of one 4x4 block of 16-bit-container samples.
 *
 * s, sp  first sample and stride (in samples) of the first block
 * r, rp  first sample and stride (in samples) of the second block
 *
 * Accumulates the sums, squared sums and cross sum of the 16 sample pairs and hands them to
 * similarity() with a bit depth of 10.
 */
double svt_ssim_4x4_hbd_c(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp) {
    const int32_t count = 4 * 4;

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t i = 0; i < 4; i++) {
        for (uint32_t j = 0; j < 4; j++) {
            // Widen to unsigned before multiplying: uint16_t operands would be promoted to signed
            // int, and the product of two samples above 46340 would overflow it.
            const uint32_t sv = s[j];
            const uint32_t rv = r[j];

            sum_s += sv;
            sum_r += rv;
            sum_sq_s += sv * sv;
            sum_sq_r += rv * rv;
            sum_sxr += sv * rv;
        }

        s += sp;
        r += rp;
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 10);
}
4271
4272
0
/*
 * SSIM score of one 8x8 block of 16-bit-container samples.
 *
 * s, sp  first sample and stride (in samples) of the first block
 * r, rp  first sample and stride (in samples) of the second block
 *
 * Accumulates the sums, squared sums and cross sum of the 64 sample pairs and hands them to
 * similarity() with a bit depth of 10.
 */
double svt_ssim_8x8_hbd_c(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp) {
    const int32_t count = 8 * 8;

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t i = 0; i < 8; i++) {
        for (uint32_t j = 0; j < 8; j++) {
            // Widen to unsigned before multiplying: uint16_t operands would be promoted to signed
            // int, and the product of two samples above 46340 would overflow it.
            const uint32_t sv = s[j];
            const uint32_t rv = r[j];

            sum_s += sv;
            sum_r += rv;
            sum_sq_s += sv * sv;
            sum_sq_r += rv * rv;
            sum_sxr += sv * rv;
        }

        s += sp;
        r += rp;
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 10);
}
4296
4297
/*
 * Mean of the per-tile SSIM scores over the non-overlapping 8x8 tiles of a width x height region
 * of 8-bit samples. Each tile score is clamped to [0, 1] before averaging.
 *
 * s, sp  first sample and stride of the first region
 * r, rp  first sample and stride of the second region
 *
 * The region must contain at least one full 8x8 tile (asserted).
 */
static double ssim_8x8_blocks(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width,
                              uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 8x8 tile. The bounds are written as "i + 8 <= height" rather than
    // "i <= height - 8" so that a dimension below 8 gives zero iterations instead of wrapping
    // the unsigned subtraction and walking far past the buffers.
    for (uint32_t i = 0; i + 8 <= height; i += 8, s += sp * 8, r += rp * 8) {
        for (uint32_t j = 0; j + 8 <= width; j += 8) {
            double v = svt_ssim_8x8(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4317
4318
/*
 * Mean of the per-tile SSIM scores over the non-overlapping 4x4 tiles of a width x height region
 * of 8-bit samples. Each tile score is clamped to [0, 1] before averaging.
 *
 * s, sp  first sample and stride of the first region
 * r, rp  first sample and stride of the second region
 *
 * The region must contain at least one full 4x4 tile (asserted).
 */
static double ssim_4x4_blocks(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width,
                              uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 4x4 tile. The bounds are written as "i + 4 <= height" rather than
    // "i <= height - 4" so that a dimension below 4 gives zero iterations instead of wrapping
    // the unsigned subtraction and walking far past the buffers.
    for (uint32_t i = 0; i + 4 <= height; i += 4, s += sp * 4, r += rp * 4) {
        for (uint32_t j = 0; j + 4 <= width; j += 4) {
            double v = svt_ssim_4x4(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4338
4339
0
/*
 * Mean SSIM of a width x height region of 8-bit samples.
 * Uses 8x8 tiles when both dimensions are multiples of 8 and 4x4 tiles otherwise;
 * both dimensions are expected to be multiples of 4 (asserted).
 */
static double ssim(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width, uint32_t height) {
    assert((width % 4) == 0 && (height % 4) == 0);

    const bool not_multiple_of_8 = (width % 8) != 0 || (height % 8) != 0;
    if (not_multiple_of_8) {
        return ssim_4x4_blocks(s, sp, r, rp, width, height);
    }
    return ssim_8x8_blocks(s, sp, r, rp, width, height);
}
4347
4348
/*
 * Mean of the per-tile SSIM scores over the non-overlapping 8x8 tiles of a width x height region
 * of 16-bit-container samples. Each tile score is clamped to [0, 1] before averaging.
 *
 * s, sp  first sample and stride of the first region
 * r, rp  first sample and stride of the second region
 *
 * The region must contain at least one full 8x8 tile (asserted).
 */
static double ssim_8x8_blocks_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                                  uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 8x8 tile. The bounds are written as "i + 8 <= height" rather than
    // "i <= height - 8" so that a dimension below 8 gives zero iterations instead of wrapping
    // the unsigned subtraction and walking far past the buffers.
    for (uint32_t i = 0; i + 8 <= height; i += 8, s += sp * 8, r += rp * 8) {
        for (uint32_t j = 0; j + 8 <= width; j += 8) {
            double v = svt_ssim_8x8_hbd(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4368
4369
/*
 * Mean of the per-tile SSIM scores over the non-overlapping 4x4 tiles of a width x height region
 * of 16-bit-container samples. Each tile score is clamped to [0, 1] before averaging.
 *
 * s, sp  first sample and stride of the first region
 * r, rp  first sample and stride of the second region
 *
 * The region must contain at least one full 4x4 tile (asserted).
 */
static double ssim_4x4_blocks_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                                  uint32_t height) {
    int    samples    = 0;
    double ssim_total = 0;

    // One sample per 4x4 tile. The bounds are written as "i + 4 <= height" rather than
    // "i <= height - 4" so that a dimension below 4 gives zero iterations instead of wrapping
    // the unsigned subtraction and walking far past the buffers.
    for (uint32_t i = 0; i + 4 <= height; i += 4, s += sp * 4, r += rp * 4) {
        for (uint32_t j = 0; j + 4 <= width; j += 4) {
            double v = svt_ssim_4x4_hbd(s + j, sp, r + j, rp);
            v        = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4389
4390
/*
 * Mean SSIM of a width x height region of 16-bit-container samples.
 * Uses 8x8 tiles when both dimensions are multiples of 8 and 4x4 tiles otherwise;
 * both dimensions are expected to be multiples of 4 (asserted).
 */
static double ssim_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                       uint32_t height) {
    assert((width % 4) == 0 && (height % 4) == 0);

    const bool not_multiple_of_8 = (width % 8) != 0 || (height % 8) != 0;
    if (not_multiple_of_8) {
        return ssim_4x4_blocks_hbd(s, sp, r, rp, width, height);
    }
    return ssim_8x8_blocks_hbd(s, sp, r, rp, width, height);
}
4399
4400
/*
 * SSIM-based spatial distortion of a recon area against the input, plus an optional
 * AC-bias ("psy") term.
 *
 * input, input_offset, input_stride   source buffer, offset of the first sample, stride
 * recon, recon_offset, recon_stride   reconstruction buffer, offset of the first sample, stride
 * area_width, area_height             size of the compared area
 * hbd                                 when true both buffers are read as uint16_t samples
 * ac_bias                             weight of the psy distortion; 0 disables that term
 *
 * Returns (1 - ssim) * area * 100 * 7 * m, truncated to an integer, plus the weighted psy
 * distortion, where m is 1 for 8-bit and 8 for hbd. In the hbd path the psy term is only
 * computed when CONFIG_ENABLE_HIGH_BIT_DEPTH is set.
 */
uint64_t svt_spatial_full_distortion_ssim_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                                                 uint8_t* recon, int32_t recon_offset, uint32_t recon_stride,
                                                 uint32_t area_width, uint32_t area_height, bool hbd, double ac_bias) {
    const uint32_t count          = area_width * area_height;
    uint64_t       psy_distortion = 0; // AC SAD contribution
    double         ssim_score;
    uint8_t        m;

    if (hbd) {
        uint16_t* const src16 = (uint16_t*)input + input_offset;
        uint16_t* const rec16 = (uint16_t*)recon + recon_offset;

        m          = 8;
        ssim_score = ssim_hbd(src16, input_stride, rec16, recon_stride, area_width, area_height);
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
        if (ac_bias != 0) {
            const uint64_t ac_distortion = svt_psy_distortion_hbd(
                src16, input_stride, rec16, recon_stride, area_width, area_height);
            psy_distortion = (uint64_t)(ac_distortion * ac_bias);
        }
#endif
    } else {
        uint8_t* const src8 = input + input_offset;
        uint8_t* const rec8 = recon + recon_offset;

        m          = 1;
        ssim_score = ssim(src8, input_stride, rec8, recon_stride, area_width, area_height);
        if (ac_bias != 0) {
            const uint64_t ac_distortion = svt_psy_distortion(
                src8, input_stride, rec8, recon_stride, area_width, area_height);
            psy_distortion = (uint64_t)(ac_distortion * ac_bias);
        }
    }

    // SSIM term; the multiplication order is kept as-is so the rounding is unchanged
    const uint64_t spatial_distortion = (uint64_t)((1 - ssim_score) * count * 100 * 7 * m);

    return spatial_distortion + psy_distortion;
}