Coverage Report

Created: 2022-08-24 06:15

/src/aom/av1/encoder/intra_mode_search.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2020, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include "av1/common/av1_common_int.h"
13
#include "av1/common/reconintra.h"
14
15
#include "av1/encoder/intra_mode_search.h"
16
#include "av1/encoder/intra_mode_search_utils.h"
17
#include "av1/encoder/palette.h"
18
#include "av1/encoder/speed_features.h"
19
#include "av1/encoder/tx_search.h"
20
21
/*!\cond */
22
// Luma intra prediction modes in search order. set_y_mode_and_delta_angle()
// looks a mode up here for every mode_idx below INTRA_MODE_END, so the
// position of each entry is significant.
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
23
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
24
  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED,   D157_PRED,
25
  D67_PRED,      D113_PRED,     D45_PRED,
26
};
27
28
// Chroma (UV) intra prediction modes. Judging by the name this is the order
// in which the chroma RD search visits them; the consumer of this table is
// outside this part of the file, so confirm against it before reordering.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
29
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
30
  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
31
  UV_D135_PRED,   UV_D203_PRED,  UV_D157_PRED,     UV_D67_PRED,
32
  UV_D113_PRED,   UV_D45_PRED,
33
};
34
35
// The bitmask corresponds to the filter intra modes as defined in the enums.h
36
// FILTER_INTRA_MODE enumeration type. Setting a bit to 0 in the mask
37
// disables the evaluation of the corresponding filter intra mode. The table
38
// av1_derived_filter_intra_mode_used_flag is used when the speed feature
39
// prune_filter_intra_level is 1. The evaluated filter intra modes are the
40
// union of the following:
41
// 1) FILTER_DC_PRED
42
// 2) the mode that corresponds to the best mode so far among DC_PRED, V_PRED,
43
// H_PRED, D157_PRED and PAETH_PRED (e.g. FILTER_V_PRED if the best mode so far
// is V_PRED).
// The table is indexed by the best luma PREDICTION_MODE found so far, and
// rd_pick_filter_intra_sby() tests bit (1 << mode) for filter intra mode
// `mode`.
44
static const uint8_t av1_derived_filter_intra_mode_used_flag[INTRA_MODES] = {
45
  0x01,  // DC_PRED:           0000 0001
46
  0x03,  // V_PRED:            0000 0011
47
  0x05,  // H_PRED:            0000 0101
48
  0x01,  // D45_PRED:          0000 0001
49
  0x01,  // D135_PRED:         0000 0001
50
  0x01,  // D113_PRED:         0000 0001
51
  0x09,  // D157_PRED:         0000 1001
52
  0x01,  // D203_PRED:         0000 0001
53
  0x01,  // D67_PRED:          0000 0001
54
  0x01,  // SMOOTH_PRED:       0000 0001
55
  0x01,  // SMOOTH_V_PRED:     0000 0001
56
  0x01,  // SMOOTH_H_PRED:     0000 0001
57
  0x11   // PAETH_PRED:        0001 0001
58
};
59
60
// The bitmask corresponds to the chroma intra modes as defined in the enums.h
61
// UV_PREDICTION_MODE enumeration type. Setting a bit to 0 in the mask
62
// disables the evaluation of the corresponding chroma intra mode. The table
63
// av1_derived_chroma_intra_mode_used_flag is used when the speed feature
64
// prune_chroma_modes_using_luma_winner is enabled. The evaluated chroma
65
// intra modes are the union of the following:
66
// 1) UV_DC_PRED
67
// 2) UV_SMOOTH_PRED
68
// 3) UV_CFL_PRED
69
// 4) the mode that corresponds to the luma intra mode winner (e.g. UV_V_PRED
70
// if the luma intra mode winner is V_PRED).
// There is one entry per luma PREDICTION_MODE, in the order given by the
// per-line comments below.
71
static const uint16_t av1_derived_chroma_intra_mode_used_flag[INTRA_MODES] = {
72
  0x2201,  // DC_PRED:           0010 0010 0000 0001
73
  0x2203,  // V_PRED:            0010 0010 0000 0011
74
  0x2205,  // H_PRED:            0010 0010 0000 0101
75
  0x2209,  // D45_PRED:          0010 0010 0000 1001
76
  0x2211,  // D135_PRED:         0010 0010 0001 0001
77
  0x2221,  // D113_PRED:         0010 0010 0010 0001
78
  0x2241,  // D157_PRED:         0010 0010 0100 0001
79
  0x2281,  // D203_PRED:         0010 0010 1000 0001
80
  0x2301,  // D67_PRED:          0010 0011 0000 0001
81
  0x2201,  // SMOOTH_PRED:       0010 0010 0000 0001
82
  0x2601,  // SMOOTH_V_PRED:     0010 0110 0000 0001
83
  0x2a01,  // SMOOTH_H_PRED:     0010 1010 0000 0001
84
  0x3201   // PAETH_PRED:        0011 0010 0000 0001
85
};
86
87
// All-zero reference rows (8-bit and 16-bit sample variants). They are passed
// as the reference buffer, with stride 0, to the 4x4 variance function in
// intra_rd_variance_factor().
DECLARE_ALIGNED(16, static const uint8_t, all_zeros[MAX_SB_SIZE]) = { 0 };
88
DECLARE_ALIGNED(16, static const uint16_t,
89
                highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
90
// Returns a factor to be applied to the RD value based on how well the
91
// reconstructed block variance matches the source variance.
92
static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x,
93
3.02M
                                       BLOCK_SIZE bs) {
94
3.02M
  double threshold = 1.0 - (0.25 * cpi->oxcf.speed);
95
  // For non-positive threshold values, the comparison of source and
96
  // reconstructed variances with threshold evaluates to false
97
  // (src_var < threshold/rec_var < threshold) as these metrics are greater than
98
  // than 0. Hence further calculations are skipped.
99
3.02M
  if (threshold <= 0) return 1.0;
100
101
18.4E
  MACROBLOCKD *xd = &x->e_mbd;
102
18.4E
  double variance_rd_factor = 1.0;
103
18.4E
  double src_var = 0.0;
104
18.4E
  double rec_var = 0.0;
105
18.4E
  double var_diff = 0.0;
106
18.4E
  unsigned int sse;
107
18.4E
  int i, j;
108
18.4E
  int right_overflow =
109
18.4E
      (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
110
18.4E
  int bottom_overflow =
111
18.4E
      (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
112
113
18.4E
  const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
114
18.4E
  const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
115
18.4E
  const int blocks = (bw * bh) / 16;
116
117
18.4E
  for (i = 0; i < bh; i += 4) {
118
0
    for (j = 0; j < bw; j += 4) {
119
0
      if (is_cur_buf_hbd(xd)) {
120
0
        src_var +=
121
0
            log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
122
0
                          x->plane[0].src.buf + i * x->plane[0].src.stride + j,
123
0
                          x->plane[0].src.stride,
124
0
                          CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) /
125
0
                          16.0);
126
0
        rec_var += log(
127
0
            1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
128
0
                      xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j,
129
0
                      xd->plane[0].dst.stride,
130
0
                      CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) /
131
0
                      16.0);
132
0
      } else {
133
0
        src_var +=
134
0
            log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
135
0
                          x->plane[0].src.buf + i * x->plane[0].src.stride + j,
136
0
                          x->plane[0].src.stride, all_zeros, 0, &sse) /
137
0
                          16.0);
138
0
        rec_var += log(
139
0
            1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
140
0
                      xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j,
141
0
                      xd->plane[0].dst.stride, all_zeros, 0, &sse) /
142
0
                      16.0);
143
0
      }
144
0
    }
145
0
  }
146
18.4E
  src_var /= (double)blocks;
147
18.4E
  rec_var /= (double)blocks;
148
149
  // Dont allow 0 to prevent / 0 below.
150
18.4E
  src_var += 0.000001;
151
18.4E
  rec_var += 0.000001;
152
153
18.4E
  if (src_var >= rec_var) {
154
0
    var_diff = (src_var - rec_var);
155
0
    if ((var_diff > 0.5) && (rec_var < threshold)) {
156
0
      variance_rd_factor = 1.0 + ((var_diff * 2) / src_var);
157
0
    }
158
18.4E
  } else {
159
18.4E
    var_diff = (rec_var - src_var);
160
18.4E
    if ((var_diff > 0.5) && (src_var < threshold)) {
161
0
      variance_rd_factor = 1.0 + (var_diff / (2 * src_var));
162
0
    }
163
18.4E
  }
164
165
  // Limit adjustment;
166
18.4E
  variance_rd_factor = AOMMIN(3.0, variance_rd_factor);
167
168
18.4E
  return variance_rd_factor;
169
3.02M
}
170
/*!\endcond */
171
172
/*!\brief Search for the best filter_intra mode when coding intra frame.
173
 *
174
 * \ingroup intra_mode_search
175
 * \callergraph
176
 * This function loops through all filter_intra modes to find the best one.
177
 *
178
 * \return Returns 1 if a new filter_intra mode is selected; 0 otherwise.
179
 */
180
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
181
                                    int *rate, int *rate_tokenonly,
182
                                    int64_t *distortion, int *skippable,
183
                                    BLOCK_SIZE bsize, int mode_cost,
184
                                    PREDICTION_MODE best_mode_so_far,
185
                                    int64_t *best_rd, int64_t *best_model_rd,
186
57.2k
                                    PICK_MODE_CONTEXT *ctx) {
187
  // Skip the evaluation of filter intra modes.
188
57.2k
  if (cpi->sf.intra_sf.prune_filter_intra_level == 2) return 0;
189
190
57.2k
  MACROBLOCKD *const xd = &x->e_mbd;
191
57.2k
  MB_MODE_INFO *mbmi = xd->mi[0];
192
57.2k
  int filter_intra_selected_flag = 0;
193
57.2k
  FILTER_INTRA_MODE mode;
194
57.2k
  TX_SIZE best_tx_size = TX_8X8;
195
57.2k
  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
196
57.2k
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
197
57.2k
  av1_zero(filter_intra_mode_info);
198
57.2k
  mbmi->filter_intra_mode_info.use_filter_intra = 1;
199
57.2k
  mbmi->mode = DC_PRED;
200
57.2k
  mbmi->palette_mode_info.palette_size[0] = 0;
201
202
  // Skip the evaluation of filter-intra if cached MB_MODE_INFO does not have
203
  // filter-intra as winner.
204
57.2k
  if (x->use_mb_mode_cache &&
205
57.2k
      !x->mb_mode_cache->filter_intra_mode_info.use_filter_intra)
206
0
    return 0;
207
208
343k
  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
209
286k
    int64_t this_rd;
210
286k
    RD_STATS tokenonly_rd_stats;
211
286k
    mbmi->filter_intra_mode_info.filter_intra_mode = mode;
212
213
286k
    if ((cpi->sf.intra_sf.prune_filter_intra_level == 1) &&
214
286k
        !(av1_derived_filter_intra_mode_used_flag[best_mode_so_far] &
215
286k
          (1 << mode)))
216
227k
      continue;
217
218
    // Skip the evaluation of modes that do not match with the winner mode in
219
    // x->mb_mode_cache.
220
58.8k
    if (x->use_mb_mode_cache &&
221
58.8k
        mode != x->mb_mode_cache->filter_intra_mode_info.filter_intra_mode)
222
0
      continue;
223
224
58.8k
    if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
225
1.16k
      continue;
226
1.16k
    }
227
57.7k
    av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
228
57.7k
                                      *best_rd);
229
57.7k
    if (tokenonly_rd_stats.rate == INT_MAX) continue;
230
54.1k
    const int this_rate =
231
54.1k
        tokenonly_rd_stats.rate +
232
54.1k
        intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
233
54.1k
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
234
235
    // Visual quality adjustment based on recon vs source variance.
236
54.1k
    if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
237
54.1k
      this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
238
54.1k
    }
239
240
    // Collect mode stats for multiwinner mode processing
241
54.1k
    const int txfm_search_done = 1;
242
54.1k
    store_winner_mode_stats(
243
54.1k
        &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
244
54.1k
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
245
54.1k
    if (this_rd < *best_rd) {
246
1.97k
      *best_rd = this_rd;
247
1.97k
      best_tx_size = mbmi->tx_size;
248
1.97k
      filter_intra_mode_info = mbmi->filter_intra_mode_info;
249
1.97k
      av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
250
1.97k
      memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
251
1.97k
             sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
252
1.97k
      *rate = this_rate;
253
1.97k
      *rate_tokenonly = tokenonly_rd_stats.rate;
254
1.97k
      *distortion = tokenonly_rd_stats.dist;
255
1.97k
      *skippable = tokenonly_rd_stats.skip_txfm;
256
1.97k
      filter_intra_selected_flag = 1;
257
1.97k
    }
258
54.1k
  }
259
260
57.2k
  if (filter_intra_selected_flag) {
261
1.97k
    mbmi->mode = DC_PRED;
262
1.97k
    mbmi->tx_size = best_tx_size;
263
1.97k
    mbmi->filter_intra_mode_info = filter_intra_mode_info;
264
1.97k
    av1_copy_array(ctx->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
265
1.97k
    return 1;
266
55.2k
  } else {
267
55.2k
    return 0;
268
55.2k
  }
269
57.2k
}
270
271
// Builds a histogram of the 8-bit sample values in a rows x cols region and
// counts how many distinct values occur.
//
// src        - top-left sample of the region.
// stride     - distance, in samples, between the starts of consecutive rows.
// val_count  - output histogram with 256 entries; cleared, then filled.
// num_colors - receives the number of histogram entries that are non-zero.
void av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                      int *val_count, int *num_colors) {
  const int max_pix_val = 1 << 8;
  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

  int distinct = 0;
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int this_val = src[r * stride + c];
      assert(this_val < max_pix_val);
      // A bin going from 0 to 1 means this value has not been seen before.
      if (val_count[this_val]++ == 0) ++distinct;
    }
  }
  *num_colors = distinct;
}
288
289
// High-bit-depth counterpart of av1_count_colors().
//
// src8           - sample buffer handle; converted with CONVERT_TO_SHORTPTR
//                  to obtain the 16-bit samples.
// stride         - distance, in samples, between consecutive rows.
// bit_depth      - sample bit depth; must lie in [8, 12].
// val_count      - optional histogram of the full-precision sample values,
//                  with (1 << bit_depth) entries. May be NULL.
// bin_val_count  - histogram of the samples after down-conversion to 8 bits,
//                  with 256 entries.
// num_color_bins - receives the number of non-empty 8-bit bins.
// num_colors     - receives the number of distinct full-precision values;
//                  only written when val_count is not NULL.
void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows,
                             int cols, int bit_depth, int *val_count,
                             int *bin_val_count, int *num_color_bins,
                             int *num_colors) {
  // The lower bound matters too: the shift below uses (bit_depth - 8) and a
  // negative shift count is undefined behaviour.
  assert(bit_depth >= 8 && bit_depth <= 12);
  const int max_bin_val = 1 << 8;
  const int max_pix_val = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);

  // Size each memset from the array it actually clears.
  memset(bin_val_count, 0, max_bin_val * sizeof(bin_val_count[0]));
  if (val_count != NULL) {
    memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
  }

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      /*
       * Down-convert the pixels to 8-bit domain before counting.
       * This provides consistency of behavior for palette search
       * between lbd and hbd encodes. The down-converted pixels
       * are only used for calculating the threshold (num_color_bins).
       */
      const int pix = src[r * stride + c];
      const int this_val = pix >> (bit_depth - 8);
      assert(this_val < max_bin_val);
      // Release builds: ignore samples that exceed the declared bit depth
      // instead of indexing past either histogram.
      if (this_val >= max_bin_val) continue;
      ++bin_val_count[this_val];
      if (val_count != NULL) ++val_count[pix];
    }
  }

  // Count the colors based on 8-bit domain used to gate the palette path.
  int n = 0;
  for (int i = 0; i < max_bin_val; ++i) {
    if (bin_val_count[i]) ++n;
  }
  *num_color_bins = n;

  // Count the actual hbd colors used to create top_colors.
  if (val_count != NULL) {
    n = 0;
    for (int i = 0; i < max_pix_val; ++i) {
      if (val_count[i]) ++n;
    }
    *num_colors = n;
  }
}
331
332
6.83M
// Translates a luma mode index into a prediction mode plus an angle delta and
// stores both in mbmi.
//
// Indices below INTRA_MODE_END select an entry of intra_rd_search_mode_order
// with a zero angle delta. Higher indices are laid out in groups of
// (MAX_ANGLE_DELTA * 2): the group number, offset from V_PRED, gives the
// mode, and the position inside the group gives a non-zero delta
// (positions 0, 1, 2 map to -3, -2, -1; positions 3 and up map to +1, +2, ...).
void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi) {
  if (mode_idx >= INTRA_MODE_END) {
    const int group_size = MAX_ANGLE_DELTA * 2;
    const int dir_idx = mode_idx - INTRA_MODE_END;
    const int slot = dir_idx % group_size;
    mbmi->mode = dir_idx / group_size + V_PRED;
    // Zero is skipped: it is already covered by the plain mode entries.
    mbmi->angle_delta[PLANE_TYPE_Y] = (slot < 3) ? (slot - 3) : (slot - 2);
    return;
  }

  mbmi->mode = intra_rd_search_mode_order[mode_idx];
  mbmi->angle_delta[PLANE_TYPE_Y] = 0;
}
343
344
static AOM_INLINE int get_model_rd_index_for_pruning(
345
    const MACROBLOCK *const x,
346
3.92M
    const INTRA_MODE_SPEED_FEATURES *const intra_sf) {
347
3.92M
  const int top_intra_model_count_allowed =
348
3.92M
      intra_sf->top_intra_model_count_allowed;
349
3.92M
  if (!intra_sf->adapt_top_model_rd_count_using_neighbors)
350
3.92M
    return top_intra_model_count_allowed - 1;
351
352
18.4E
  const MACROBLOCKD *const xd = &x->e_mbd;
353
18.4E
  const PREDICTION_MODE mode = xd->mi[0]->mode;
354
18.4E
  int model_rd_index_for_pruning = top_intra_model_count_allowed - 1;
355
18.4E
  int is_left_mode_neq_cur_mode = 0, is_above_mode_neq_cur_mode = 0;
356
18.4E
  if (xd->left_available)
357
0
    is_left_mode_neq_cur_mode = xd->left_mbmi->mode != mode;
358
18.4E
  if (xd->up_available)
359
0
    is_above_mode_neq_cur_mode = xd->above_mbmi->mode != mode;
360
  // The pruning of luma intra modes is made more aggressive at lower quantizers
361
  // and vice versa. The value for model_rd_index_for_pruning is derived as
362
  // follows.
363
  // qidx 0 to 127: Reduce the index of a candidate used for comparison only if
364
  // the current mode does not match either of the available neighboring modes.
365
  // qidx 128 to 255: Reduce the index of a candidate used for comparison only
366
  // if the current mode does not match both the available neighboring modes.
367
18.4E
  if (x->qindex <= 127) {
368
0
    if (is_left_mode_neq_cur_mode || is_above_mode_neq_cur_mode)
369
0
      model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0);
370
18.4E
  } else {
371
18.4E
    if (is_left_mode_neq_cur_mode && is_above_mode_neq_cur_mode)
372
0
      model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0);
373
18.4E
  }
374
18.4E
  return model_rd_index_for_pruning;
375
3.92M
}
376
377
// Decides whether a luma intra mode can be dropped based on its model RD.
//
// this_model_rd              - model RD cost of the mode under test.
// best_model_rd              - in/out: lowest model RD among the modes kept
//                              so far; lowered when this mode is kept and
//                              beats it.
// top_intra_model_rd         - in/out: ascending list of the lowest model RD
//                              costs seen so far; this_model_rd is inserted
//                              in order, and the last entry falls off.
// max_model_cnt_allowed      - number of entries in top_intra_model_rd.
// model_rd_index_for_pruning - entry of the list used as pruning reference.
//
// Returns 1 (prune) when this_model_rd exceeds the reference entry of the
// list, or exceeds 1.5 times *best_model_rd; returns 0 otherwise. Note that
// the list is updated even when the mode ends up pruned.
int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd,
                       int64_t top_intra_model_rd[], int max_model_cnt_allowed,
                       int model_rd_index_for_pruning) {
  const double thresh_best = 1.50;
  const double thresh_top = 1.00;

  // Locate the first entry that is strictly worse than the new cost.
  int pos = 0;
  while (pos < max_model_cnt_allowed &&
         this_model_rd >= top_intra_model_rd[pos]) {
    ++pos;
  }
  if (pos < max_model_cnt_allowed) {
    // Shift the tail down by one slot and drop the new cost in place.
    memmove(&top_intra_model_rd[pos + 1], &top_intra_model_rd[pos],
            (size_t)(max_model_cnt_allowed - 1 - pos) *
                sizeof(top_intra_model_rd[0]));
    top_intra_model_rd[pos] = this_model_rd;
  }

  const int64_t ref_rd = top_intra_model_rd[model_rd_index_for_pruning];
  if (ref_rd != INT64_MAX && this_model_rd > thresh_top * ref_rd) return 1;

  if (this_model_rd != INT64_MAX &&
      this_model_rd > thresh_best * (*best_model_rd)) {
    return 1;
  }

  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  return 0;
}
402
403
// Run RD calculation with given chroma intra prediction angle., and return
404
// the RD cost. Update the best mode info. if the RD cost is the best so far.
405
static int64_t pick_intra_angle_routine_sbuv(
406
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
407
    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
408
3.04k
    int *best_angle_delta, int64_t *best_rd) {
409
3.04k
  MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
410
3.04k
  assert(!is_inter_block(mbmi));
411
3.04k
  int this_rate;
412
3.04k
  int64_t this_rd;
413
3.04k
  RD_STATS tokenonly_rd_stats;
414
415
3.04k
  if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
416
33
    return INT64_MAX;
417
3.01k
  this_rate = tokenonly_rd_stats.rate +
418
3.01k
              intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
419
3.01k
  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
420
3.01k
  if (this_rd < *best_rd) {
421
573
    *best_rd = this_rd;
422
573
    *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
423
573
    *rate = this_rate;
424
573
    rd_stats->rate = tokenonly_rd_stats.rate;
425
573
    rd_stats->dist = tokenonly_rd_stats.dist;
426
573
    rd_stats->skip_txfm = tokenonly_rd_stats.skip_txfm;
427
573
  }
428
3.01k
  return this_rd;
429
3.04k
}
430
431
/*!\brief Search for the best angle delta for chroma prediction
432
 *
433
 * \ingroup intra_mode_search
434
 * \callergraph
435
 * Given a chroma directional intra prediction mode, this function will try to
436
 * estimate the best delta_angle.
437
 *
438
 * \returns Return if there is a new mode with smaller rdcost than best_rd.
439
 */
440
static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
441
                                    BLOCK_SIZE bsize, int rate_overhead,
442
                                    int64_t best_rd, int *rate,
443
606
                                    RD_STATS *rd_stats) {
444
606
  MACROBLOCKD *const xd = &x->e_mbd;
445
606
  MB_MODE_INFO *mbmi = xd->mi[0];
446
606
  assert(!is_inter_block(mbmi));
447
606
  int i, angle_delta, best_angle_delta = 0;
448
606
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
449
450
606
  rd_stats->rate = INT_MAX;
451
606
  rd_stats->skip_txfm = 0;
452
606
  rd_stats->dist = INT64_MAX;
453
6.66k
  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
454
455
1.75k
  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
456
2.32k
    for (i = 0; i < 2; ++i) {
457
1.75k
      best_rd_in = (best_rd == INT64_MAX)
458
1.75k
                       ? INT64_MAX
459
1.75k
                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
460
1.75k
      mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
461
1.75k
      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
462
1.75k
                                              best_rd_in, rate, rd_stats,
463
1.75k
                                              &best_angle_delta, &best_rd);
464
1.75k
      rd_cost[2 * angle_delta + i] = this_rd;
465
1.75k
      if (angle_delta == 0) {
466
606
        if (this_rd == INT64_MAX) return 0;
467
573
        rd_cost[1] = this_rd;
468
573
        break;
469
606
      }
470
1.75k
    }
471
1.17k
  }
472
473
573
  assert(best_rd != INT64_MAX);
474
1.71k
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
475
1.14k
    int64_t rd_thresh;
476
3.43k
    for (i = 0; i < 2; ++i) {
477
2.29k
      int skip_search = 0;
478
2.29k
      rd_thresh = best_rd + (best_rd >> 5);
479
2.29k
      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
480
2.29k
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
481
999
        skip_search = 1;
482
2.29k
      if (!skip_search) {
483
1.29k
        mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
484
1.29k
        pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
485
1.29k
                                      rate, rd_stats, &best_angle_delta,
486
1.29k
                                      &best_rd);
487
1.29k
      }
488
2.29k
    }
489
1.14k
  }
490
491
573
  mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta;
492
573
  return rd_stats->rate != INT_MAX;
493
606
}
494
495
// Combines the CFL sign of one chroma plane (a) with the sign of the other
// plane (b) into a joint sign index. `plane` names the plane that `a` belongs
// to: for CFL_PRED_U, `a` is the high-order term, otherwise `b` is.
// Every parameter is parenthesized so that expression arguments expand with
// the intended precedence.
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b)        \
  ((plane) == CFL_PRED_U ? (a) * CFL_SIGNS + (b) - 1 \
                         : (b) * CFL_SIGNS + (a) - 1)
497
498
// Splits a CFL index into a sign and an alpha value.
//
// The index is taken relative to CFL_INDEX_ZERO: that index itself yields
// CFL_SIGN_ZERO with alpha 0; indices above it are positive, indices below it
// negative, and alpha is the distance from CFL_INDEX_ZERO minus one.
static void cfl_idx_to_sign_and_alpha(int cfl_idx, CFL_SIGN_TYPE *cfl_sign,
                                      int *cfl_alpha) {
  const int offset = cfl_idx - CFL_INDEX_ZERO;
  if (offset == 0) {
    *cfl_sign = CFL_SIGN_ZERO;
    *cfl_alpha = 0;
    return;
  }

  *cfl_sign = (offset > 0) ? CFL_SIGN_POS : CFL_SIGN_NEG;
  *cfl_alpha = abs(offset) - 1;
}
509
510
static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
511
                              int plane, TX_SIZE tx_size,
512
                              BLOCK_SIZE plane_bsize, int cfl_idx,
513
616k
                              int fast_mode, RD_STATS *rd_stats) {
514
616k
  assert(IMPLIES(fast_mode, rd_stats == NULL));
515
616k
  const AV1_COMMON *const cm = &cpi->common;
516
616k
  MACROBLOCKD *const xd = &x->e_mbd;
517
616k
  MB_MODE_INFO *const mbmi = xd->mi[0];
518
616k
  int cfl_plane = get_cfl_pred_type(plane);
519
616k
  CFL_SIGN_TYPE cfl_sign;
520
616k
  int cfl_alpha;
521
616k
  cfl_idx_to_sign_and_alpha(cfl_idx, &cfl_sign, &cfl_alpha);
522
  // We conly build CFL for a given plane, the other plane's sign is dummy
523
616k
  int dummy_sign = CFL_SIGN_NEG;
524
616k
  const int8_t orig_cfl_alpha_signs = mbmi->cfl_alpha_signs;
525
616k
  const uint8_t orig_cfl_alpha_idx = mbmi->cfl_alpha_idx;
526
616k
  mbmi->cfl_alpha_signs =
527
616k
      PLANE_SIGN_TO_JOINT_SIGN(cfl_plane, cfl_sign, dummy_sign);
528
616k
  mbmi->cfl_alpha_idx = (cfl_alpha << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha;
529
616k
  int64_t cfl_cost;
530
616k
  if (fast_mode) {
531
231k
    cfl_cost =
532
231k
        intra_model_rd(cm, x, plane, plane_bsize, tx_size, /*use_hadamard=*/0);
533
385k
  } else {
534
385k
    av1_init_rd_stats(rd_stats);
535
385k
    av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize,
536
385k
                         tx_size, FTXS_NONE, 0);
537
385k
    av1_rd_cost_update(x->rdmult, rd_stats);
538
385k
    cfl_cost = rd_stats->rdcost;
539
385k
  }
540
616k
  mbmi->cfl_alpha_signs = orig_cfl_alpha_signs;
541
616k
  mbmi->cfl_alpha_idx = orig_cfl_alpha_idx;
542
616k
  return cfl_cost;
543
616k
}
544
545
static void cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
546
                                     int plane, TX_SIZE tx_size,
547
                                     int cfl_search_range,
548
77.0k
                                     RD_STATS cfl_rd_arr[CFL_MAGS_SIZE]) {
549
77.0k
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
550
77.0k
  MACROBLOCKD *const xd = &x->e_mbd;
551
552
77.0k
  xd->cfl.use_dc_pred_cache = 1;
553
554
77.0k
  MB_MODE_INFO *const mbmi = xd->mi[0];
555
77.0k
  assert(mbmi->uv_mode == UV_CFL_PRED);
556
77.0k
  const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
557
77.0k
  const BLOCK_SIZE plane_bsize =
558
77.0k
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
559
560
77.0k
  const int dir_ls[2] = { 1, -1 };
561
562
77.0k
  int est_best_cfl_idx = CFL_INDEX_ZERO;
563
77.0k
  if (cfl_search_range < CFL_MAGS_SIZE) {
564
77.0k
    int fast_mode = 1;
565
77.0k
    int start_cfl_idx = CFL_INDEX_ZERO;
566
77.0k
    int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
567
77.0k
                                           start_cfl_idx, fast_mode, NULL);
568
231k
    for (int si = 0; si < 2; ++si) {
569
154k
      const int dir = dir_ls[si];
570
154k
      for (int i = 1; i < CFL_MAGS_SIZE; ++i) {
571
154k
        int cfl_idx = start_cfl_idx + dir * i;
572
154k
        if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
573
154k
        int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
574
154k
                                          cfl_idx, fast_mode, NULL);
575
154k
        if (cfl_cost < best_cfl_cost) {
576
0
          best_cfl_cost = cfl_cost;
577
0
          est_best_cfl_idx = cfl_idx;
578
154k
        } else {
579
154k
          break;
580
154k
        }
581
154k
      }
582
154k
    }
583
77.0k
  }
584
585
2.61M
  for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) {
586
2.54M
    av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]);
587
2.54M
  }
588
589
77.0k
  int fast_mode = 0;
590
77.0k
  int start_cfl_idx = est_best_cfl_idx;
591
77.0k
  cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode,
592
77.0k
                 &cfl_rd_arr[start_cfl_idx]);
593
231k
  for (int si = 0; si < 2; ++si) {
594
154k
    const int dir = dir_ls[si];
595
462k
    for (int i = 1; i < cfl_search_range; ++i) {
596
308k
      int cfl_idx = start_cfl_idx + dir * i;
597
308k
      if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
598
308k
      cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, cfl_idx, fast_mode,
599
308k
                     &cfl_rd_arr[cfl_idx]);
600
308k
    }
601
154k
  }
602
77.0k
  xd->cfl.use_dc_pred_cache = 0;
603
77.0k
  xd->cfl.dc_pred_is_cached[0] = 0;
604
77.0k
  xd->cfl.dc_pred_is_cached[1] = 0;
605
77.0k
}
606
607
/*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component
608
 *
609
 * \ingroup intra_mode_search
610
 * \callergraph
611
 *
612
 * This function will use DCT_DCT followed by computing SATD (sum of absolute
613
 * transformed differences) to estimate the RD score and find the best possible
614
 * CFL parameter.
615
 *
616
 * Then the function will apply a full RD search near the best possible CFL
617
 * parameter to find the best actual CFL parameter.
618
 *
619
 * Side effect:
620
 * We use the buffers in x->plane[] and xd->plane[] as throw-away buffers for RD
621
 * search.
622
 *
623
 * \param[in] x                Encoder prediction block structure.
624
 * \param[in] cpi              Top-level encoder instance structure.
625
 * \param[in] tx_size          Transform size.
626
 * \param[in] ref_best_rd      Reference best RD.
627
 * \param[in] cfl_search_range The search range of full RD search near the
628
 *                             estimated best CFL parameter.
629
 *
630
 * \param[out]   best_rd_stats          RD stats of the best CFL parameter
631
 * \param[out]   best_cfl_alpha_idx     Best CFL alpha index
632
 * \param[out]   best_cfl_alpha_signs   Best CFL joint signs
633
 *
634
 */
635
static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
636
                             TX_SIZE tx_size, int64_t ref_best_rd,
637
                             int cfl_search_range, RD_STATS *best_rd_stats,
638
                             uint8_t *best_cfl_alpha_idx,
639
38.5k
                             int8_t *best_cfl_alpha_signs) {
640
38.5k
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
641
38.5k
  const ModeCosts *mode_costs = &x->mode_costs;
642
38.5k
  RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE];
643
38.5k
  RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE];
644
645
38.5k
  av1_invalid_rd_stats(best_rd_stats);
646
647
38.5k
  cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u);
648
38.5k
  cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v);
649
650
1.30M
  for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) {
651
1.27M
    if (cfl_rd_arr_u[ui].rate == INT_MAX) continue;
652
192k
    int cfl_alpha_u;
653
192k
    CFL_SIGN_TYPE cfl_sign_u;
654
192k
    cfl_idx_to_sign_and_alpha(ui, &cfl_sign_u, &cfl_alpha_u);
655
6.54M
    for (int vi = 0; vi < CFL_MAGS_SIZE; ++vi) {
656
6.35M
      if (cfl_rd_arr_v[vi].rate == INT_MAX) continue;
657
962k
      int cfl_alpha_v;
658
962k
      CFL_SIGN_TYPE cfl_sign_v;
659
962k
      cfl_idx_to_sign_and_alpha(vi, &cfl_sign_v, &cfl_alpha_v);
660
      // cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO is not a
661
      // valid parameter for CFL
662
962k
      if (cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO) continue;
663
924k
      int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1;
664
924k
      RD_STATS rd_stats = cfl_rd_arr_u[ui];
665
924k
      av1_merge_rd_stats(&rd_stats, &cfl_rd_arr_v[vi]);
666
924k
      if (rd_stats.rate != INT_MAX) {
667
924k
        rd_stats.rate +=
668
924k
            mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u];
669
924k
        rd_stats.rate +=
670
924k
            mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v];
671
924k
      }
672
924k
      av1_rd_cost_update(x->rdmult, &rd_stats);
673
924k
      if (rd_stats.rdcost < best_rd_stats->rdcost) {
674
115k
        *best_rd_stats = rd_stats;
675
115k
        *best_cfl_alpha_idx =
676
115k
            (cfl_alpha_u << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha_v;
677
115k
        *best_cfl_alpha_signs = joint_sign;
678
115k
      }
679
924k
    }
680
192k
  }
681
38.5k
  if (best_rd_stats->rdcost >= ref_best_rd) {
682
38.4k
    av1_invalid_rd_stats(best_rd_stats);
683
    // Set invalid CFL parameters here since the rdcost is not better than
684
    // ref_best_rd.
685
38.4k
    *best_cfl_alpha_idx = 0;
686
38.4k
    *best_cfl_alpha_signs = 0;
687
38.4k
    return 0;
688
38.4k
  }
689
71
  return 1;
690
38.5k
}
691
692
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
693
                                    int *rate, int *rate_tokenonly,
694
                                    int64_t *distortion, int *skippable,
695
68.3k
                                    BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
696
68.3k
  const AV1_COMMON *const cm = &cpi->common;
697
68.3k
  MACROBLOCKD *xd = &x->e_mbd;
698
68.3k
  MB_MODE_INFO *mbmi = xd->mi[0];
699
68.3k
  assert(!is_inter_block(mbmi));
700
68.3k
  MB_MODE_INFO best_mbmi = *mbmi;
701
68.3k
  int64_t best_rd = INT64_MAX, this_rd;
702
68.3k
  const ModeCosts *mode_costs = &x->mode_costs;
703
68.3k
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
704
705
68.3k
  init_sbuv_mode(mbmi);
706
707
  // Return if the current block does not correspond to a chroma block.
708
68.3k
  if (!xd->is_chroma_ref) {
709
4.77k
    *rate = 0;
710
4.77k
    *rate_tokenonly = 0;
711
4.77k
    *distortion = 0;
712
4.77k
    *skippable = 1;
713
4.77k
    return INT64_MAX;
714
4.77k
  }
715
716
  // Only store reconstructed luma when there's chroma RDO. When there's no
717
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
718
63.5k
  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
719
63.5k
  if (xd->cfl.store_y) {
720
    // Restore reconstructed luma values.
721
    // TODO(chiyotsai@google.com): right now we are re-computing the txfm in
722
    // this function every time we search through uv modes. There is some
723
    // potential speed up here if we cache the result to avoid redundant
724
    // computation.
725
38.5k
    av1_encode_intra_block_plane(cpi, x, mbmi->bsize, AOM_PLANE_Y,
726
38.5k
                                 DRY_RUN_NORMAL,
727
38.5k
                                 cpi->optimize_seg_arr[mbmi->segment_id]);
728
38.5k
    xd->cfl.store_y = 0;
729
38.5k
  }
730
63.5k
  IntraModeSearchState intra_search_state;
731
63.5k
  init_intra_mode_search_state(&intra_search_state);
732
733
  // Search through all non-palette modes.
734
953k
  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
735
890k
    int this_rate;
736
890k
    RD_STATS tokenonly_rd_stats;
737
890k
    UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
738
890k
    const int is_diagonal_mode = av1_is_diagonal_mode(get_uv_mode(mode));
739
890k
    const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode));
740
741
890k
    if (is_diagonal_mode && !cpi->oxcf.intra_mode_cfg.enable_diagonal_intra)
742
0
      continue;
743
890k
    if (is_directional_mode &&
744
890k
        !cpi->oxcf.intra_mode_cfg.enable_directional_intra)
745
0
      continue;
746
747
890k
    if (!(cpi->sf.intra_sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
748
890k
          (1 << mode)))
749
0
      continue;
750
890k
    if (!intra_mode_cfg->enable_smooth_intra && mode >= UV_SMOOTH_PRED &&
751
890k
        mode <= UV_SMOOTH_H_PRED)
752
0
      continue;
753
754
890k
    if (!intra_mode_cfg->enable_paeth_intra && mode == UV_PAETH_PRED) continue;
755
756
890k
    assert(mbmi->mode < INTRA_MODES);
757
890k
    if (cpi->sf.intra_sf.prune_chroma_modes_using_luma_winner &&
758
890k
        !(av1_derived_chroma_intra_mode_used_flag[mbmi->mode] & (1 << mode)))
759
698k
      continue;
760
761
191k
    mbmi->uv_mode = mode;
762
763
    // Init variables for cfl and angle delta
764
191k
    const SPEED_FEATURES *sf = &cpi->sf;
765
191k
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
766
191k
    if (mode == UV_CFL_PRED) {
767
63.5k
      if (!is_cfl_allowed(xd) || !intra_mode_cfg->enable_cfl_intra) continue;
768
38.5k
      assert(!is_directional_mode);
769
38.5k
      const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
770
38.5k
      if (!cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd,
771
38.5k
                             sf->intra_sf.cfl_search_range, &tokenonly_rd_stats,
772
38.5k
                             &mbmi->cfl_alpha_idx, &mbmi->cfl_alpha_signs)) {
773
38.4k
        continue;
774
38.4k
      }
775
128k
    } else if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) &&
776
128k
               intra_mode_cfg->enable_angle_delta) {
777
606
      if (sf->intra_sf.chroma_intra_pruning_with_hog &&
778
606
          !intra_search_state.dir_mode_skip_mask_ready) {
779
606
        static const float thresh[2][4] = {
780
606
          { -1.2f, 0.0f, 0.0f, 1.2f },    // Interframe
781
606
          { -1.2f, -1.2f, -0.6f, 0.4f },  // Intraframe
782
606
        };
783
606
        const int is_chroma = 1;
784
606
        const int is_intra_frame = frame_is_intra_only(cm);
785
606
        prune_intra_mode_with_hog(
786
606
            x, bsize, cm->seq_params->sb_size,
787
606
            thresh[is_intra_frame]
788
606
                  [sf->intra_sf.chroma_intra_pruning_with_hog - 1],
789
606
            intra_search_state.directional_mode_skip_mask, is_chroma);
790
606
        intra_search_state.dir_mode_skip_mask_ready = 1;
791
606
      }
792
606
      if (intra_search_state.directional_mode_skip_mask[mode]) {
793
0
        continue;
794
0
      }
795
796
      // Search through angle delta
797
606
      const int rate_overhead =
798
606
          mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
799
606
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
800
606
                                    &this_rate, &tokenonly_rd_stats))
801
33
        continue;
802
127k
    } else {
803
      // Predict directly if we don't need to search for angle delta.
804
127k
      if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
805
8.03k
        continue;
806
8.03k
      }
807
127k
    }
808
120k
    const int mode_cost =
809
120k
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
810
120k
    this_rate = tokenonly_rd_stats.rate +
811
120k
                intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);
812
120k
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
813
814
120k
    if (this_rd < best_rd) {
815
64.2k
      best_mbmi = *mbmi;
816
64.2k
      best_rd = this_rd;
817
64.2k
      *rate = this_rate;
818
64.2k
      *rate_tokenonly = tokenonly_rd_stats.rate;
819
64.2k
      *distortion = tokenonly_rd_stats.dist;
820
64.2k
      *skippable = tokenonly_rd_stats.skip_txfm;
821
64.2k
    }
822
120k
  }
823
824
  // Search palette mode
825
63.5k
  const int try_palette =
826
63.5k
      cpi->oxcf.tool_cfg.enable_palette &&
827
63.5k
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
828
63.5k
                        mbmi->bsize);
829
63.5k
  if (try_palette) {
830
0
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
831
0
    av1_rd_pick_palette_intra_sbuv(
832
0
        cpi, x,
833
0
        mode_costs
834
0
            ->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED],
835
0
        best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly,
836
0
        distortion, skippable);
837
0
  }
838
839
63.5k
  *mbmi = best_mbmi;
840
  // Make sure we actually chose a mode
841
63.5k
  assert(best_rd < INT64_MAX);
842
63.5k
  return best_rd;
843
68.3k
}
844
845
// Searches palette mode for luma channel in inter frame.
846
int av1_search_palette_mode(IntraModeSearchState *intra_search_state,
847
                            const AV1_COMP *cpi, MACROBLOCK *x,
848
                            BLOCK_SIZE bsize, unsigned int ref_frame_cost,
849
                            PICK_MODE_CONTEXT *ctx, RD_STATS *this_rd_cost,
850
0
                            int64_t best_rd) {
851
0
  const AV1_COMMON *const cm = &cpi->common;
852
0
  MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
853
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
854
0
  const int num_planes = av1_num_planes(cm);
855
0
  MACROBLOCKD *const xd = &x->e_mbd;
856
0
  int rate2 = 0;
857
0
  int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd;
858
0
  int skippable = 0;
859
0
  uint8_t *const best_palette_color_map =
860
0
      x->palette_buffer->best_palette_color_map;
861
0
  uint8_t *const color_map = xd->plane[0].color_index_map;
862
0
  MB_MODE_INFO best_mbmi_palette = *mbmi;
863
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
864
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
865
0
  const ModeCosts *mode_costs = &x->mode_costs;
866
0
  const int *const intra_mode_cost =
867
0
      mode_costs->mbmode_cost[size_group_lookup[bsize]];
868
0
  const int rows = block_size_high[bsize];
869
0
  const int cols = block_size_wide[bsize];
870
871
0
  mbmi->mode = DC_PRED;
872
0
  mbmi->uv_mode = UV_DC_PRED;
873
0
  mbmi->ref_frame[0] = INTRA_FRAME;
874
0
  mbmi->ref_frame[1] = NONE_FRAME;
875
0
  av1_zero(pmi->palette_size);
876
877
0
  RD_STATS rd_stats_y;
878
0
  av1_invalid_rd_stats(&rd_stats_y);
879
0
  av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
880
0
                                &best_mbmi_palette, best_palette_color_map,
881
0
                                &best_rd_palette, &rd_stats_y.rate, NULL,
882
0
                                &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
883
0
                                ctx, best_blk_skip, best_tx_type_map);
884
0
  if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
885
0
    this_rd_cost->rdcost = INT64_MAX;
886
0
    return skippable;
887
0
  }
888
889
0
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
890
0
         sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
891
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
892
0
  memcpy(color_map, best_palette_color_map,
893
0
         rows * cols * sizeof(best_palette_color_map[0]));
894
895
0
  skippable = rd_stats_y.skip_txfm;
896
0
  distortion2 = rd_stats_y.dist;
897
0
  rate2 = rd_stats_y.rate + ref_frame_cost;
898
0
  if (num_planes > 1) {
899
0
    if (intra_search_state->rate_uv_intra == INT_MAX) {
900
      // We have not found any good uv mode yet, so we need to search for it.
901
0
      TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
902
0
      av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
903
0
                                  &intra_search_state->rate_uv_tokenonly,
904
0
                                  &intra_search_state->dist_uvs,
905
0
                                  &intra_search_state->skip_uvs, bsize, uv_tx);
906
0
      intra_search_state->mode_uv = mbmi->uv_mode;
907
0
      intra_search_state->pmi_uv = *pmi;
908
0
      intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
909
0
    }
910
911
    // We have found at least one good uv mode before, so copy and paste it
912
    // over.
913
0
    mbmi->uv_mode = intra_search_state->mode_uv;
914
0
    pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1];
915
0
    if (pmi->palette_size[1] > 0) {
916
0
      memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
917
0
             intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE,
918
0
             2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
919
0
    }
920
0
    mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta;
921
0
    skippable = skippable && intra_search_state->skip_uvs;
922
0
    distortion2 += intra_search_state->dist_uvs;
923
0
    rate2 += intra_search_state->rate_uv_intra;
924
0
  }
925
926
0
  if (skippable) {
927
0
    rate2 -= rd_stats_y.rate;
928
0
    if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly;
929
0
    rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
930
0
  } else {
931
0
    rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
932
0
  }
933
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
934
0
  this_rd_cost->rate = rate2;
935
0
  this_rd_cost->dist = distortion2;
936
0
  this_rd_cost->rdcost = this_rd;
937
0
  return skippable;
938
0
}
939
940
/*!\brief Get the intra prediction by searching through tx_type and tx_size.
941
 *
942
 * \ingroup intra_mode_search
943
 * \callergraph
944
 * Currently this function is only used in the intra frame code path for
945
 * winner-mode processing.
946
 *
947
 * \return Returns whether the current mode is an improvement over best_rd.
948
 */
949
static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
950
                                      BLOCK_SIZE bsize, const int *bmode_costs,
951
                                      int64_t *best_rd, int *rate,
952
                                      int *rate_tokenonly, int64_t *distortion,
953
                                      int *skippable, MB_MODE_INFO *best_mbmi,
954
136k
                                      PICK_MODE_CONTEXT *ctx) {
955
136k
  MACROBLOCKD *const xd = &x->e_mbd;
956
136k
  MB_MODE_INFO *const mbmi = xd->mi[0];
957
136k
  RD_STATS rd_stats;
958
  // In order to improve txfm search avoid rd based breakouts during winner
959
  // mode evaluation. Hence passing ref_best_rd as a maximum value
960
136k
  av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
961
136k
  if (rd_stats.rate == INT_MAX) return 0;
962
136k
  int this_rate_tokenonly = rd_stats.rate;
963
136k
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
964
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
965
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
966
    // (prediction granularity), so we account for it in the full rate,
967
    // not the tokenonly rate.
968
87.7k
    this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size);
969
87.7k
  }
970
136k
  const int this_rate =
971
136k
      rd_stats.rate +
972
136k
      intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
973
136k
  const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
974
136k
  if (this_rd < *best_rd) {
975
28.7k
    *best_mbmi = *mbmi;
976
28.7k
    *best_rd = this_rd;
977
28.7k
    *rate = this_rate;
978
28.7k
    *rate_tokenonly = this_rate_tokenonly;
979
28.7k
    *distortion = rd_stats.dist;
980
28.7k
    *skippable = rd_stats.skip_txfm;
981
28.7k
    av1_copy_array(ctx->blk_skip, x->txfm_search_info.blk_skip,
982
28.7k
                   ctx->num_4x4_blk);
983
28.7k
    av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
984
28.7k
    return 1;
985
28.7k
  }
986
107k
  return 0;
987
136k
}
988
989
/*!\brief Search for the best filter_intra mode when coding inter frame.
990
 *
991
 * \ingroup intra_mode_search
992
 * \callergraph
993
 * This function loops through all filter_intra modes to find the best one.
994
 *
995
 * \return Returns nothing, but updates the mbmi and rd_stats_y.
996
 */
997
static INLINE void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x,
998
                                            BLOCK_SIZE bsize,
999
                                            const PICK_MODE_CONTEXT *ctx,
1000
                                            RD_STATS *rd_stats_y, int mode_cost,
1001
                                            int64_t best_rd,
1002
0
                                            int64_t best_rd_so_far) {
1003
0
  MACROBLOCKD *const xd = &x->e_mbd;
1004
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1005
0
  assert(mbmi->mode == DC_PRED &&
1006
0
         av1_filter_intra_allowed_bsize(&cpi->common, bsize));
1007
1008
0
  RD_STATS rd_stats_y_fi;
1009
0
  int filter_intra_selected_flag = 0;
1010
0
  TX_SIZE best_tx_size = mbmi->tx_size;
1011
0
  FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
1012
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1013
0
  memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
1014
0
         sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
1015
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1016
0
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1017
0
  mbmi->filter_intra_mode_info.use_filter_intra = 1;
1018
0
  for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES;
1019
0
       ++fi_mode) {
1020
0
    mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
1021
0
    av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
1022
0
    if (rd_stats_y_fi.rate == INT_MAX) continue;
1023
0
    const int this_rate_tmp =
1024
0
        rd_stats_y_fi.rate +
1025
0
        intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
1026
0
    const int64_t this_rd_tmp =
1027
0
        RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
1028
1029
0
    if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) {
1030
0
      break;
1031
0
    }
1032
0
    if (this_rd_tmp < best_rd_so_far) {
1033
0
      best_tx_size = mbmi->tx_size;
1034
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1035
0
      memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
1036
0
             sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
1037
0
      best_fi_mode = fi_mode;
1038
0
      *rd_stats_y = rd_stats_y_fi;
1039
0
      filter_intra_selected_flag = 1;
1040
0
      best_rd_so_far = this_rd_tmp;
1041
0
    }
1042
0
  }
1043
1044
0
  mbmi->tx_size = best_tx_size;
1045
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
1046
0
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
1047
0
         sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
1048
1049
0
  if (filter_intra_selected_flag) {
1050
0
    mbmi->filter_intra_mode_info.use_filter_intra = 1;
1051
0
    mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
1052
0
  } else {
1053
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
1054
0
  }
1055
0
}
1056
1057
// Evaluate a given luma intra-mode in inter frames.
1058
int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
1059
                            const AV1_COMP *cpi, MACROBLOCK *x,
1060
                            BLOCK_SIZE bsize, unsigned int ref_frame_cost,
1061
                            const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y,
1062
                            int64_t best_rd, int *mode_cost_y, int64_t *rd_y,
1063
                            int64_t *best_model_rd,
1064
0
                            int64_t top_intra_model_rd[]) {
1065
0
  const AV1_COMMON *cm = &cpi->common;
1066
0
  const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf;
1067
0
  MACROBLOCKD *const xd = &x->e_mbd;
1068
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1069
0
  assert(mbmi->ref_frame[0] == INTRA_FRAME);
1070
0
  const PREDICTION_MODE mode = mbmi->mode;
1071
0
  const ModeCosts *mode_costs = &x->mode_costs;
1072
0
  const int mode_cost =
1073
0
      mode_costs->mbmode_cost[size_group_lookup[bsize]][mode] + ref_frame_cost;
1074
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
1075
1076
0
  int known_rate = mode_cost;
1077
0
  const int intra_cost_penalty = av1_get_intra_cost_penalty(
1078
0
      cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
1079
0
      cm->seq_params->bit_depth);
1080
1081
0
  if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty;
1082
0
  known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0],
1083
0
                       mode_costs->skip_txfm_cost[skip_ctx][1]);
1084
0
  const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);
1085
0
  if (known_rd > best_rd) {
1086
0
    intra_search_state->skip_intra_modes = 1;
1087
0
    return 0;
1088
0
  }
1089
1090
0
  const int is_directional_mode = av1_is_directional_mode(mode);
1091
0
  if (is_directional_mode && av1_use_angle_delta(bsize) &&
1092
0
      cpi->oxcf.intra_mode_cfg.enable_angle_delta) {
1093
0
    if (intra_sf->intra_pruning_with_hog &&
1094
0
        !intra_search_state->dir_mode_skip_mask_ready) {
1095
0
      const float thresh[4] = { -1.2f, 0.0f, 0.0f, 1.2f };
1096
0
      const int is_chroma = 0;
1097
0
      prune_intra_mode_with_hog(x, bsize, cm->seq_params->sb_size,
1098
0
                                thresh[intra_sf->intra_pruning_with_hog - 1],
1099
0
                                intra_search_state->directional_mode_skip_mask,
1100
0
                                is_chroma);
1101
0
      intra_search_state->dir_mode_skip_mask_ready = 1;
1102
0
    }
1103
0
    if (intra_search_state->directional_mode_skip_mask[mode]) return 0;
1104
0
  }
1105
0
  const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
1106
0
  const int64_t this_model_rd =
1107
0
      intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
1108
1109
0
  const int model_rd_index_for_pruning =
1110
0
      get_model_rd_index_for_pruning(x, intra_sf);
1111
1112
0
  if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd,
1113
0
                         intra_sf->top_intra_model_count_allowed,
1114
0
                         model_rd_index_for_pruning))
1115
0
    return 0;
1116
0
  av1_init_rd_stats(rd_stats_y);
1117
0
  av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd);
1118
1119
  // Pick filter intra modes.
1120
0
  if (mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
1121
0
    int try_filter_intra = 1;
1122
0
    int64_t best_rd_so_far = INT64_MAX;
1123
0
    if (rd_stats_y->rate != INT_MAX) {
1124
      // best_rd_so_far is the rdcost of DC_PRED without using filter_intra.
1125
      // Later, in filter intra search, best_rd_so_far is used for comparison.
1126
0
      mbmi->filter_intra_mode_info.use_filter_intra = 0;
1127
0
      const int tmp_rate =
1128
0
          rd_stats_y->rate +
1129
0
          intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
1130
0
      best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
1131
0
      try_filter_intra = (best_rd_so_far / 2) <= best_rd;
1132
0
    } else if (intra_sf->skip_filter_intra_in_inter_frames >= 1) {
1133
      // As rd cost of luma intra dc mode is more than best_rd (i.e.,
1134
      // rd_stats_y->rate = INT_MAX), skip the evaluation of filter intra modes.
1135
0
      try_filter_intra = 0;
1136
0
    }
1137
1138
0
    if (try_filter_intra) {
1139
0
      handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost,
1140
0
                               best_rd, best_rd_so_far);
1141
0
    }
1142
0
  }
1143
1144
0
  if (rd_stats_y->rate == INT_MAX) return 0;
1145
1146
0
  *mode_cost_y = intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
1147
0
  const int rate_y = rd_stats_y->skip_txfm
1148
0
                         ? mode_costs->skip_txfm_cost[skip_ctx][1]
1149
0
                         : rd_stats_y->rate;
1150
0
  *rd_y = RDCOST(x->rdmult, rate_y + *mode_cost_y, rd_stats_y->dist);
1151
0
  if (best_rd < (INT64_MAX / 2) && *rd_y > (best_rd + (best_rd >> 2))) {
1152
0
    intra_search_state->skip_intra_modes = 1;
1153
0
    return 0;
1154
0
  }
1155
1156
0
  return 1;
1157
0
}
1158
1159
int av1_search_intra_uv_modes_in_interframe(
1160
    IntraModeSearchState *intra_search_state, const AV1_COMP *cpi,
1161
    MACROBLOCK *x, BLOCK_SIZE bsize, RD_STATS *rd_stats,
1162
0
    const RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int64_t best_rd) {
1163
0
  const AV1_COMMON *cm = &cpi->common;
1164
0
  MACROBLOCKD *const xd = &x->e_mbd;
1165
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1166
0
  assert(mbmi->ref_frame[0] == INTRA_FRAME);
1167
1168
  // TODO(chiyotsai@google.com): Consolidate the chroma search code here with
1169
  // the one in av1_search_palette_mode.
1170
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
1171
0
  const int try_palette =
1172
0
      cpi->oxcf.tool_cfg.enable_palette &&
1173
0
      av1_allow_palette(cm->features.allow_screen_content_tools, mbmi->bsize);
1174
1175
0
  assert(intra_search_state->rate_uv_intra == INT_MAX);
1176
0
  if (intra_search_state->rate_uv_intra == INT_MAX) {
1177
    // If no good uv-predictor had been found, search for it.
1178
0
    const TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
1179
0
    av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
1180
0
                                &intra_search_state->rate_uv_tokenonly,
1181
0
                                &intra_search_state->dist_uvs,
1182
0
                                &intra_search_state->skip_uvs, bsize, uv_tx);
1183
0
    intra_search_state->mode_uv = mbmi->uv_mode;
1184
0
    if (try_palette) intra_search_state->pmi_uv = *pmi;
1185
0
    intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
1186
1187
0
    const int uv_rate = intra_search_state->rate_uv_tokenonly;
1188
0
    const int64_t uv_dist = intra_search_state->dist_uvs;
1189
0
    const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist);
1190
0
    if (uv_rd > best_rd) {
1191
      // If there is no good intra uv-mode available, we can skip all intra
1192
      // modes.
1193
0
      intra_search_state->skip_intra_modes = 1;
1194
0
      return 0;
1195
0
    }
1196
0
  }
1197
1198
  // If we are here, then the encoder has found at least one good intra uv
1199
  // predictor, so we can directly copy its statistics over.
1200
  // TODO(any): the stats here are not right if the best uv mode is CFL but the
1201
  // best y mode is palette.
1202
0
  rd_stats_uv->rate = intra_search_state->rate_uv_tokenonly;
1203
0
  rd_stats_uv->dist = intra_search_state->dist_uvs;
1204
0
  rd_stats_uv->skip_txfm = intra_search_state->skip_uvs;
1205
0
  rd_stats->skip_txfm = rd_stats_y->skip_txfm && rd_stats_uv->skip_txfm;
1206
0
  mbmi->uv_mode = intra_search_state->mode_uv;
1207
0
  if (try_palette) {
1208
0
    pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1];
1209
0
    memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
1210
0
           intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE,
1211
0
           2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
1212
0
  }
1213
0
  mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta;
1214
1215
0
  return 1;
1216
0
}
1217
1218
// Finds the best non-intrabc mode on an intra frame.
1219
int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
1220
                                   int *rate, int *rate_tokenonly,
1221
                                   int64_t *distortion, int *skippable,
1222
                                   BLOCK_SIZE bsize, int64_t best_rd,
1223
112k
                                   PICK_MODE_CONTEXT *ctx) {
1224
112k
  MACROBLOCKD *const xd = &x->e_mbd;
1225
112k
  MB_MODE_INFO *const mbmi = xd->mi[0];
1226
112k
  assert(!is_inter_block(mbmi));
1227
112k
  int64_t best_model_rd = INT64_MAX;
1228
112k
  int is_directional_mode;
1229
112k
  uint8_t directional_mode_skip_mask[INTRA_MODES] = { 0 };
1230
  // Flag to check rd of any intra mode is better than best_rd passed to this
1231
  // function
1232
112k
  int beat_best_rd = 0;
1233
112k
  const int *bmode_costs;
1234
112k
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
1235
112k
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
1236
112k
  const int try_palette =
1237
112k
      cpi->oxcf.tool_cfg.enable_palette &&
1238
112k
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
1239
112k
                        mbmi->bsize);
1240
112k
  uint8_t *best_palette_color_map =
1241
112k
      try_palette ? x->palette_buffer->best_palette_color_map : NULL;
1242
112k
  const MB_MODE_INFO *above_mi = xd->above_mbmi;
1243
112k
  const MB_MODE_INFO *left_mi = xd->left_mbmi;
1244
112k
  const PREDICTION_MODE A = av1_above_block_mode(above_mi);
1245
112k
  const PREDICTION_MODE L = av1_left_block_mode(left_mi);
1246
112k
  const int above_ctx = intra_mode_context[A];
1247
112k
  const int left_ctx = intra_mode_context[L];
1248
112k
  bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx];
1249
1250
112k
  mbmi->angle_delta[PLANE_TYPE_Y] = 0;
1251
112k
  const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf;
1252
112k
  if (intra_sf->intra_pruning_with_hog) {
1253
    // Less aggressive thresholds are used here than those used in inter frame
1254
    // encoding in av1_handle_intra_y_mode() because we want key frames/intra
1255
    // frames to have higher quality.
1256
112k
    const float thresh[4] = { -1.2f, -1.2f, -0.6f, 0.4f };
1257
112k
    const int is_chroma = 0;
1258
112k
    prune_intra_mode_with_hog(x, bsize, cpi->common.seq_params->sb_size,
1259
112k
                              thresh[intra_sf->intra_pruning_with_hog - 1],
1260
112k
                              directional_mode_skip_mask, is_chroma);
1261
112k
  }
1262
112k
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
1263
112k
  pmi->palette_size[0] = 0;
1264
1265
  // Set params for mode evaluation
1266
112k
  set_mode_eval_params(cpi, x, MODE_EVAL);
1267
1268
112k
  MB_MODE_INFO best_mbmi = *mbmi;
1269
112k
  zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTRA,
1270
112k
                         x->winner_mode_stats);
1271
112k
  x->winner_mode_count = 0;
1272
1273
  // Searches the intra-modes except for intrabc, palette, and filter_intra.
1274
112k
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
1275
560k
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
1276
448k
    top_intra_model_rd[i] = INT64_MAX;
1277
448k
  }
1278
6.95M
  for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT;
1279
6.83M
       ++mode_idx) {
1280
6.83M
    set_y_mode_and_delta_angle(mode_idx, mbmi);
1281
6.83M
    RD_STATS this_rd_stats;
1282
6.83M
    int this_rate, this_rate_tokenonly, s;
1283
6.83M
    int is_diagonal_mode;
1284
6.83M
    int64_t this_distortion, this_rd;
1285
1286
6.83M
    is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode);
1287
6.83M
    if (is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) continue;
1288
6.83M
    if (av1_is_directional_mode(mbmi->mode) &&
1289
6.83M
        !intra_mode_cfg->enable_directional_intra)
1290
0
      continue;
1291
1292
    // The smooth prediction mode appears to be more frequently picked
1293
    // than horizontal / vertical smooth prediction modes. Hence treat
1294
    // them differently in speed features.
1295
6.83M
    if ((!intra_mode_cfg->enable_smooth_intra ||
1296
6.83M
         intra_sf->disable_smooth_intra) &&
1297
6.83M
        (mbmi->mode == SMOOTH_H_PRED || mbmi->mode == SMOOTH_V_PRED))
1298
224k
      continue;
1299
6.61M
    if (!intra_mode_cfg->enable_smooth_intra && mbmi->mode == SMOOTH_PRED)
1300
0
      continue;
1301
1302
    // The functionality of filter intra modes and smooth prediction
1303
    // overlap. Hence smooth prediction is pruned only if all the
1304
    // filter intra modes are enabled.
1305
6.61M
    if (intra_sf->disable_smooth_intra &&
1306
6.61M
        intra_sf->prune_filter_intra_level == 0 && mbmi->mode == SMOOTH_PRED)
1307
0
      continue;
1308
6.61M
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
1309
0
      continue;
1310
1311
    // Skip the evaluation of modes that do not match with the winner mode in
1312
    // x->mb_mode_cache.
1313
6.61M
    if (x->use_mb_mode_cache && mbmi->mode != x->mb_mode_cache->mode) continue;
1314
1315
6.61M
    is_directional_mode = av1_is_directional_mode(mbmi->mode);
1316
6.61M
    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
1317
5.04M
    if (is_directional_mode &&
1318
5.04M
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
1319
5.04M
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
1320
1.11M
      continue;
1321
1322
    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
1323
3.92M
    if (!(intra_sf->intra_y_mode_mask[max_txsize_lookup[bsize]] &
1324
3.92M
          (1 << mbmi->mode)))
1325
0
      continue;
1326
1327
3.92M
    const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
1328
3.92M
    const int64_t this_model_rd =
1329
3.92M
        intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
1330
1331
3.92M
    const int model_rd_index_for_pruning =
1332
3.92M
        get_model_rd_index_for_pruning(x, intra_sf);
1333
1334
3.92M
    if (prune_intra_y_mode(this_model_rd, &best_model_rd, top_intra_model_rd,
1335
3.92M
                           intra_sf->top_intra_model_count_allowed,
1336
3.92M
                           model_rd_index_for_pruning))
1337
234k
      continue;
1338
1339
    // Builds the actual prediction. The prediction from
1340
    // model_intra_yrd_and_prune was just an estimation that did not take into
1341
    // account the effect of txfm pipeline, so we need to redo it for real
1342
    // here.
1343
3.69M
    av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
1344
3.69M
    this_rate_tokenonly = this_rd_stats.rate;
1345
3.69M
    this_distortion = this_rd_stats.dist;
1346
3.69M
    s = this_rd_stats.skip_txfm;
1347
1348
3.69M
    if (this_rate_tokenonly == INT_MAX) continue;
1349
1350
2.96M
    if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
1351
      // av1_pick_uniform_tx_size_type_yrd above includes the cost of the
1352
      // tx_size in the tokenonly rate, but for intra blocks, tx_size is always
1353
      // coded (prediction granularity), so we account for it in the full rate,
1354
      // not the tokenonly rate.
1355
1.83M
      this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size);
1356
1.83M
    }
1357
2.96M
    this_rate =
1358
2.96M
        this_rd_stats.rate +
1359
2.96M
        intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
1360
2.96M
    this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
1361
1362
    // Visual quality adjustment based on recon vs source variance.
1363
2.96M
    if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
1364
2.96M
      this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
1365
2.96M
    }
1366
1367
    // Collect mode stats for multiwinner mode processing
1368
2.96M
    const int txfm_search_done = 1;
1369
2.96M
    store_winner_mode_stats(
1370
2.96M
        &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
1371
2.96M
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
1372
2.96M
    if (this_rd < best_rd) {
1373
75.1k
      best_mbmi = *mbmi;
1374
75.1k
      best_rd = this_rd;
1375
      // Setting beat_best_rd flag because current mode rd is better than
1376
      // best_rd passed to this function
1377
75.1k
      beat_best_rd = 1;
1378
75.1k
      *rate = this_rate;
1379
75.1k
      *rate_tokenonly = this_rate_tokenonly;
1380
75.1k
      *distortion = this_distortion;
1381
75.1k
      *skippable = s;
1382
75.1k
      memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
1383
75.1k
             sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
1384
75.1k
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1385
75.1k
    }
1386
2.96M
  }
1387
1388
  // Searches palette
1389
112k
  if (try_palette) {
1390
0
    av1_rd_pick_palette_intra_sby(
1391
0
        cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
1392
0
        &best_rd, rate, rate_tokenonly, distortion, skippable, &beat_best_rd,
1393
0
        ctx, ctx->blk_skip, ctx->tx_type_map);
1394
0
  }
1395
1396
  // Searches filter_intra
1397
112k
  if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
1398
57.2k
    if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
1399
57.2k
                                 skippable, bsize, bmode_costs[DC_PRED],
1400
57.2k
                                 best_mbmi.mode, &best_rd, &best_model_rd,
1401
57.2k
                                 ctx)) {
1402
1.97k
      best_mbmi = *mbmi;
1403
1.97k
    }
1404
57.2k
  }
1405
1406
  // No mode is identified with less rd value than best_rd passed to this
1407
  // function. In such cases winner mode processing is not necessary and return
1408
  // best_rd as INT64_MAX to indicate best mode is not identified
1409
112k
  if (!beat_best_rd) return INT64_MAX;
1410
1411
  // In multi-winner mode processing, perform tx search for few best modes
1412
  // identified during mode evaluation. Winner mode processing uses best tx
1413
  // configuration for tx search.
1414
68.3k
  if (cpi->sf.winner_mode_sf.multi_winner_mode_type) {
1415
68.3k
    int best_mode_idx = 0;
1416
68.3k
    int block_width, block_height;
1417
68.3k
    uint8_t *color_map_dst = xd->plane[PLANE_TYPE_Y].color_index_map;
1418
68.3k
    av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
1419
68.3k
                             &block_height, NULL, NULL);
1420
1421
204k
    for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) {
1422
136k
      *mbmi = x->winner_mode_stats[mode_idx].mbmi;
1423
136k
      if (is_winner_mode_processing_enabled(cpi, x, mbmi, mbmi->mode)) {
1424
        // Restore color_map of palette mode before winner mode processing
1425
136k
        if (mbmi->palette_mode_info.palette_size[0] > 0) {
1426
0
          uint8_t *color_map_src =
1427
0
              x->winner_mode_stats[mode_idx].color_index_map;
1428
0
          memcpy(color_map_dst, color_map_src,
1429
0
                 block_width * block_height * sizeof(*color_map_src));
1430
0
        }
1431
        // Set params for winner mode evaluation
1432
136k
        set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
1433
1434
        // Winner mode processing
1435
        // If previous searches use only the default tx type/no R-D optimization
1436
        // of quantized coeffs, do an extra search for the best tx type/better
1437
        // R-D optimization of quantized coeffs
1438
136k
        if (intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
1439
136k
                            rate_tokenonly, distortion, skippable, &best_mbmi,
1440
136k
                            ctx))
1441
28.7k
          best_mode_idx = mode_idx;
1442
136k
      }
1443
136k
    }
1444
    // Copy color_map of palette mode for final winner mode
1445
68.3k
    if (best_mbmi.palette_mode_info.palette_size[0] > 0) {
1446
0
      uint8_t *color_map_src =
1447
0
          x->winner_mode_stats[best_mode_idx].color_index_map;
1448
0
      memcpy(color_map_dst, color_map_src,
1449
0
             block_width * block_height * sizeof(*color_map_src));
1450
0
    }
1451
18.4E
  } else {
1452
    // If previous searches use only the default tx type/no R-D optimization of
1453
    // quantized coeffs, do an extra search for the best tx type/better R-D
1454
    // optimization of quantized coeffs
1455
18.4E
    if (is_winner_mode_processing_enabled(cpi, x, mbmi, best_mbmi.mode)) {
1456
      // Set params for winner mode evaluation
1457
0
      set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
1458
0
      *mbmi = best_mbmi;
1459
0
      intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
1460
0
                      rate_tokenonly, distortion, skippable, &best_mbmi, ctx);
1461
0
    }
1462
18.4E
  }
1463
68.3k
  *mbmi = best_mbmi;
1464
68.3k
  av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
1465
68.3k
  return best_rd;
1466
112k
}