Coverage Report

Created: 2022-08-24 06:17

/src/aom/av1/common/reconintra.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
14
#include "config/aom_config.h"
15
#include "config/aom_dsp_rtcd.h"
16
#include "config/av1_rtcd.h"
17
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_mem/aom_mem.h"
20
#include "aom_ports/aom_once.h"
21
#include "aom_ports/mem.h"
22
#include "av1/common/av1_common_int.h"
23
#include "av1/common/cfl.h"
24
#include "av1/common/reconintra.h"
25
26
// Bit flags naming the neighbouring edges an intra prediction mode reads.
// They are OR-ed together per mode in extend_modes[]. Bit 0 is unassigned.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};

// Sizing constants for the intra-edge code.
// NOTE(review): presumably the number of edge-filter kernels, the taps per
// kernel and the largest edge that may be upsampled -- confirm at the users.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Size expression for neighbour-pixel buffers, derived from MAX_TX_SIZE.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38
39
static const uint8_t extend_modes[INTRA_MODES] = {
40
  NEED_ABOVE | NEED_LEFT,                   // DC
41
  NEED_ABOVE,                               // V
42
  NEED_LEFT,                                // H
43
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53
};
54
55
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are lookup data only and are never written, so they are declared
// const; they are reached through the const pointer table has_tr_tables[].
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129
  // 4X4
130
  has_tr_4x4,
131
  // 4X8,       8X4,            8X8
132
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
133
  // 8X16,      16X8,           16X16
134
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
135
  // 16X32,     32X16,          32X32
136
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
137
  // 32X64,     64X32,          64X64
138
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
139
  // 64x128,    128x64,         128x128
140
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
141
  // 4x16,      16x4,            8x32
142
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
143
  // 32x8,      16x64,           64x16
144
  has_tr_32x8, has_tr_16x64, has_tr_64x16
145
};
146
147
// Top-right availability bitmaps for square blocks visited in the order used
// by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_tr_*
// tables: block i lives at (table[i / 8] >> (i % 8)) & 1. Read-only lookup
// data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157
// The _vert_* tables are like the ordinary tables above, but describe the
158
// order we visit square blocks when doing a PARTITION_VERT_A or
159
// PARTITION_VERT_B. This is the same order as normal except for on the last
160
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161
// as a pair of squares, which means that these tables work correctly for both
162
// mixed vertical partition types.
163
//
164
// There are tables for each of the square sizes. Vertical rectangles (like
165
// BLOCK_16X32) use their respective "non-vert" table
166
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167
  // 4X4
168
  NULL,
169
  // 4X8,      8X4,         8X8
170
  has_tr_4x8, NULL, has_tr_vert_8x8,
171
  // 8X16,     16X8,        16X16
172
  has_tr_8x16, NULL, has_tr_vert_16x16,
173
  // 16X32,    32X16,       32X32
174
  has_tr_16x32, NULL, has_tr_vert_32x32,
175
  // 32X64,    64X32,       64X64
176
  has_tr_32x64, NULL, has_tr_vert_64x64,
177
  // 64x128,   128x64,      128x128
178
  has_tr_64x128, NULL, has_tr_128x128
179
};
180
181
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
182
7.03M
                                       BLOCK_SIZE bsize) {
183
7.03M
  const uint8_t *ret = NULL;
184
  // If this is a mixed vertical partition, look up bsize in orders_vert.
185
7.03M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
186
501k
    assert(bsize < BLOCK_SIZES);
187
501k
    ret = has_tr_vert_tables[bsize];
188
6.53M
  } else {
189
6.53M
    ret = has_tr_tables[bsize];
190
6.53M
  }
191
7.03M
  assert(ret);
192
7.03M
  return ret;
193
7.03M
}
194
195
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
196
                         int mi_col, int top_available, int right_available,
197
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
198
64.8M
                         int col_off, int ss_x, int ss_y) {
199
64.8M
  if (!top_available || !right_available) return 0;
200
201
56.7M
  const int bw_unit = mi_size_wide[bsize];
202
56.7M
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
203
56.7M
  const int top_right_count_unit = tx_size_wide_unit[txsz];
204
205
56.7M
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
206
37.1M
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
207
      // Special case: For 128x128 blocks, the transform unit whose
208
      // top-right corner is at the center of the block does in fact have
209
      // pixels available at its top-right corner.
210
621k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
211
621k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
212
79.6k
        return 1;
213
79.6k
      }
214
541k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
215
541k
      const int col_off_64 = col_off % plane_bw_unit_64;
216
541k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
217
621k
    }
218
36.4M
    return col_off + top_right_count_unit < plane_bw_unit;
219
37.1M
  } else {
220
    // All top-right pixels are in the block above, which is already available.
221
19.6M
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
222
223
14.9M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
224
14.9M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
225
14.9M
    const int sb_mi_size = mi_size_high[sb_size];
226
14.9M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
227
14.9M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
228
229
    // Top row of superblock: so top-right pixels are in the top and/or
230
    // top-right superblocks, both of which are already available.
231
14.9M
    if (blk_row_in_sb == 0) return 1;
232
233
    // Rightmost column of superblock (and not the top row): so top-right pixels
234
    // fall in the right superblock, which is not available yet.
235
8.26M
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
236
1.52M
      return 0;
237
1.52M
    }
238
239
    // General case (neither top row nor rightmost column): check if the
240
    // top-right block is coded before the current block.
241
6.73M
    const int this_blk_index =
242
6.73M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
243
6.73M
        blk_col_in_sb + 0;
244
6.73M
    const int idx1 = this_blk_index / 8;
245
6.73M
    const int idx2 = this_blk_index % 8;
246
6.73M
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
247
6.73M
    return (has_tr_table[idx1] >> idx2) & 1;
248
8.26M
  }
249
56.7M
}
250
251
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. Same packing: block i (raster order within the
// superblock) lives at (table[i / 8] >> (i % 8)) & 1.
//
// The tables are lookup data only and are never written, so they are declared
// const; they are reached through the const pointer table has_bl_tables[].
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314
  // 4X4
315
  has_bl_4x4,
316
  // 4X8,         8X4,         8X8
317
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
318
  // 8X16,        16X8,        16X16
319
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
320
  // 16X32,       32X16,       32X32
321
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
322
  // 32X64,       64X32,       64X64
323
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
324
  // 64x128,      128x64,      128x128
325
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
326
  // 4x16,        16x4,        8x32
327
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
328
  // 32x8,        16x64,       64x16
329
  has_bl_32x8, has_bl_16x64, has_bl_64x16
330
};
331
332
// Bottom-left availability bitmaps for square blocks visited in the order used
// by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_bl_*
// tables: block i lives at (table[i / 8] >> (i % 8)) & 1. Read-only lookup
// data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342
// The _vert_* tables are like the ordinary tables above, but describe the
343
// order we visit square blocks when doing a PARTITION_VERT_A or
344
// PARTITION_VERT_B. This is the same order as normal except for on the last
345
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346
// as a pair of squares, which means that these tables work correctly for both
347
// mixed vertical partition types.
348
//
349
// There are tables for each of the square sizes. Vertical rectangles (like
350
// BLOCK_16X32) use their respective "non-vert" table
351
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352
  // 4X4
353
  NULL,
354
  // 4X8,     8X4,         8X8
355
  has_bl_4x8, NULL, has_bl_vert_8x8,
356
  // 8X16,    16X8,        16X16
357
  has_bl_8x16, NULL, has_bl_vert_16x16,
358
  // 16X32,   32X16,       32X32
359
  has_bl_16x32, NULL, has_bl_vert_32x32,
360
  // 32X64,   64X32,       64X64
361
  has_bl_32x64, NULL, has_bl_vert_64x64,
362
  // 64x128,  128x64,      128x128
363
  has_bl_64x128, NULL, has_bl_128x128
364
};
365
366
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
367
7.30M
                                       BLOCK_SIZE bsize) {
368
7.30M
  const uint8_t *ret = NULL;
369
  // If this is a mixed vertical partition, look up bsize in orders_vert.
370
7.30M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
371
527k
    assert(bsize < BLOCK_SIZES);
372
527k
    ret = has_bl_vert_tables[bsize];
373
6.77M
  } else {
374
6.77M
    ret = has_bl_tables[bsize];
375
6.77M
  }
376
7.30M
  assert(ret);
377
7.30M
  return ret;
378
7.30M
}
379
380
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
381
                           int mi_col, int bottom_available, int left_available,
382
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
383
64.9M
                           int col_off, int ss_x, int ss_y) {
384
64.9M
  if (!bottom_available || !left_available) return 0;
385
386
  // Special case for 128x* blocks, when col_off is half the block width.
387
  // This is needed because 128x* superblocks are divided into 64x* blocks in
388
  // raster order
389
57.8M
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
390
627k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
391
627k
    const int col_off_64 = col_off % plane_bw_unit_64;
392
627k
    if (col_off_64 == 0) {
393
      // We are at the left edge of top-right or bottom-right 64x* block.
394
191k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
395
191k
      const int row_off_64 = row_off % plane_bh_unit_64;
396
191k
      const int plane_bh_unit =
397
191k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
398
      // Check if all bottom-left pixels are in the left 64x* block (which is
399
      // already coded).
400
191k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
401
191k
    }
402
627k
  }
403
404
57.6M
  if (col_off > 0) {
405
    // Bottom-left pixels are in the bottom-left block, which is not available.
406
36.9M
    return 0;
407
36.9M
  } else {
408
20.7M
    const int bh_unit = mi_size_high[bsize];
409
20.7M
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
410
20.7M
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
411
412
    // All bottom-left pixels are in the left block, which is already available.
413
20.7M
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
414
415
15.9M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
416
15.9M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
417
15.9M
    const int sb_mi_size = mi_size_high[sb_size];
418
15.9M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
419
15.9M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
420
421
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
422
    // and/or bottom-left superblocks. But only the left superblock is
423
    // available, so check if all required pixels fall in that superblock.
424
15.9M
    if (blk_col_in_sb == 0) {
425
5.70M
      const int blk_start_row_off =
426
5.70M
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
427
5.70M
          ss_y;
428
5.70M
      const int row_off_in_sb = blk_start_row_off + row_off;
429
5.70M
      const int sb_height_unit = sb_mi_size >> ss_y;
430
5.70M
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
431
5.70M
    }
432
433
    // Bottom row of superblock (and not the leftmost column): so bottom-left
434
    // pixels fall in the bottom superblock, which is not available yet.
435
10.2M
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
436
437
    // General case (neither leftmost column nor bottom row): check if the
438
    // bottom-left block is coded before the current block.
439
7.08M
    const int this_blk_index =
440
7.08M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
441
7.08M
        blk_col_in_sb + 0;
442
7.08M
    const int idx1 = this_blk_index / 8;
443
7.08M
    const int idx2 = this_blk_index % 8;
444
7.08M
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
445
7.08M
    return (has_bl_table[idx1] >> idx2) & 1;
446
10.2M
  }
447
57.6M
}
448
449
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
450
                              const uint8_t *above, const uint8_t *left);
451
452
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
453
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
454
455
#if CONFIG_AV1_HIGHBITDEPTH
456
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457
                                   const uint16_t *above, const uint16_t *left,
458
                                   int bd);
459
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461
#endif
462
463
2
static void init_intra_predictors_internal(void) {
464
2
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466
#if CONFIG_REALTIME_ONLY
467
#define INIT_RECTANGULAR(p, type)             \
468
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
469
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
470
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
471
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
472
  p[TX_16X32] = aom_##type##_predictor_16x32; \
473
  p[TX_32X16] = aom_##type##_predictor_32x16; \
474
  p[TX_32X64] = aom_##type##_predictor_32x64; \
475
  p[TX_64X32] = aom_##type##_predictor_64x32;
476
#else
477
2
#define INIT_RECTANGULAR(p, type)             \
478
40
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
479
40
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
480
40
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
481
40
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
482
40
  p[TX_16X32] = aom_##type##_predictor_16x32; \
483
40
  p[TX_32X16] = aom_##type##_predictor_32x16; \
484
40
  p[TX_32X64] = aom_##type##_predictor_32x64; \
485
40
  p[TX_64X32] = aom_##type##_predictor_64x32; \
486
40
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
487
40
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
488
40
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
489
40
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
490
40
  p[TX_16X64] = aom_##type##_predictor_16x64; \
491
40
  p[TX_64X16] = aom_##type##_predictor_64x16;
492
2
#endif
493
494
2
#define INIT_NO_4X4(p, type)                  \
495
40
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
496
40
  p[TX_16X16] = aom_##type##_predictor_16x16; \
497
40
  p[TX_32X32] = aom_##type##_predictor_32x32; \
498
40
  p[TX_64X64] = aom_##type##_predictor_64x64; \
499
40
  INIT_RECTANGULAR(p, type)
500
501
2
#define INIT_ALL_SIZES(p, type)           \
502
40
  p[TX_4X4] = aom_##type##_predictor_4x4; \
503
40
  INIT_NO_4X4(p, type)
504
505
2
  INIT_ALL_SIZES(pred[V_PRED], v);
506
2
  INIT_ALL_SIZES(pred[H_PRED], h);
507
2
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
508
2
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
509
2
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
510
2
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
511
2
  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
512
2
  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
513
2
  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
514
2
  INIT_ALL_SIZES(dc_pred[1][1], dc);
515
2
#if CONFIG_AV1_HIGHBITDEPTH
516
2
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
517
2
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
518
2
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
519
2
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
520
2
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
521
2
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
522
2
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
523
2
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
524
2
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
525
2
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
526
2
#endif
527
2
#undef intra_pred_allsizes
528
2
}
529
530
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block at `dst` (row pitch `stride`) from the `above`
// edge only; `left` is unused and `dy` must be 1. The position along the edge
// advances by `dx` per row and carries 6 fractional bits (one fewer integer
// step per sample when upsample_above is set). Each output is a 5-bit weighted
// blend of two adjacent edge samples; positions at or beyond index
// ((bw + bh) - 1) << upsample_above repeat that last edge sample.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int pos_frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint8_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r) {
    const int first_idx = pos >> pos_frac_bits;

    if (first_idx >= last_idx) {
      // This row and every row below it start past the end of the edge:
      // flood them with the last edge sample and stop.
      for (; r < bh; ++r) {
        memset(row, above[last_idx], bw * sizeof(row[0]));
        row += stride;
      }
      return;
    }

    // Blend weight of the right-hand sample, in 1/32 steps.
    const int w_hi = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    int idx = first_idx;
    for (int c = 0; c < bw; ++c) {
      if (idx < last_idx) {
        const int acc = above[idx] * w_lo + above[idx + 1] * w_hi;
        row[c] = ROUND_POWER_OF_TWO(acc, 5);
      } else {
        row[c] = above[last_idx];
      }
      idx += idx_step;
    }

    row += stride;
    pos += dx;
  }
}
567
568
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block at `dst` (row pitch `stride`). For every pixel the
// projected position on the `above` edge is tried first; if it falls left of
// index -(1 << upsample_above), the pixel is projected onto the `left` edge
// instead. Either way the output is a 5-bit weighted blend of two adjacent
// edge samples. Both edges are read at negative indices.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      const int pos_x = (c << 6) - (r + 1) * dx;
      const int base_x = pos_x >> frac_bits_x;

      const uint8_t *ref;
      int w_hi;
      if (base_x >= min_base_x) {
        // Multiply rather than shift: pos_x may be negative.
        w_hi = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
        ref = above + base_x;
      } else {
        const int pos_y = (r << 6) - (c + 1) * dy;
        const int base_y = pos_y >> frac_bits_y;
        assert(base_y >= min_base_y);
        w_hi = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
        ref = left + base_y;
      }

      const int acc = ref[0] * (32 - w_hi) + ref[1] * w_hi;
      row[c] = ROUND_POWER_OF_TWO(acc, 5);
    }
  }
}
606
607
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block at `dst` (row pitch `stride`) from the `left` edge
// only; `above` is unused and `dx` must be 1. The block is filled column by
// column: the position along the edge advances by `dy` per column and carries
// 6 fractional bits. Each output is a 5-bit weighted blend of two adjacent
// edge samples; once the index reaches (bw + bh - 1) << upsample_left the rest
// of the column repeats that last edge sample.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int pos_frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int c = 0; c < bw; ++c, pos += dy) {
    // Blend weight of the lower sample, in 1/32 steps.
    const int w_hi = ((pos << upsample_left) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    int idx = pos >> pos_frac_bits;
    int r = 0;
    // Interpolated part of the column.
    for (; r < bh && idx < last_idx; ++r, idx += idx_step) {
      const int acc = left[idx] * w_lo + left[idx + 1] * w_hi;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(acc, 5);
    }
    // Remainder of the column lies past the end of the edge.
    for (; r < bh; ++r) dst[r * stride + c] = left[last_idx];
  }
}
638
639
// Dispatches one directional prediction of size `tx_size` by angle:
//   angle == 90        -> pred[V_PRED]
//   angle == 180       -> pred[H_PRED]
//   0 < angle < 90     -> av1_dr_prediction_z1 (above edge)
//   90 < angle < 180   -> av1_dr_prediction_z2 (above and left edges)
//   180 < angle < 270  -> av1_dr_prediction_z3 (left edge)
// `angle` is asserted to lie strictly between 0 and 270.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Exactly vertical / horizontal: use the plain V / H predictors.
  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
663
664
#if CONFIG_AV1_HIGHBITDEPTH
665
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
//
// Predicts a bw x bh block at `dst` (row pitch `stride`) from the `above`
// edge only; `left` and `bd` are unused and `dy` must be 1. The position
// along the edge advances by `dx` per row and carries 6 fractional bits. Each
// output is a 5-bit weighted blend of two adjacent edge samples; positions at
// or beyond index ((bw + bh) - 1) << upsample_above repeat that last sample.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int pos_frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint16_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r) {
    const int first_idx = pos >> pos_frac_bits;

    if (first_idx >= last_idx) {
      // This row and every row below it start past the end of the edge:
      // flood them with the last edge sample and stop.
      for (; r < bh; ++r) {
        aom_memset16(row, above[last_idx], bw);
        row += stride;
      }
      return;
    }

    // Blend weight of the right-hand sample, in 1/32 steps.
    const int w_hi = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    int idx = first_idx;
    for (int c = 0; c < bw; ++c) {
      if (idx < last_idx) {
        const int acc = above[idx] * w_lo + above[idx + 1] * w_hi;
        row[c] = ROUND_POWER_OF_TWO(acc, 5);
      } else {
        row[c] = above[last_idx];
      }
      idx += idx_step;
    }

    row += stride;
    pos += dx;
  }
}
704
705
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
//
// Predicts a bw x bh block at `dst` (row pitch `stride`). For every pixel the
// projected position on the `above` edge is tried first; if it falls left of
// index -(1 << upsample_above), the pixel is projected onto the `left` edge
// instead. Either way the output is a 5-bit weighted blend of two adjacent
// edge samples. Both edges are read at negative indices. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      const int pos_x = (c << 6) - (r + 1) * dx;
      const int base_x = pos_x >> frac_bits_x;

      const uint16_t *ref;
      int w_hi;
      if (base_x >= min_base_x) {
        // Multiply rather than shift: pos_x may be negative.
        w_hi = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
        ref = above + base_x;
      } else {
        const int pos_y = (r << 6) - (c + 1) * dy;
        const int base_y = pos_y >> frac_bits_y;
        assert(base_y >= min_base_y);
        w_hi = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
        ref = left + base_y;
      }

      const int acc = ref[0] * (32 - w_hi) + ref[1] * w_hi;
      row[c] = ROUND_POWER_OF_TWO(acc, 5);
    }
  }
}
744
745
// Directional prediction, zone 3: 180 < angle < 270 (high bit-depth).
//
// Only the left edge is read. Output column c samples the edge starting at
// position (c + 1) * dy and walks down it one (upsampled) step per row,
// blending two adjacent samples with a 5-bit weight. Rows whose sampling
// index reaches (bw + bh - 1) << upsample_left receive that last edge
// sample. `above`, `dx` and `bd` are unused.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int pos_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col) {
    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> pos_bits;
    int row = 0;

    // Interpolate for as long as the sampling index stays inside the edge.
    while (row < bh && idx < last_idx) {
      const int blended = left[idx] * (32 - weight) + left[idx + 1] * weight;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blended, 5);
      idx += idx_step;
      ++row;
    }

    // Whatever is left of the column lies beyond the edge.
    while (row < bh) {
      dst[row * stride + col] = left[last_idx];
      ++row;
    }

    pos += dy;
  }
}
777
778
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
779
                                TX_SIZE tx_size, const uint16_t *above,
780
                                const uint16_t *left, int upsample_above,
781
1.03M
                                int upsample_left, int angle, int bd) {
782
1.03M
  const int dx = av1_get_dx(angle);
783
1.03M
  const int dy = av1_get_dy(angle);
784
1.03M
  const int bw = tx_size_wide[tx_size];
785
1.03M
  const int bh = tx_size_high[tx_size];
786
1.03M
  assert(angle > 0 && angle < 270);
787
788
1.03M
  if (angle > 0 && angle < 90) {
789
185k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
790
185k
                                upsample_above, dx, dy, bd);
791
852k
  } else if (angle > 90 && angle < 180) {
792
343k
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
793
343k
                                upsample_above, upsample_left, dx, dy, bd);
794
509k
  } else if (angle > 180 && angle < 270) {
795
220k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
796
220k
                                dx, dy, bd);
797
288k
  } else if (angle == 90) {
798
106k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
799
181k
  } else if (angle == 180) {
800
181k
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
801
181k
  }
802
1.03M
}
803
#endif  // CONFIG_AV1_HIGHBITDEPTH
804
805
// Filter-intra tap table, indexed as [mode][k][tap].
//
// As used by the filter-intra predictors in this file:
//   - `k` (0..7) selects the output pixel inside a 4x2 patch
//     (row k >> 2, column k & 3);
//   - taps 0..6 weight the seven neighbours p0..p6 of that patch
//     (p0 = above-left, p1..p4 = the four pixels above, p5/p6 = the two
//     pixels to the left);
//   - tap 7 is never read by those predictors and is 0 in every row
//     (presumably padding to a width of 8 -- TODO confirm against the SIMD
//     implementations).
// The weighted sum is rounded with FILTER_INTRA_SCALE_BITS by the callers.
DECLARE_ALIGNED(16, const int8_t,
806
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
807
  {
808
      { -6, 10, 0, 0, 0, 12, 0, 0 },
809
      { -5, 2, 10, 0, 0, 9, 0, 0 },
810
      { -3, 1, 1, 10, 0, 7, 0, 0 },
811
      { -3, 1, 1, 2, 10, 5, 0, 0 },
812
      { -4, 6, 0, 0, 0, 2, 12, 0 },
813
      { -3, 2, 6, 0, 0, 2, 9, 0 },
814
      { -3, 2, 2, 6, 0, 2, 7, 0 },
815
      { -3, 1, 2, 2, 6, 3, 5, 0 },
816
  },
817
  {
818
      { -10, 16, 0, 0, 0, 10, 0, 0 },
819
      { -6, 0, 16, 0, 0, 6, 0, 0 },
820
      { -4, 0, 0, 16, 0, 4, 0, 0 },
821
      { -2, 0, 0, 0, 16, 2, 0, 0 },
822
      { -10, 16, 0, 0, 0, 0, 10, 0 },
823
      { -6, 0, 16, 0, 0, 0, 6, 0 },
824
      { -4, 0, 0, 16, 0, 0, 4, 0 },
825
      { -2, 0, 0, 0, 16, 0, 2, 0 },
826
  },
827
  {
828
      { -8, 8, 0, 0, 0, 16, 0, 0 },
829
      { -8, 0, 8, 0, 0, 16, 0, 0 },
830
      { -8, 0, 0, 8, 0, 16, 0, 0 },
831
      { -8, 0, 0, 0, 8, 16, 0, 0 },
832
      { -4, 4, 0, 0, 0, 0, 16, 0 },
833
      { -4, 0, 4, 0, 0, 0, 16, 0 },
834
      { -4, 0, 0, 4, 0, 0, 16, 0 },
835
      { -4, 0, 0, 0, 4, 0, 16, 0 },
836
  },
837
  {
838
      { -2, 8, 0, 0, 0, 10, 0, 0 },
839
      { -1, 3, 8, 0, 0, 6, 0, 0 },
840
      { -1, 2, 3, 8, 0, 4, 0, 0 },
841
      { 0, 1, 2, 3, 8, 2, 0, 0 },
842
      { -1, 4, 0, 0, 0, 3, 10, 0 },
843
      { -1, 3, 4, 0, 0, 4, 6, 0 },
844
      { -1, 2, 3, 4, 0, 4, 4, 0 },
845
      { -1, 2, 2, 3, 4, 3, 3, 0 },
846
  },
847
  {
848
      { -12, 14, 0, 0, 0, 14, 0, 0 },
849
      { -10, 0, 14, 0, 0, 12, 0, 0 },
850
      { -9, 0, 0, 14, 0, 11, 0, 0 },
851
      { -8, 0, 0, 0, 14, 10, 0, 0 },
852
      { -10, 12, 0, 0, 0, 0, 14, 0 },
853
      { -9, 1, 12, 0, 0, 0, 12, 0 },
854
      { -8, 0, 0, 12, 0, 1, 11, 0 },
855
      { -7, 0, 0, 1, 12, 1, 9, 0 },
856
  },
857
};
858
859
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
860
                                  TX_SIZE tx_size, const uint8_t *above,
861
777k
                                  const uint8_t *left, int mode) {
862
777k
  int r, c;
863
777k
  uint8_t buffer[33][33];
864
777k
  const int bw = tx_size_wide[tx_size];
865
777k
  const int bh = tx_size_high[tx_size];
866
867
777k
  assert(bw <= 32 && bh <= 32);
868
869
6.60M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
870
777k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
871
872
3.68M
  for (r = 1; r < bh + 1; r += 2)
873
11.0M
    for (c = 1; c < bw + 1; c += 4) {
874
8.17M
      const uint8_t p0 = buffer[r - 1][c - 1];
875
8.17M
      const uint8_t p1 = buffer[r - 1][c];
876
8.17M
      const uint8_t p2 = buffer[r - 1][c + 1];
877
8.17M
      const uint8_t p3 = buffer[r - 1][c + 2];
878
8.17M
      const uint8_t p4 = buffer[r - 1][c + 3];
879
8.17M
      const uint8_t p5 = buffer[r][c - 1];
880
8.17M
      const uint8_t p6 = buffer[r + 1][c - 1];
881
73.5M
      for (int k = 0; k < 8; ++k) {
882
65.4M
        int r_offset = k >> 2;
883
65.4M
        int c_offset = k & 0x03;
884
65.4M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
885
65.4M
                 av1_filter_intra_taps[mode][k][1] * p1 +
886
65.4M
                 av1_filter_intra_taps[mode][k][2] * p2 +
887
65.4M
                 av1_filter_intra_taps[mode][k][3] * p3 +
888
65.4M
                 av1_filter_intra_taps[mode][k][4] * p4 +
889
65.4M
                 av1_filter_intra_taps[mode][k][5] * p5 +
890
65.4M
                 av1_filter_intra_taps[mode][k][6] * p6;
891
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
892
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
893
        // Since Clip1() clips a negative value to 0, it is safe to replace
894
        // Round2Signed() with Round2().
895
65.4M
        buffer[r + r_offset][c + c_offset] =
896
65.4M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
897
65.4M
      }
898
8.17M
    }
899
900
6.60M
  for (r = 0; r < bh; ++r) {
901
5.82M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
902
5.82M
    dst += stride;
903
5.82M
  }
904
777k
}
905
906
#if CONFIG_AV1_HIGHBITDEPTH
907
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
908
                                          TX_SIZE tx_size,
909
                                          const uint16_t *above,
910
                                          const uint16_t *left, int mode,
911
180k
                                          int bd) {
912
180k
  int r, c;
913
180k
  uint16_t buffer[33][33];
914
180k
  const int bw = tx_size_wide[tx_size];
915
180k
  const int bh = tx_size_high[tx_size];
916
917
180k
  assert(bw <= 32 && bh <= 32);
918
919
1.90M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
920
180k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
921
922
1.04M
  for (r = 1; r < bh + 1; r += 2)
923
3.61M
    for (c = 1; c < bw + 1; c += 4) {
924
2.75M
      const uint16_t p0 = buffer[r - 1][c - 1];
925
2.75M
      const uint16_t p1 = buffer[r - 1][c];
926
2.75M
      const uint16_t p2 = buffer[r - 1][c + 1];
927
2.75M
      const uint16_t p3 = buffer[r - 1][c + 2];
928
2.75M
      const uint16_t p4 = buffer[r - 1][c + 3];
929
2.75M
      const uint16_t p5 = buffer[r][c - 1];
930
2.75M
      const uint16_t p6 = buffer[r + 1][c - 1];
931
24.7M
      for (int k = 0; k < 8; ++k) {
932
22.0M
        int r_offset = k >> 2;
933
22.0M
        int c_offset = k & 0x03;
934
22.0M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
935
22.0M
                 av1_filter_intra_taps[mode][k][1] * p1 +
936
22.0M
                 av1_filter_intra_taps[mode][k][2] * p2 +
937
22.0M
                 av1_filter_intra_taps[mode][k][3] * p3 +
938
22.0M
                 av1_filter_intra_taps[mode][k][4] * p4 +
939
22.0M
                 av1_filter_intra_taps[mode][k][5] * p5 +
940
22.0M
                 av1_filter_intra_taps[mode][k][6] * p6;
941
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
942
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
943
        // Since Clip1() clips a negative value to 0, it is safe to replace
944
        // Round2Signed() with Round2().
945
22.0M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
946
22.0M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
947
22.0M
      }
948
2.75M
    }
949
950
1.90M
  for (r = 0; r < bh; ++r) {
951
1.72M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
952
1.72M
    dst += stride;
953
1.72M
  }
954
180k
}
955
#endif  // CONFIG_AV1_HIGHBITDEPTH
956
957
100M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
958
100M
  if (plane == 0) {
959
83.7M
    const PREDICTION_MODE mode = mbmi->mode;
960
83.7M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
961
83.7M
            mode == SMOOTH_H_PRED);
962
83.7M
  } else {
963
    // uv_mode is not set for inter blocks, so need to explicitly
964
    // detect that case.
965
16.6M
    if (is_inter_block(mbmi)) return 0;
966
967
16.5M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
968
16.5M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
969
16.5M
            uv_mode == UV_SMOOTH_H_PRED);
970
16.6M
  }
971
100M
}
972
973
64.9M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
974
64.9M
  int ab_sm, le_sm;
975
976
64.9M
  if (plane == 0) {
977
55.8M
    const MB_MODE_INFO *ab = xd->above_mbmi;
978
55.8M
    const MB_MODE_INFO *le = xd->left_mbmi;
979
55.8M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
980
55.8M
    le_sm = le ? is_smooth(le, plane) : 0;
981
55.8M
  } else {
982
9.11M
    const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
983
9.11M
    const MB_MODE_INFO *le = xd->chroma_left_mbmi;
984
9.11M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
985
9.11M
    le_sm = le ? is_smooth(le, plane) : 0;
986
9.11M
  }
987
988
64.9M
  return (ab_sm || le_sm) ? 1 : 0;
989
64.9M
}
990
991
59.5M
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
//   bs0, bs1  block dimensions; only their sum is used
//   delta     angle offset; only its magnitude is used
//   type      0 selects the first threshold set, any other value the second
//
// Larger blocks and larger angle offsets give stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int d = abs(delta);
  const int blk_wh = bs0 + bs1;

  if (type != 0) {
    if (blk_wh <= 8) return (d >= 64) ? 2 : ((d >= 40) ? 1 : 0);
    if (blk_wh <= 16) return (d >= 48) ? 2 : ((d >= 20) ? 1 : 0);
    if (blk_wh <= 24) return (d >= 4) ? 3 : 0;
    return (d >= 1) ? 3 : 0;
  }

  if (blk_wh <= 8) return (d >= 56) ? 1 : 0;
  // The (8, 12] and (12, 16] size classes share the same threshold.
  if (blk_wh <= 16) return (d >= 40) ? 1 : 0;
  if (blk_wh <= 24) {
    if (d >= 32) return 3;
    if (d >= 16) return 2;
    return (d >= 8) ? 1 : 0;
  }
  if (blk_wh <= 32) {
    if (d >= 32) return 3;
    if (d >= 4) return 2;
    return (d >= 1) ? 1 : 0;
  }
  return (d >= 1) ? 3 : 0;
}
1029
1030
58.6M
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1031
58.6M
  if (!strength) return;
1032
1033
22.8M
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1034
22.8M
                                                         { 0, 5, 6, 5, 0 },
1035
22.8M
                                                         { 2, 4, 4, 4, 2 } };
1036
22.8M
  const int filt = strength - 1;
1037
22.8M
  uint8_t edge[129];
1038
1039
22.8M
  memcpy(edge, p, sz * sizeof(*p));
1040
280M
  for (int i = 1; i < sz; i++) {
1041
257M
    int s = 0;
1042
1.54G
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1043
1.28G
      int k = i - 2 + j;
1044
1.28G
      k = (k < 0) ? 0 : k;
1045
1.28G
      k = (k > sz - 1) ? sz - 1 : k;
1046
1.28G
      s += edge[k] * kernel[filt][j];
1047
1.28G
    }
1048
257M
    s = (s + 8) >> 4;
1049
257M
    p[i] = s;
1050
257M
  }
1051
22.8M
}
1052
1053
2.34M
// Smooths the above-left corner sample with a [5 6 5] / 16 filter across
// p_left[0], p_above[-1] and p_above[0], and stores the result in both
// p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted = 5 * (p_left[0] + p_above[0]) + 6 * p_above[-1];
  const uint8_t corner = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1062
1063
858k
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1064
858k
  if (!strength) return;
1065
1066
516k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1067
516k
                                                         { 0, 5, 6, 5, 0 },
1068
516k
                                                         { 2, 4, 4, 4, 2 } };
1069
516k
  const int filt = strength - 1;
1070
516k
  uint16_t edge[129];
1071
1072
516k
  memcpy(edge, p, sz * sizeof(*p));
1073
10.8M
  for (int i = 1; i < sz; i++) {
1074
10.3M
    int s = 0;
1075
61.9M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1076
51.6M
      int k = i - 2 + j;
1077
51.6M
      k = (k < 0) ? 0 : k;
1078
51.6M
      k = (k > sz - 1) ? sz - 1 : k;
1079
51.6M
      s += edge[k] * kernel[filt][j];
1080
51.6M
    }
1081
10.3M
    s = (s + 8) >> 4;
1082
10.3M
    p[i] = s;
1083
10.3M
  }
1084
516k
}
1085
1086
#if CONFIG_AV1_HIGHBITDEPTH
1087
100k
// High bit-depth corner smoothing: applies a [5 6 5] / 16 filter across
// p_left[0], p_above[-1] and p_above[0], and stores the result in both
// p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int weighted = 5 * (p_left[0] + p_above[0]) + 6 * p_above[-1];
  const uint16_t corner = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1096
#endif
1097
1098
36.3M
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1099
  // interpolate half-sample positions
1100
36.3M
  assert(sz <= MAX_UPSAMPLE_SZ);
1101
1102
36.3M
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1103
  // copy p[-1..(sz-1)] and extend first and last samples
1104
36.3M
  in[0] = p[-1];
1105
36.3M
  in[1] = p[-1];
1106
271M
  for (int i = 0; i < sz; i++) {
1107
235M
    in[i + 2] = p[i];
1108
235M
  }
1109
36.3M
  in[sz + 2] = p[sz - 1];
1110
1111
  // interpolate half-sample edge positions
1112
36.3M
  p[-2] = in[0];
1113
271M
  for (int i = 0; i < sz; i++) {
1114
235M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1115
235M
    s = clip_pixel((s + 8) >> 4);
1116
235M
    p[2 * i - 1] = s;
1117
235M
    p[2 * i] = in[i + 2];
1118
235M
  }
1119
36.3M
}
1120
1121
275k
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1122
  // interpolate half-sample positions
1123
275k
  assert(sz <= MAX_UPSAMPLE_SZ);
1124
1125
275k
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1126
  // copy p[-1..(sz-1)] and extend first and last samples
1127
275k
  in[0] = p[-1];
1128
275k
  in[1] = p[-1];
1129
2.54M
  for (int i = 0; i < sz; i++) {
1130
2.27M
    in[i + 2] = p[i];
1131
2.27M
  }
1132
275k
  in[sz + 2] = p[sz - 1];
1133
1134
  // interpolate half-sample edge positions
1135
275k
  p[-2] = in[0];
1136
2.54M
  for (int i = 0; i < sz; i++) {
1137
2.27M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1138
2.27M
    s = (s + 8) >> 4;
1139
2.27M
    s = clip_pixel_highbd(s, bd);
1140
2.27M
    p[2 * i - 1] = s;
1141
2.27M
    p[2 * i] = in[i + 2];
1142
2.27M
  }
1143
275k
}
1144
#if CONFIG_AV1_HIGHBITDEPTH
1145
static void build_intra_predictors_high(
1146
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1147
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
1148
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1149
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1150
5.11M
    int bit_depth) {
1151
5.11M
  int i;
1152
5.11M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1153
5.11M
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
1154
5.11M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1155
5.11M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1156
5.11M
  uint16_t *const above_row = above_data + 16;
1157
5.11M
  uint16_t *const left_col = left_data + 16;
1158
5.11M
  const int txwpx = tx_size_wide[tx_size];
1159
5.11M
  const int txhpx = tx_size_high[tx_size];
1160
5.11M
  int need_left = extend_modes[mode] & NEED_LEFT;
1161
5.11M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1162
5.11M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1163
5.11M
  const uint16_t *above_ref = ref - ref_stride;
1164
5.11M
  const uint16_t *left_ref = ref - 1;
1165
5.11M
  int p_angle = 0;
1166
5.11M
  const int is_dr_mode = av1_is_directional_mode(mode);
1167
5.11M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1168
5.11M
  int base = 128 << (bit_depth - 8);
1169
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1170
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1171
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1172
  // seen to be the potential reason for this issue.
1173
5.11M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1174
5.11M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1175
1176
  // The default values if ref pixels are not available:
1177
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1178
  // base+1   A      B  ..     Y      Z
1179
  // base+1   C      D  ..     W      X
1180
  // base+1   E      F  ..     U      V
1181
  // base+1   G      H  ..     S      T      T      T      T      T
1182
1183
5.11M
  if (is_dr_mode) {
1184
1.06M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1185
1.06M
    if (p_angle <= 90)
1186
315k
      need_above = 1, need_left = 0, need_above_left = 1;
1187
745k
    else if (p_angle < 180)
1188
343k
      need_above = 1, need_left = 1, need_above_left = 1;
1189
402k
    else
1190
402k
      need_above = 0, need_left = 1, need_above_left = 1;
1191
1.06M
  }
1192
5.11M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1193
1194
5.11M
  assert(n_top_px >= 0);
1195
5.11M
  assert(n_topright_px >= 0);
1196
5.11M
  assert(n_left_px >= 0);
1197
5.11M
  assert(n_bottomleft_px >= 0);
1198
1199
5.11M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1200
23.2k
    int val;
1201
23.2k
    if (need_left) {
1202
540
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1203
22.7k
    } else {
1204
22.7k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1205
22.7k
    }
1206
638k
    for (i = 0; i < txhpx; ++i) {
1207
614k
      aom_memset16(dst, val, txwpx);
1208
614k
      dst += dst_stride;
1209
614k
    }
1210
23.2k
    return;
1211
23.2k
  }
1212
1213
  // NEED_LEFT
1214
5.08M
  if (need_left) {
1215
4.79M
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
1216
4.79M
    if (use_filter_intra) need_bottom = 0;
1217
4.79M
    if (is_dr_mode) need_bottom = p_angle > 180;
1218
4.79M
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
1219
4.79M
    i = 0;
1220
4.79M
    if (n_left_px > 0) {
1221
81.3M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1222
4.77M
      if (need_bottom && n_bottomleft_px > 0) {
1223
78.0k
        assert(i == txhpx);
1224
760k
        for (; i < txhpx + n_bottomleft_px; i++)
1225
682k
          left_col[i] = left_ref[i * ref_stride];
1226
78.0k
      }
1227
4.77M
      if (i < num_left_pixels_needed)
1228
190k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1229
4.77M
    } else if (n_top_px > 0) {
1230
15.2k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1231
15.2k
    }
1232
4.79M
  }
1233
1234
  // NEED_ABOVE
1235
5.08M
  if (need_above) {
1236
4.68M
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
1237
4.68M
    if (use_filter_intra) need_right = 0;
1238
4.68M
    if (is_dr_mode) need_right = p_angle < 90;
1239
4.68M
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
1240
4.68M
    if (n_top_px > 0) {
1241
4.36M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1242
4.36M
      i = n_top_px;
1243
4.36M
      if (need_right && n_topright_px > 0) {
1244
120k
        assert(n_top_px == txwpx);
1245
120k
        memcpy(above_row + txwpx, above_ref + txwpx,
1246
120k
               n_topright_px * sizeof(above_ref[0]));
1247
120k
        i += n_topright_px;
1248
120k
      }
1249
4.36M
      if (i < num_top_pixels_needed)
1250
92.0k
        aom_memset16(&above_row[i], above_row[i - 1],
1251
92.0k
                     num_top_pixels_needed - i);
1252
4.36M
    } else if (n_left_px > 0) {
1253
322k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1254
322k
    }
1255
4.68M
  }
1256
1257
5.08M
  if (need_above_left) {
1258
2.81M
    if (n_top_px > 0 && n_left_px > 0) {
1259
2.62M
      above_row[-1] = above_ref[-1];
1260
2.62M
    } else if (n_top_px > 0) {
1261
8.93k
      above_row[-1] = above_ref[0];
1262
181k
    } else if (n_left_px > 0) {
1263
180k
      above_row[-1] = left_ref[0];
1264
180k
    } else {
1265
434
      above_row[-1] = base;
1266
434
    }
1267
2.81M
    left_col[-1] = above_row[-1];
1268
2.81M
  }
1269
1270
5.08M
  if (use_filter_intra) {
1271
180k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1272
180k
                                  filter_intra_mode, bit_depth);
1273
180k
    return;
1274
180k
  }
1275
1276
4.90M
  if (is_dr_mode) {
1277
1.03M
    int upsample_above = 0;
1278
1.03M
    int upsample_left = 0;
1279
1.03M
    if (!disable_edge_filter) {
1280
826k
      const int need_right = p_angle < 90;
1281
826k
      const int need_bottom = p_angle > 180;
1282
826k
      if (p_angle != 90 && p_angle != 180) {
1283
599k
        const int ab_le = need_above_left ? 1 : 0;
1284
599k
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1285
100k
          filter_intra_edge_corner_high(above_row, left_col);
1286
100k
        }
1287
599k
        if (need_above && n_top_px > 0) {
1288
413k
          const int strength = intra_edge_filter_strength(
1289
413k
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1290
413k
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1291
413k
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
1292
413k
        }
1293
599k
        if (need_left && n_left_px > 0) {
1294
444k
          const int strength = intra_edge_filter_strength(
1295
444k
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1296
444k
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1297
444k
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
1298
444k
        }
1299
599k
      }
1300
826k
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1301
826k
                                                   intra_edge_filter_type);
1302
826k
      if (need_above && upsample_above) {
1303
122k
        const int n_px = txwpx + (need_right ? txhpx : 0);
1304
122k
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
1305
122k
      }
1306
826k
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1307
826k
                                                  intra_edge_filter_type);
1308
826k
      if (need_left && upsample_left) {
1309
153k
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1310
153k
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
1311
153k
      }
1312
826k
    }
1313
1.03M
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1314
1.03M
                        upsample_above, upsample_left, p_angle, bit_depth);
1315
1.03M
    return;
1316
1.03M
  }
1317
1318
  // predict
1319
3.87M
  if (mode == DC_PRED) {
1320
1.67M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1321
1.67M
        dst, dst_stride, above_row, left_col, bit_depth);
1322
2.20M
  } else {
1323
2.20M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1324
2.20M
  }
1325
3.87M
}
1326
#endif  // CONFIG_AV1_HIGHBITDEPTH
1327
1328
static void build_intra_predictors(
1329
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1330
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
1331
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1332
59.6M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1333
59.6M
  int i;
1334
59.6M
  const uint8_t *above_ref = ref - ref_stride;
1335
59.6M
  const uint8_t *left_ref = ref - 1;
1336
59.6M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1337
59.6M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1338
59.6M
  uint8_t *const above_row = above_data + 16;
1339
59.6M
  uint8_t *const left_col = left_data + 16;
1340
59.6M
  const int txwpx = tx_size_wide[tx_size];
1341
59.6M
  const int txhpx = tx_size_high[tx_size];
1342
59.6M
  int need_left = extend_modes[mode] & NEED_LEFT;
1343
59.6M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1344
59.6M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1345
59.6M
  int p_angle = 0;
1346
59.6M
  const int is_dr_mode = av1_is_directional_mode(mode);
1347
59.6M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1348
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1349
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1350
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1351
  // be the potential reason for this issue.
1352
59.6M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1353
59.6M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1354
1355
  // The default values if ref pixels are not available:
1356
  // 128 127 127 .. 127 127 127 127 127 127
1357
  // 129  A   B  ..  Y   Z
1358
  // 129  C   D  ..  W   X
1359
  // 129  E   F  ..  U   V
1360
  // 129  G   H  ..  S   T   T   T   T   T
1361
  // ..
1362
1363
59.6M
  if (is_dr_mode) {
1364
44.8M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1365
44.8M
    if (p_angle <= 90)
1366
12.6M
      need_above = 1, need_left = 0, need_above_left = 1;
1367
32.1M
    else if (p_angle < 180)
1368
20.6M
      need_above = 1, need_left = 1, need_above_left = 1;
1369
11.5M
    else
1370
11.5M
      need_above = 0, need_left = 1, need_above_left = 1;
1371
44.8M
  }
1372
59.6M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1373
1374
59.6M
  assert(n_top_px >= 0);
1375
59.6M
  assert(n_topright_px >= 0);
1376
59.6M
  assert(n_left_px >= 0);
1377
59.6M
  assert(n_bottomleft_px >= 0);
1378
1379
59.6M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1380
1.60M
    int val;
1381
1.60M
    if (need_left) {
1382
662k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1383
942k
    } else {
1384
942k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1385
942k
    }
1386
22.9M
    for (i = 0; i < txhpx; ++i) {
1387
21.3M
      memset(dst, val, txwpx);
1388
21.3M
      dst += dst_stride;
1389
21.3M
    }
1390
1.60M
    return;
1391
1.60M
  }
1392
1393
  // NEED_LEFT
1394
58.0M
  if (need_left) {
1395
46.4M
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
1396
46.4M
    if (use_filter_intra) need_bottom = 0;
1397
46.4M
    if (is_dr_mode) need_bottom = p_angle > 180;
1398
46.4M
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
1399
46.4M
    i = 0;
1400
46.4M
    if (n_left_px > 0) {
1401
396M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1402
44.7M
      if (need_bottom && n_bottomleft_px > 0) {
1403
1.57M
        assert(i == txhpx);
1404
15.1M
        for (; i < txhpx + n_bottomleft_px; i++)
1405
13.5M
          left_col[i] = left_ref[i * ref_stride];
1406
1.57M
      }
1407
44.7M
      if (i < num_left_pixels_needed)
1408
8.69M
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1409
44.7M
    } else if (n_top_px > 0) {
1410
1.47M
      memset(left_col, above_ref[0], num_left_pixels_needed);
1411
1.47M
    }
1412
46.4M
  }
1413
1414
  // NEED_ABOVE
1415
58.0M
  if (need_above) {
1416
47.0M
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
1417
47.0M
    if (use_filter_intra) need_right = 0;
1418
47.0M
    if (is_dr_mode) need_right = p_angle < 90;
1419
47.0M
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
1420
47.0M
    if (n_top_px > 0) {
1421
44.6M
      memcpy(above_row, above_ref, n_top_px);
1422
44.6M
      i = n_top_px;
1423
44.6M
      if (need_right && n_topright_px > 0) {
1424
8.75M
        assert(n_top_px == txwpx);
1425
8.75M
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1426
8.75M
        i += n_topright_px;
1427
8.75M
      }
1428
44.6M
      if (i < num_top_pixels_needed)
1429
2.16M
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1430
44.6M
    } else if (n_left_px > 0) {
1431
2.13M
      memset(above_row, left_ref[0], num_top_pixels_needed);
1432
2.13M
    }
1433
47.0M
  }
1434
1435
58.0M
  if (need_above_left) {
1436
48.7M
    if (n_top_px > 0 && n_left_px > 0) {
1437
44.5M
      above_row[-1] = above_ref[-1];
1438
44.5M
    } else if (n_top_px > 0) {
1439
1.79M
      above_row[-1] = above_ref[0];
1440
2.47M
    } else if (n_left_px > 0) {
1441
2.36M
      above_row[-1] = left_ref[0];
1442
2.36M
    } else {
1443
108k
      above_row[-1] = 128;
1444
108k
    }
1445
48.7M
    left_col[-1] = above_row[-1];
1446
48.7M
  }
1447
1448
58.0M
  if (use_filter_intra) {
1449
777k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1450
777k
                               filter_intra_mode);
1451
777k
    return;
1452
777k
  }
1453
1454
57.2M
  if (is_dr_mode) {
1455
43.3M
    int upsample_above = 0;
1456
43.3M
    int upsample_left = 0;
1457
43.3M
    if (!disable_edge_filter) {
1458
43.2M
      const int need_right = p_angle < 90;
1459
43.2M
      const int need_bottom = p_angle > 180;
1460
43.2M
      if (p_angle != 90 && p_angle != 180) {
1461
40.8M
        const int ab_le = need_above_left ? 1 : 0;
1462
40.8M
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1463
2.34M
          filter_intra_edge_corner(above_row, left_col);
1464
2.34M
        }
1465
40.8M
        if (need_above && n_top_px > 0) {
1466
29.6M
          const int strength = intra_edge_filter_strength(
1467
29.6M
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1468
29.6M
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1469
29.6M
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1470
29.6M
        }
1471
40.8M
        if (need_left && n_left_px > 0) {
1472
29.2M
          const int strength = intra_edge_filter_strength(
1473
29.2M
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1474
29.2M
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1475
29.2M
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1476
29.2M
        }
1477
40.8M
      }
1478
43.2M
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1479
43.2M
                                                   intra_edge_filter_type);
1480
43.2M
      if (need_above && upsample_above) {
1481
18.6M
        const int n_px = txwpx + (need_right ? txhpx : 0);
1482
18.6M
        av1_upsample_intra_edge(above_row, n_px);
1483
18.6M
      }
1484
43.2M
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1485
43.2M
                                                  intra_edge_filter_type);
1486
43.2M
      if (need_left && upsample_left) {
1487
17.9M
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1488
17.9M
        av1_upsample_intra_edge(left_col, n_px);
1489
17.9M
      }
1490
43.2M
    }
1491
43.3M
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1492
43.3M
                 upsample_left, p_angle);
1493
43.3M
    return;
1494
43.3M
  }
1495
1496
  // predict
1497
13.9M
  if (mode == DC_PRED) {
1498
6.36M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1499
6.36M
                                                  left_col);
1500
7.62M
  } else {
1501
7.62M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1502
7.62M
  }
1503
13.9M
}
1504
1505
// Maps a block size to the size used for a subsampled plane.
//
// Only the five block sizes that have a 4-sample dimension are remapped
// (4X4, 4X8, 8X4, 4X16, 16X4): the width is doubled from 4 to 8 when
// subsampling_x is 1 and the height is doubled from 4 to 8 when
// subsampling_y is 1. Dimensions that are already >= 8 are left alone, and
// every other block size is returned unchanged.
//
// subsampling_x and subsampling_y must each be 0 or 1 (asserted). With both
// equal to 0 the input is returned as is.
static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1506
9.15M
                                            int subsampling_y) {
1507
9.15M
  assert(subsampling_x >= 0 && subsampling_x < 2);
1508
9.15M
  assert(subsampling_y >= 0 && subsampling_y < 2);
1509
9.15M
  // Default: no remapping.
  BLOCK_SIZE bs = bsize;
1510
9.15M
  switch (bsize) {
1511
113k
    // 4 wide, 4 high: either dimension may need doubling.
    case BLOCK_4X4:
1512
113k
      if (subsampling_x == 1 && subsampling_y == 1)
1513
113k
        bs = BLOCK_8X8;
1514
0
      else if (subsampling_x == 1)
1515
0
        bs = BLOCK_8X4;
1516
0
      else if (subsampling_y == 1)
1517
0
        bs = BLOCK_4X8;
1518
113k
      break;
1519
191k
    // Only the width is 4, so only subsampling_x changes the result.
    case BLOCK_4X8:
1520
191k
      if (subsampling_x == 1 && subsampling_y == 1)
1521
191k
        bs = BLOCK_8X8;
1522
0
      else if (subsampling_x == 1)
1523
0
        bs = BLOCK_8X8;
1524
0
      else if (subsampling_y == 1)
1525
0
        bs = BLOCK_4X8;
1526
191k
      break;
1527
247k
    // Only the height is 4, so only subsampling_y changes the result.
    case BLOCK_8X4:
1528
247k
      if (subsampling_x == 1 && subsampling_y == 1)
1529
247k
        bs = BLOCK_8X8;
1530
0
      else if (subsampling_x == 1)
1531
0
        bs = BLOCK_8X4;
1532
0
      else if (subsampling_y == 1)
1533
0
        bs = BLOCK_8X8;
1534
247k
      break;
1535
307k
    // Only the width is 4, so only subsampling_x changes the result.
    case BLOCK_4X16:
1536
307k
      if (subsampling_x == 1 && subsampling_y == 1)
1537
307k
        bs = BLOCK_8X16;
1538
0
      else if (subsampling_x == 1)
1539
0
        bs = BLOCK_8X16;
1540
0
      else if (subsampling_y == 1)
1541
0
        bs = BLOCK_4X16;
1542
307k
      break;
1543
321k
    // Only the height is 4, so only subsampling_y changes the result.
    case BLOCK_16X4:
1544
321k
      if (subsampling_x == 1 && subsampling_y == 1)
1545
321k
        bs = BLOCK_16X8;
1546
0
      else if (subsampling_x == 1)
1547
0
        bs = BLOCK_16X4;
1548
0
      else if (subsampling_y == 1)
1549
0
        bs = BLOCK_16X8;
1550
321k
      break;
1551
7.96M
    default: break;
1552
9.15M
  }
1553
9.14M
  return bs;
1554
9.15M
}
1555
1556
// Produces the intra prediction for one transform block of `plane` into dst.
//
// Two paths:
//  - use_palette != 0: each destination sample is looked up in the block's
//    palette through the plane's colour index map, then the function returns.
//  - otherwise: neighbour availability (top, left, top-right, bottom-left) and
//    the number of usable edge pixels are derived from xd, and the actual
//    prediction is delegated to build_intra_predictors() or, for
//    high-bitdepth buffers when CONFIG_AV1_HIGHBITDEPTH is set,
//    build_intra_predictors_high().
//
// wpx/hpx: plane dimensions used for the edge-distance computation; wpx is
//          also the row stride of the colour index map in the palette path.
// col_off/row_off: offset of the transform block inside the prediction block;
//          shifted left by MI_SIZE_LOG2 to obtain pixel offsets.
// ref/ref_stride: buffer the neighbouring pixels are read from (forwarded
//          unchanged to the predictor builders).
// dst/dst_stride: output buffer; in the high-bitdepth palette path dst is
//          converted with CONVERT_TO_SHORTPTR before being written.
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1557
                             int enable_intra_edge_filter, int wpx, int hpx,
1558
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1559
                             int angle_delta, int use_palette,
1560
                             FILTER_INTRA_MODE filter_intra_mode,
1561
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1562
                             int dst_stride, int col_off, int row_off,
1563
64.7M
                             int plane) {
1564
64.7M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1565
64.7M
  const int txwpx = tx_size_wide[tx_size];
1566
64.7M
  const int txhpx = tx_size_high[tx_size];
1567
64.7M
  // Pixel position of this transform block inside the prediction block.
  const int x = col_off << MI_SIZE_LOG2;
1568
64.7M
  const int y = row_off << MI_SIZE_LOG2;
1569
1570
64.7M
  if (use_palette) {
1571
33.7k
    int r, c;
1572
33.7k
    // Plane 0 uses entry 0 of the per-plane map data; planes 1 and 2 share
    // entry 1 (index is `plane != 0`).
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1573
33.7k
                               xd->color_index_map_offset[plane != 0];
1574
33.7k
    // Each plane's colours start PALETTE_MAX_SIZE entries after the previous
    // plane's.
    const uint16_t *const palette =
1575
33.7k
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1576
33.7k
    if (is_cur_buf_hbd(xd)) {
1577
4.26k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1578
36.3k
      for (r = 0; r < txhpx; ++r) {
1579
409k
        for (c = 0; c < txwpx; ++c) {
1580
377k
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1581
377k
        }
1582
32.1k
      }
1583
29.5k
    } else {
1584
296k
      for (r = 0; r < txhpx; ++r) {
1585
3.91M
        for (c = 0; c < txwpx; ++c) {
1586
3.65M
          // Palette entries are stored as uint16_t; narrowed for 8-bit output.
          dst[r * dst_stride + c] =
1587
3.65M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1588
3.65M
        }
1589
266k
      }
1590
29.5k
    }
1591
33.7k
    return;
1592
33.7k
  }
1593
1594
64.7M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1595
64.7M
  const int txw = tx_size_wide_unit[tx_size];
1596
64.7M
  const int txh = tx_size_high_unit[tx_size];
1597
64.7M
  const int ss_x = pd->subsampling_x;
1598
64.7M
  const int ss_y = pd->subsampling_y;
1599
64.7M
  // A transform block that is not in the first row/column of the prediction
  // block always has a top/left neighbour; otherwise availability comes from
  // xd (the chroma_* flags are used when the plane is subsampled on that
  // axis).
  const int have_top =
1600
64.7M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1601
64.7M
  const int have_left =
1602
64.7M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1603
64.7M
  // NOTE(review): the `3 +` shift suggests mb_to_*_edge is stored in 1/8-pel
  // units -- confirm against the MACROBLOCKD definition.
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1604
64.7M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1605
1606
  // Distance between the right edge of this prediction block to
1607
  // the frame right edge
1608
64.7M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1609
  // Distance between the bottom edge of this prediction block to
1610
  // the frame bottom edge
1611
64.7M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1612
64.7M
  // The right/bottom neighbour must lie inside the current tile; the bottom
  // one must additionally be above the frame bottom edge (yd > 0).
  const int right_available =
1613
64.7M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1614
64.7M
  const int bottom_available =
1615
64.7M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1616
1617
64.7M
  const PARTITION_TYPE partition = mbmi->partition;
1618
1619
64.7M
  BLOCK_SIZE bsize = mbmi->bsize;
1620
  // force 4x4 chroma component block size.
1621
64.7M
  if (ss_x || ss_y) {
1622
9.15M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1623
9.15M
  }
1624
1625
64.7M
  const int have_top_right =
1626
64.7M
      has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
1627
64.7M
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
1628
64.7M
  const int have_bottom_left = has_bottom_left(
1629
64.7M
      sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
1630
64.7M
      tx_size, row_off, col_off, ss_x, ss_y);
1631
1632
64.7M
  const int disable_edge_filter = !enable_intra_edge_filter;
1633
64.7M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1634
64.7M
  // The four edge pixel counts passed below are, in order: above (clamped to
  // what remains before the right frame edge), above-right, left (clamped to
  // what remains before the bottom frame edge) and bottom-left. Each is 0 when
  // the corresponding neighbour is unavailable.
#if CONFIG_AV1_HIGHBITDEPTH
1635
64.7M
  if (is_cur_buf_hbd(xd)) {
1636
5.11M
    build_intra_predictors_high(
1637
5.11M
        ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
1638
5.11M
        tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1639
5.11M
        have_top_right ? AOMMIN(txwpx, xr) : 0,
1640
5.11M
        have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1641
5.11M
        have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type,
1642
5.11M
        xd->bd);
1643
5.11M
    return;
1644
5.11M
  }
1645
59.6M
#endif
1646
59.6M
  build_intra_predictors(
1647
59.6M
      ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
1648
59.6M
      tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1649
59.6M
      have_top_right ? AOMMIN(txwpx, xr) : 0,
1650
59.6M
      have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1651
59.6M
      have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type);
1652
59.6M
}
1653
1654
// Convenience wrapper around av1_predict_intra_block() that derives all
// prediction parameters from the current block's mode info (xd->mi[0]) and
// the sequence header, and predicts in place in the plane's dst buffer.
//
// blk_col/blk_row locate the transform block inside the plane's dst buffer;
// they are scaled by MI_SIZE_LOG2 to compute the destination pointer.
//
// For chroma planes using UV_CFL_PRED the base prediction is either computed
// (and optionally cached) or loaded from the CfL DC prediction cache, and
// cfl_predict_block() is then applied on top of it.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1655
                                    int plane, int blk_col, int blk_row,
1656
65.2M
                                    TX_SIZE tx_size) {
1657
65.2M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1658
65.2M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1659
65.2M
  const int dst_stride = pd->dst.stride;
1660
65.2M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1661
65.2M
  // Luma uses mbmi->mode directly; chroma modes are translated through
  // get_uv_mode().
  const PREDICTION_MODE mode =
1662
65.2M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1663
65.2M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1664
65.2M
  // Filter-intra is only considered for the luma plane; FILTER_INTRA_MODES is
  // passed otherwise.
  const FILTER_INTRA_MODE filter_intra_mode =
1665
65.2M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1666
65.2M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1667
65.2M
          : FILTER_INTRA_MODES;
1668
65.2M
  // angle_delta[] has one entry for luma (index 0) and one shared by both
  // chroma planes (index 1); the stored step count is scaled by ANGLE_STEP.
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1669
65.2M
  const SequenceHeader *seq_params = cm->seq_params;
1670
1671
65.2M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1672
#if CONFIG_DEBUG
1673
    assert(is_cfl_allowed(xd));
1674
    const BLOCK_SIZE plane_bsize =
1675
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1676
    (void)plane_bsize;
1677
    assert(plane_bsize < BLOCK_SIZES_ALL);
1678
    if (!xd->lossless[mbmi->segment_id]) {
1679
      assert(blk_col == 0);
1680
      assert(blk_row == 0);
1681
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1682
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1683
    }
1684
#endif
1685
2.47M
    CFL_CTX *const cfl = &xd->cfl;
1686
2.47M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1687
2.47M
    if (cfl->dc_pred_is_cached[pred_plane] == 0) {
1688
1.93M
      // No cached base prediction for this plane: compute it in place (dst is
      // passed as both the reference and the destination buffer).
      // NOTE(review): `mode` here is get_uv_mode(UV_CFL_PRED), presumably
      // DC_PRED given the "dc_pred" cache naming -- confirm.
      av1_predict_intra_block(xd, seq_params->sb_size,
1689
1.93M
                              seq_params->enable_intra_edge_filter, pd->width,
1690
1.93M
                              pd->height, tx_size, mode, angle_delta,
1691
1.93M
                              use_palette, filter_intra_mode, dst, dst_stride,
1692
1.93M
                              dst, dst_stride, blk_col, blk_row, plane);
1693
1.93M
      if (cfl->use_dc_pred_cache) {
1694
77.0k
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1695
77.0k
        cfl->dc_pred_is_cached[pred_plane] = 1;
1696
77.0k
      }
1697
1.93M
    } else {
1698
539k
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1699
539k
    }
1700
2.47M
    cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1701
2.47M
    return;
1702
2.47M
  }
1703
62.7M
  // Regular path: predict in place, reading neighbours from the same dst
  // buffer that receives the prediction.
  av1_predict_intra_block(
1704
62.7M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1705
62.7M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1706
62.7M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1707
62.7M
}
1708
1709
2
// Public entry point for setting up the intra predictor tables: hands
// init_intra_predictors_internal to aom_once().
// NOTE(review): aom_once() is presumably a run-exactly-once helper, which
// would make repeated calls harmless -- confirm in aom_ports/aom_once.h.
void av1_init_intra_predictors(void) {
1710
2
  aom_once(init_intra_predictors_internal);
1711
2
}