Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/aom/av1/common/reconintra.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
14
#include "config/aom_config.h"
15
#include "config/aom_dsp_rtcd.h"
16
#include "config/av1_rtcd.h"
17
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_mem/aom_mem.h"
20
#include "aom_ports/aom_once.h"
21
#include "aom_ports/mem.h"
22
#include "av1/common/av1_common_int.h"
23
#include "av1/common/cfl.h"
24
#include "av1/common/reconintra.h"
25
26
// Bit flags naming the neighbouring edges an intra prediction mode reads.
// They are OR-ed together per mode in the extend_modes[] table.
enum {
  NEED_LEFT = 1 << 1,        // left column
  NEED_ABOVE = 1 << 2,       // above row
  NEED_ABOVERIGHT = 1 << 3,  // above-right extension of the above row
  NEED_ABOVELEFT = 1 << 4,   // above-left corner pixel
  NEED_BOTTOMLEFT = 1 << 5,  // bottom-left extension of the left column
};
33
34
// Intra edge filter / upsampling constants. Their uses are outside the part
// of the file visible here.
// NOTE(review): presumably the number of edge filter kernels and the number
// of taps per kernel - confirm against the edge filter code.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
// NOTE(review): presumably the largest edge length that may be upsampled -
// confirm against the upsampling code.
#define MAX_UPSAMPLE_SZ 16
// Twice the maximum transform size plus 32 extra entries.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38
39
static const uint8_t extend_modes[INTRA_MODES] = {
40
  NEED_ABOVE | NEED_LEFT,                   // DC
41
  NEED_ABOVE,                               // V
42
  NEED_LEFT,                                // H
43
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53
};
54
55
// Tables recording whether the top-right reference pixels are available.
// One table per block size; each flag is a single bit and eight flags are
// packed into every 8-bit entry. E.g., for the 32x32 blocks in a 128x128
// superblock, the index of the "o" block is 10 (in raster order), so its flag
// is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data (they are only reached through
// pointers to const), so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129
  // 4X4
130
  has_tr_4x4,
131
  // 4X8,       8X4,            8X8
132
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
133
  // 8X16,      16X8,           16X16
134
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
135
  // 16X32,     32X16,          32X32
136
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
137
  // 32X64,     64X32,          64X64
138
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
139
  // 64x128,    128x64,         128x128
140
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
141
  // 4x16,      16x4,            8x32
142
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
143
  // 32x8,      16x64,           64x16
144
  has_tr_32x8, has_tr_16x64, has_tr_64x16
145
};
146
147
// Top-right availability bit tables for square blocks visited in the order
// used by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the
// ordinary has_tr_* tables: flag i lives at (table[i / 8] >> (i % 8)) & 1.
// Read-only lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157
// The _vert_* tables are like the ordinary tables above, but describe the
158
// order we visit square blocks when doing a PARTITION_VERT_A or
159
// PARTITION_VERT_B. This is the same order as normal except for on the last
160
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161
// as a pair of squares, which means that these tables work correctly for both
162
// mixed vertical partition types.
163
//
164
// There are tables for each of the square sizes. Vertical rectangles (like
165
// BLOCK_16X32) use their respective "non-vert" table
166
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167
  // 4X4
168
  NULL,
169
  // 4X8,      8X4,         8X8
170
  has_tr_4x8, NULL, has_tr_vert_8x8,
171
  // 8X16,     16X8,        16X16
172
  has_tr_8x16, NULL, has_tr_vert_16x16,
173
  // 16X32,    32X16,       32X32
174
  has_tr_16x32, NULL, has_tr_vert_32x32,
175
  // 32X64,    64X32,       64X64
176
  has_tr_32x64, NULL, has_tr_vert_64x64,
177
  // 64x128,   128x64,      128x128
178
  has_tr_64x128, NULL, has_tr_128x128
179
};
180
181
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
182
6.75M
                                       BLOCK_SIZE bsize) {
183
6.75M
  const uint8_t *ret = NULL;
184
  // If this is a mixed vertical partition, look up bsize in orders_vert.
185
6.75M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
186
565k
    assert(bsize < BLOCK_SIZES);
187
565k
    ret = has_tr_vert_tables[bsize];
188
6.18M
  } else {
189
6.18M
    ret = has_tr_tables[bsize];
190
6.18M
  }
191
6.75M
  assert(ret);
192
6.75M
  return ret;
193
6.75M
}
194
195
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
196
                         int mi_col, int top_available, int right_available,
197
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
198
26.8M
                         int col_off, int ss_x, int ss_y) {
199
26.8M
  if (!top_available || !right_available) return 0;
200
201
24.6M
  const int bw_unit = mi_size_wide[bsize];
202
24.6M
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
203
24.6M
  const int top_right_count_unit = tx_size_wide_unit[txsz];
204
205
24.6M
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
206
12.8M
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
207
      // Special case: For 128x128 blocks, the transform unit whose
208
      // top-right corner is at the center of the block does in fact have
209
      // pixels available at its top-right corner.
210
1.14M
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
211
273k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
212
113k
        return 1;
213
113k
      }
214
1.02M
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
215
1.02M
      const int col_off_64 = col_off % plane_bw_unit_64;
216
1.02M
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
217
1.14M
    }
218
11.6M
    return col_off + top_right_count_unit < plane_bw_unit;
219
12.8M
  } else {
220
    // All top-right pixels are in the block above, which is already available.
221
11.8M
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
222
223
9.75M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
224
9.75M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
225
9.75M
    const int sb_mi_size = mi_size_high[sb_size];
226
9.75M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
227
9.75M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
228
229
    // Top row of superblock: so top-right pixels are in the top and/or
230
    // top-right superblocks, both of which are already available.
231
9.75M
    if (blk_row_in_sb == 0) return 1;
232
233
    // Rightmost column of superblock (and not the top row): so top-right pixels
234
    // fall in the right superblock, which is not available yet.
235
7.83M
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
236
1.18M
      return 0;
237
1.18M
    }
238
239
    // General case (neither top row nor rightmost column): check if the
240
    // top-right block is coded before the current block.
241
6.65M
    const int this_blk_index =
242
6.65M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
243
6.65M
        blk_col_in_sb + 0;
244
6.65M
    const int idx1 = this_blk_index / 8;
245
6.65M
    const int idx2 = this_blk_index % 8;
246
6.65M
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
247
6.65M
    return (has_tr_table[idx1] >> idx2) & 1;
248
7.83M
  }
249
24.6M
}
250
251
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. One flag per bit, eight flags per entry: flag i is
// (table[i / 8] >> (i % 8)) & 1.
//
// The tables are read-only lookup data (they are only reached through
// pointers to const), so they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314
  // 4X4
315
  has_bl_4x4,
316
  // 4X8,         8X4,         8X8
317
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
318
  // 8X16,        16X8,        16X16
319
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
320
  // 16X32,       32X16,       32X32
321
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
322
  // 32X64,       64X32,       64X64
323
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
324
  // 64x128,      128x64,      128x128
325
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
326
  // 4x16,        16x4,        8x32
327
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
328
  // 32x8,        16x64,       64x16
329
  has_bl_32x8, has_bl_16x64, has_bl_64x16
330
};
331
332
// Bottom-left availability bit tables for square blocks visited in the order
// used by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the
// ordinary has_bl_* tables: flag i lives at (table[i / 8] >> (i % 8)) & 1.
// Read-only lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342
// The _vert_* tables are like the ordinary tables above, but describe the
343
// order we visit square blocks when doing a PARTITION_VERT_A or
344
// PARTITION_VERT_B. This is the same order as normal except for on the last
345
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346
// as a pair of squares, which means that these tables work correctly for both
347
// mixed vertical partition types.
348
//
349
// There are tables for each of the square sizes. Vertical rectangles (like
350
// BLOCK_16X32) use their respective "non-vert" table
351
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352
  // 4X4
353
  NULL,
354
  // 4X8,     8X4,         8X8
355
  has_bl_4x8, NULL, has_bl_vert_8x8,
356
  // 8X16,    16X8,        16X16
357
  has_bl_8x16, NULL, has_bl_vert_16x16,
358
  // 16X32,   32X16,       32X32
359
  has_bl_16x32, NULL, has_bl_vert_32x32,
360
  // 32X64,   64X32,       64X64
361
  has_bl_32x64, NULL, has_bl_vert_64x64,
362
  // 64x128,  128x64,      128x128
363
  has_bl_64x128, NULL, has_bl_128x128
364
};
365
366
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
367
6.78M
                                       BLOCK_SIZE bsize) {
368
6.78M
  const uint8_t *ret = NULL;
369
  // If this is a mixed vertical partition, look up bsize in orders_vert.
370
6.78M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
371
558k
    assert(bsize < BLOCK_SIZES);
372
558k
    ret = has_bl_vert_tables[bsize];
373
6.22M
  } else {
374
6.22M
    ret = has_bl_tables[bsize];
375
6.22M
  }
376
6.78M
  assert(ret);
377
6.78M
  return ret;
378
6.78M
}
379
380
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
381
                           int mi_col, int bottom_available, int left_available,
382
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
383
26.8M
                           int col_off, int ss_x, int ss_y) {
384
26.8M
  if (!bottom_available || !left_available) return 0;
385
386
  // Special case for 128x* blocks, when col_off is half the block width.
387
  // This is needed because 128x* superblocks are divided into 64x* blocks in
388
  // raster order
389
24.7M
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
390
1.14M
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
391
1.14M
    const int col_off_64 = col_off % plane_bw_unit_64;
392
1.14M
    if (col_off_64 == 0) {
393
      // We are at the left edge of top-right or bottom-right 64x* block.
394
274k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
395
274k
      const int row_off_64 = row_off % plane_bh_unit_64;
396
274k
      const int plane_bh_unit =
397
274k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
398
      // Check if all bottom-left pixels are in the left 64x* block (which is
399
      // already coded).
400
274k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
401
274k
    }
402
1.14M
  }
403
404
24.4M
  if (col_off > 0) {
405
    // Bottom-left pixels are in the bottom-left block, which is not available.
406
12.5M
    return 0;
407
12.5M
  } else {
408
11.9M
    const int bh_unit = mi_size_high[bsize];
409
11.9M
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
410
11.9M
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
411
412
    // All bottom-left pixels are in the left block, which is already available.
413
11.9M
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
414
415
9.82M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
416
9.82M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
417
9.82M
    const int sb_mi_size = mi_size_high[sb_size];
418
9.82M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
419
9.82M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
420
421
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
422
    // and/or bottom-left superblocks. But only the left superblock is
423
    // available, so check if all required pixels fall in that superblock.
424
9.82M
    if (blk_col_in_sb == 0) {
425
1.94M
      const int blk_start_row_off =
426
1.94M
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
427
1.94M
          ss_y;
428
1.94M
      const int row_off_in_sb = blk_start_row_off + row_off;
429
1.94M
      const int sb_height_unit = sb_mi_size >> ss_y;
430
1.94M
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
431
1.94M
    }
432
433
    // Bottom row of superblock (and not the leftmost column): so bottom-left
434
    // pixels fall in the bottom superblock, which is not available yet.
435
7.88M
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
436
437
    // General case (neither leftmost column nor bottom row): check if the
438
    // bottom-left block is coded before the current block.
439
6.70M
    const int this_blk_index =
440
6.70M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
441
6.70M
        blk_col_in_sb + 0;
442
6.70M
    const int idx1 = this_blk_index / 8;
443
6.70M
    const int idx2 = this_blk_index % 8;
444
6.70M
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
445
6.70M
    return (has_bl_table[idx1] >> idx2) & 1;
446
7.88M
  }
447
24.4M
}
448
449
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
450
                              const uint8_t *above, const uint8_t *left);
451
452
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
453
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
454
455
#if CONFIG_AV1_HIGHBITDEPTH
456
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457
                                   const uint16_t *above, const uint16_t *left,
458
                                   int bd);
459
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461
#endif
462
463
2
static void init_intra_predictors_internal(void) {
464
2
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466
#if CONFIG_REALTIME_ONLY
467
#define INIT_RECTANGULAR(p, type)             \
468
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
469
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
470
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
471
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
472
  p[TX_16X32] = aom_##type##_predictor_16x32; \
473
  p[TX_32X16] = aom_##type##_predictor_32x16; \
474
  p[TX_32X64] = aom_##type##_predictor_32x64; \
475
  p[TX_64X32] = aom_##type##_predictor_64x32;
476
#else
477
2
#define INIT_RECTANGULAR(p, type)             \
478
40
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
479
40
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
480
40
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
481
40
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
482
40
  p[TX_16X32] = aom_##type##_predictor_16x32; \
483
40
  p[TX_32X16] = aom_##type##_predictor_32x16; \
484
40
  p[TX_32X64] = aom_##type##_predictor_32x64; \
485
40
  p[TX_64X32] = aom_##type##_predictor_64x32; \
486
40
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
487
40
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
488
40
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
489
40
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
490
40
  p[TX_16X64] = aom_##type##_predictor_16x64; \
491
40
  p[TX_64X16] = aom_##type##_predictor_64x16;
492
2
#endif
493
494
2
#define INIT_NO_4X4(p, type)                  \
495
40
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
496
40
  p[TX_16X16] = aom_##type##_predictor_16x16; \
497
40
  p[TX_32X32] = aom_##type##_predictor_32x32; \
498
40
  p[TX_64X64] = aom_##type##_predictor_64x64; \
499
40
  INIT_RECTANGULAR(p, type)
500
501
2
#define INIT_ALL_SIZES(p, type)           \
502
40
  p[TX_4X4] = aom_##type##_predictor_4x4; \
503
40
  INIT_NO_4X4(p, type)
504
505
2
  INIT_ALL_SIZES(pred[V_PRED], v);
506
2
  INIT_ALL_SIZES(pred[H_PRED], h);
507
2
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
508
2
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
509
2
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
510
2
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
511
2
  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
512
2
  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
513
2
  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
514
2
  INIT_ALL_SIZES(dc_pred[1][1], dc);
515
2
#if CONFIG_AV1_HIGHBITDEPTH
516
2
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
517
2
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
518
2
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
519
2
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
520
2
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
521
2
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
522
2
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
523
2
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
524
2
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
525
2
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
526
2
#endif
527
2
#undef intra_pred_allsizes
528
2
}
529
530
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block into dst (rows stride apart) using only the above
// row. Each output row steps dx (in 1/64 sample units) further along the
// above row; samples are linearly interpolated between two neighbouring above
// pixels with 5-bit weights. Positions at or beyond the last usable index
// take the value above[max index]. left is unused and dy must be 1.
// upsample_above is a shift (0 or 1 in practice - TODO confirm against
// callers) that scales indices when the above row has been upsampled.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  int x = dx;
  for (int row = 0; row < bh; ++row, dst += stride, x += dx) {
    int idx = x >> frac_bits;

    if (idx >= last_idx) {
      // This row and every later one lie entirely past the edge: flood-fill
      // the remaining rows with the last edge pixel.
      for (int fill_row = row; fill_row < bh; ++fill_row) {
        memset(dst, above[last_idx], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    // Weight (0..31) of the right-hand neighbour; constant along the row.
    const int w_hi = ((x << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    for (int col = 0; col < bw; ++col, idx += idx_step) {
      if (idx < last_idx) {
        const int sum = above[idx] * w_lo + above[idx + 1] * w_hi;
        dst[col] = ROUND_POWER_OF_TWO(sum, 5);
      } else {
        dst[col] = above[last_idx];
      }
    }
  }
}
567
568
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block into dst (rows stride apart). Each pixel is
// projected onto the above row first; if the projected index is not below
// -(1 << upsample_above) the above row is used, otherwise the pixel is
// projected onto the left column. Either way the result is a linear
// interpolation between two neighbouring edge pixels with 5-bit weights.
// Negative indices into above / left are read, so both must have valid
// samples before element 0.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    // Horizontal displacement of this row, in 1/64 sample units.
    const int row_shift_x = (r + 1) * dx;

    for (int c = 0; c < bw; ++c) {
      const uint8_t *edge;
      int idx;
      int w_hi;

      const int x_pos = (c << 6) - row_shift_x;
      const int idx_above = x_pos >> frac_bits_x;
      if (idx_above >= min_base_x) {
        edge = above;
        idx = idx_above;
        // Multiply instead of shifting: x_pos may be negative.
        w_hi = ((x_pos * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y_pos = (r << 6) - (c + 1) * dy;
        edge = left;
        idx = y_pos >> frac_bits_y;
        assert(idx >= min_base_y);
        w_hi = ((y_pos * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int sum = edge[idx] * (32 - w_hi) + edge[idx + 1] * w_hi;
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
606
607
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block into dst (rows stride apart) using only the left
// column. Each output column steps dy (in 1/64 sample units) further down the
// left column; samples are linearly interpolated between two neighbouring
// left pixels with 5-bit weights. Positions at or beyond the last usable
// index take the value left[max index]. above is unused and dx must be 1.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int y = dy;
  for (int col = 0; col < bw; ++col, y += dy) {
    int idx = y >> frac_bits;
    // Weight (0..31) of the lower neighbour; constant down the column.
    const int w_hi = ((y << upsample_left) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    int row = 0;
    // Interpolated part of the column.
    for (; row < bh && idx < last_idx; ++row, idx += idx_step) {
      const int sum = left[idx] * w_lo + left[idx + 1] * w_hi;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
    }
    // Remainder of the column lies past the edge: replicate the last pixel.
    for (; row < bh; ++row) {
      dst[row * stride + col] = left[last_idx];
    }
  }
}
638
639
// Dispatches an 8-bit directional prediction to the routine for its angle.
//
// angle is expected to be in (0, 270). 90 and 180 degrees use the plain
// vertical / horizontal predictors from the pred[] table; every other angle
// goes to the zone 1 (< 90), zone 2 (90..180) or zone 3 (> 180) directional
// predictor. Angles outside (0, 270) write nothing (and assert in debug
// builds).
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle <= 0 || angle >= 270) return;  // Out of range: nothing to do.

  if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
663
664
#if CONFIG_AV1_HIGHBITDEPTH
665
// Directional prediction, zone 1: 0 < angle < 90 (high bit-depth).
//
// 16-bit counterpart of the 8-bit zone 1 predictor: predicts a bw x bh block
// into dst (rows stride apart) using only the above row, linearly
// interpolating between neighbouring above pixels with 5-bit weights.
// Positions at or beyond the last usable index take the value
// above[max index]. left and bd are unused and dy must be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  int x = dx;
  for (int row = 0; row < bh; ++row, dst += stride, x += dx) {
    int idx = x >> frac_bits;

    if (idx >= last_idx) {
      // This row and every later one lie entirely past the edge: flood-fill
      // the remaining rows with the last edge pixel.
      for (int fill_row = row; fill_row < bh; ++fill_row) {
        aom_memset16(dst, above[last_idx], bw);
        dst += stride;
      }
      return;
    }

    // Weight (0..31) of the right-hand neighbour; constant along the row.
    const int w_hi = ((x << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;

    for (int col = 0; col < bw; ++col, idx += idx_step) {
      if (idx < last_idx) {
        const int sum = above[idx] * w_lo + above[idx + 1] * w_hi;
        dst[col] = ROUND_POWER_OF_TWO(sum, 5);
      } else {
        dst[col] = above[last_idx];
      }
    }
  }
}
704
705
// Directional prediction, zone 2: 90 < angle < 180 (high bit-depth).
//
// 16-bit counterpart of the 8-bit zone 2 predictor. Each pixel is projected
// onto the above row first; if the projected index is not below
// -(1 << upsample_above) the above row is used, otherwise the pixel is
// projected onto the left column. The result is a linear interpolation
// between two neighbouring edge pixels with 5-bit weights. Negative indices
// into above / left are read, so both must have valid samples before
// element 0. bd is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    // Horizontal displacement of this row, in 1/64 sample units.
    const int row_shift_x = (r + 1) * dx;

    for (int c = 0; c < bw; ++c) {
      const uint16_t *edge;
      int idx;
      int w_hi;

      const int x_pos = (c << 6) - row_shift_x;
      const int idx_above = x_pos >> frac_bits_x;
      if (idx_above >= min_base_x) {
        edge = above;
        idx = idx_above;
        // Multiply instead of shifting: x_pos may be negative.
        w_hi = ((x_pos * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y_pos = (r << 6) - (c + 1) * dy;
        edge = left;
        idx = y_pos >> frac_bits_y;
        assert(idx >= min_base_y);
        w_hi = ((y_pos * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int sum = edge[idx] * (32 - w_hi) + edge[idx + 1] * w_hi;
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
744
745
// Directional prediction, zone 3: 180 < angle < 270 (high bit-depth, C
// version).
//
// Only the `left` edge is read. Each output column c samples `left` at the
// fixed-point position (c + 1) * dy, linearly interpolating between two
// neighbouring edge samples with a 5-bit fraction. Once the position reaches
// max_base_y the rest of the column is filled with left[max_base_y].
//
//   dst, stride    - output block and its row pitch
//   bw, bh         - block width / height in pixels
//   left           - left edge samples
//   upsample_left  - 1 if the edge was upsampled by 2, else 0
//   dy             - per-column vertical step (must be > 0)
//   above, dx, bd  - unused here (dx is asserted to be 1)
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    uint16_t *const col = dst + c;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int base = y >> frac_bits;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      col[r * stride] = ROUND_POWER_OF_TWO(val, 5);
    }
    // Anything past the end of the edge repeats the last edge sample.
    for (; r < bh; ++r) {
      col[r * stride] = left[max_base_y];
    }
  }
}
777
778
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
779
                                TX_SIZE tx_size, const uint16_t *above,
780
                                const uint16_t *left, int upsample_above,
781
1.11M
                                int upsample_left, int angle, int bd) {
782
1.11M
  const int dx = av1_get_dx(angle);
783
1.11M
  const int dy = av1_get_dy(angle);
784
1.11M
  const int bw = tx_size_wide[tx_size];
785
1.11M
  const int bh = tx_size_high[tx_size];
786
1.11M
  assert(angle > 0 && angle < 270);
787
788
1.11M
  if (angle > 0 && angle < 90) {
789
202k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
790
202k
                                upsample_above, dx, dy, bd);
791
913k
  } else if (angle > 90 && angle < 180) {
792
364k
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
793
364k
                                upsample_above, upsample_left, dx, dy, bd);
794
549k
  } else if (angle > 180 && angle < 270) {
795
230k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
796
230k
                                dx, dy, bd);
797
318k
  } else if (angle == 90) {
798
138k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
799
180k
  } else if (angle == 180) {
800
180k
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
801
180k
  }
802
1.11M
}
803
#endif  // CONFIG_AV1_HIGHBITDEPTH
804
805
DECLARE_ALIGNED(16, const int8_t,
806
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
807
  {
808
      { -6, 10, 0, 0, 0, 12, 0, 0 },
809
      { -5, 2, 10, 0, 0, 9, 0, 0 },
810
      { -3, 1, 1, 10, 0, 7, 0, 0 },
811
      { -3, 1, 1, 2, 10, 5, 0, 0 },
812
      { -4, 6, 0, 0, 0, 2, 12, 0 },
813
      { -3, 2, 6, 0, 0, 2, 9, 0 },
814
      { -3, 2, 2, 6, 0, 2, 7, 0 },
815
      { -3, 1, 2, 2, 6, 3, 5, 0 },
816
  },
817
  {
818
      { -10, 16, 0, 0, 0, 10, 0, 0 },
819
      { -6, 0, 16, 0, 0, 6, 0, 0 },
820
      { -4, 0, 0, 16, 0, 4, 0, 0 },
821
      { -2, 0, 0, 0, 16, 2, 0, 0 },
822
      { -10, 16, 0, 0, 0, 0, 10, 0 },
823
      { -6, 0, 16, 0, 0, 0, 6, 0 },
824
      { -4, 0, 0, 16, 0, 0, 4, 0 },
825
      { -2, 0, 0, 0, 16, 0, 2, 0 },
826
  },
827
  {
828
      { -8, 8, 0, 0, 0, 16, 0, 0 },
829
      { -8, 0, 8, 0, 0, 16, 0, 0 },
830
      { -8, 0, 0, 8, 0, 16, 0, 0 },
831
      { -8, 0, 0, 0, 8, 16, 0, 0 },
832
      { -4, 4, 0, 0, 0, 0, 16, 0 },
833
      { -4, 0, 4, 0, 0, 0, 16, 0 },
834
      { -4, 0, 0, 4, 0, 0, 16, 0 },
835
      { -4, 0, 0, 0, 4, 0, 16, 0 },
836
  },
837
  {
838
      { -2, 8, 0, 0, 0, 10, 0, 0 },
839
      { -1, 3, 8, 0, 0, 6, 0, 0 },
840
      { -1, 2, 3, 8, 0, 4, 0, 0 },
841
      { 0, 1, 2, 3, 8, 2, 0, 0 },
842
      { -1, 4, 0, 0, 0, 3, 10, 0 },
843
      { -1, 3, 4, 0, 0, 4, 6, 0 },
844
      { -1, 2, 3, 4, 0, 4, 4, 0 },
845
      { -1, 2, 2, 3, 4, 3, 3, 0 },
846
  },
847
  {
848
      { -12, 14, 0, 0, 0, 14, 0, 0 },
849
      { -10, 0, 14, 0, 0, 12, 0, 0 },
850
      { -9, 0, 0, 14, 0, 11, 0, 0 },
851
      { -8, 0, 0, 0, 14, 10, 0, 0 },
852
      { -10, 12, 0, 0, 0, 0, 14, 0 },
853
      { -9, 1, 12, 0, 0, 0, 12, 0 },
854
      { -8, 0, 0, 12, 0, 1, 11, 0 },
855
      { -7, 0, 0, 1, 12, 1, 9, 0 },
856
  },
857
};
858
859
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
860
                                  TX_SIZE tx_size, const uint8_t *above,
861
635k
                                  const uint8_t *left, int mode) {
862
635k
  int r, c;
863
635k
  uint8_t buffer[33][33];
864
635k
  const int bw = tx_size_wide[tx_size];
865
635k
  const int bh = tx_size_high[tx_size];
866
867
635k
  assert(bw <= 32 && bh <= 32);
868
869
6.10M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
870
635k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
871
872
3.36M
  for (r = 1; r < bh + 1; r += 2)
873
10.6M
    for (c = 1; c < bw + 1; c += 4) {
874
7.91M
      const uint8_t p0 = buffer[r - 1][c - 1];
875
7.91M
      const uint8_t p1 = buffer[r - 1][c];
876
7.91M
      const uint8_t p2 = buffer[r - 1][c + 1];
877
7.91M
      const uint8_t p3 = buffer[r - 1][c + 2];
878
7.91M
      const uint8_t p4 = buffer[r - 1][c + 3];
879
7.91M
      const uint8_t p5 = buffer[r][c - 1];
880
7.91M
      const uint8_t p6 = buffer[r + 1][c - 1];
881
71.2M
      for (int k = 0; k < 8; ++k) {
882
63.3M
        int r_offset = k >> 2;
883
63.3M
        int c_offset = k & 0x03;
884
63.3M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
885
63.3M
                 av1_filter_intra_taps[mode][k][1] * p1 +
886
63.3M
                 av1_filter_intra_taps[mode][k][2] * p2 +
887
63.3M
                 av1_filter_intra_taps[mode][k][3] * p3 +
888
63.3M
                 av1_filter_intra_taps[mode][k][4] * p4 +
889
63.3M
                 av1_filter_intra_taps[mode][k][5] * p5 +
890
63.3M
                 av1_filter_intra_taps[mode][k][6] * p6;
891
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
892
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
893
        // Since Clip1() clips a negative value to 0, it is safe to replace
894
        // Round2Signed() with Round2().
895
63.3M
        buffer[r + r_offset][c + c_offset] =
896
63.3M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
897
63.3M
      }
898
7.91M
    }
899
900
6.10M
  for (r = 0; r < bh; ++r) {
901
5.46M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
902
5.46M
    dst += stride;
903
5.46M
  }
904
635k
}
905
906
#if CONFIG_AV1_HIGHBITDEPTH
907
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
908
                                          TX_SIZE tx_size,
909
                                          const uint16_t *above,
910
                                          const uint16_t *left, int mode,
911
83.2k
                                          int bd) {
912
83.2k
  int r, c;
913
83.2k
  uint16_t buffer[33][33];
914
83.2k
  const int bw = tx_size_wide[tx_size];
915
83.2k
  const int bh = tx_size_high[tx_size];
916
917
83.2k
  assert(bw <= 32 && bh <= 32);
918
919
711k
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
920
83.2k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
921
922
397k
  for (r = 1; r < bh + 1; r += 2)
923
1.11M
    for (c = 1; c < bw + 1; c += 4) {
924
800k
      const uint16_t p0 = buffer[r - 1][c - 1];
925
800k
      const uint16_t p1 = buffer[r - 1][c];
926
800k
      const uint16_t p2 = buffer[r - 1][c + 1];
927
800k
      const uint16_t p3 = buffer[r - 1][c + 2];
928
800k
      const uint16_t p4 = buffer[r - 1][c + 3];
929
800k
      const uint16_t p5 = buffer[r][c - 1];
930
800k
      const uint16_t p6 = buffer[r + 1][c - 1];
931
7.20M
      for (int k = 0; k < 8; ++k) {
932
6.40M
        int r_offset = k >> 2;
933
6.40M
        int c_offset = k & 0x03;
934
6.40M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
935
6.40M
                 av1_filter_intra_taps[mode][k][1] * p1 +
936
6.40M
                 av1_filter_intra_taps[mode][k][2] * p2 +
937
6.40M
                 av1_filter_intra_taps[mode][k][3] * p3 +
938
6.40M
                 av1_filter_intra_taps[mode][k][4] * p4 +
939
6.40M
                 av1_filter_intra_taps[mode][k][5] * p5 +
940
6.40M
                 av1_filter_intra_taps[mode][k][6] * p6;
941
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
942
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
943
        // Since Clip1() clips a negative value to 0, it is safe to replace
944
        // Round2Signed() with Round2().
945
6.40M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
946
6.40M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
947
6.40M
      }
948
800k
    }
949
950
711k
  for (r = 0; r < bh; ++r) {
951
627k
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
952
627k
    dst += stride;
953
627k
  }
954
83.2k
}
955
#endif  // CONFIG_AV1_HIGHBITDEPTH
956
957
47.1M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
958
47.1M
  if (plane == 0) {
959
33.4M
    const PREDICTION_MODE mode = mbmi->mode;
960
33.4M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
961
32.0M
            mode == SMOOTH_H_PRED);
962
33.4M
  } else {
963
    // uv_mode is not set for inter blocks, so need to explicitly
964
    // detect that case.
965
13.6M
    if (is_inter_block(mbmi)) return 0;
966
967
13.6M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
968
13.6M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
969
12.4M
            uv_mode == UV_SMOOTH_H_PRED);
970
13.6M
  }
971
47.1M
}
972
973
26.8M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
974
26.8M
  int ab_sm, le_sm;
975
976
26.8M
  if (plane == 0) {
977
19.4M
    const MB_MODE_INFO *ab = xd->above_mbmi;
978
19.4M
    const MB_MODE_INFO *le = xd->left_mbmi;
979
19.4M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
980
19.4M
    le_sm = le ? is_smooth(le, plane) : 0;
981
19.4M
  } else {
982
7.42M
    const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
983
7.42M
    const MB_MODE_INFO *le = xd->chroma_left_mbmi;
984
7.42M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
985
7.42M
    le_sm = le ? is_smooth(le, plane) : 0;
986
7.42M
  }
987
988
26.8M
  return (ab_sm || le_sm) ? 1 : 0;
989
26.8M
}
990
991
15.6M
// Returns the intra edge filter strength (0 = no filtering, otherwise 1..3).
//
//   bs0, bs1 - the two block dimensions; only their sum is used
//   delta    - angle offset; only its magnitude is used
//   type     - 0 selects the first threshold set, any other value the second
//
// Larger blocks and larger |delta| yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int d = abs(delta);
  const int blk_wh = bs0 + bs1;

  if (type == 0) {
    if (blk_wh <= 8) return d >= 56 ? 1 : 0;
    // Sums up to 12 and up to 16 share the same threshold.
    if (blk_wh <= 16) return d >= 40 ? 1 : 0;
    if (blk_wh <= 24) return d >= 32 ? 3 : (d >= 16 ? 2 : (d >= 8 ? 1 : 0));
    if (blk_wh <= 32) return d >= 32 ? 3 : (d >= 4 ? 2 : (d >= 1 ? 1 : 0));
    return d >= 1 ? 3 : 0;
  }

  if (blk_wh <= 8) return d >= 64 ? 2 : (d >= 40 ? 1 : 0);
  if (blk_wh <= 16) return d >= 48 ? 2 : (d >= 20 ? 1 : 0);
  if (blk_wh <= 24) return d >= 4 ? 3 : 0;
  return d >= 1 ? 3 : 0;
}
1029
1030
15.3M
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1031
15.3M
  if (!strength) return;
1032
1033
6.40M
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1034
6.40M
                                                         { 0, 5, 6, 5, 0 },
1035
6.40M
                                                         { 2, 4, 4, 4, 2 } };
1036
6.40M
  const int filt = strength - 1;
1037
6.40M
  uint8_t edge[129];
1038
1039
6.40M
  memcpy(edge, p, sz * sizeof(*p));
1040
79.8M
  for (int i = 1; i < sz; i++) {
1041
73.4M
    int s = 0;
1042
440M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1043
367M
      int k = i - 2 + j;
1044
367M
      k = (k < 0) ? 0 : k;
1045
367M
      k = (k > sz - 1) ? sz - 1 : k;
1046
367M
      s += edge[k] * kernel[filt][j];
1047
367M
    }
1048
73.4M
    s = (s + 8) >> 4;
1049
73.4M
    p[i] = s;
1050
73.4M
  }
1051
6.40M
}
1052
1053
638k
// Smooths the shared top-left corner sample (8-bit).
//
// The corner p_above[-1] is replaced by a rounded [5 6 5] / 16 weighted
// average of p_left[0], the corner itself, and p_above[0]. The result is
// stored in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted =
      5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1062
1063
241k
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1064
241k
  if (!strength) return;
1065
1066
130k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1067
130k
                                                         { 0, 5, 6, 5, 0 },
1068
130k
                                                         { 2, 4, 4, 4, 2 } };
1069
130k
  const int filt = strength - 1;
1070
130k
  uint16_t edge[129];
1071
1072
130k
  memcpy(edge, p, sz * sizeof(*p));
1073
2.39M
  for (int i = 1; i < sz; i++) {
1074
2.26M
    int s = 0;
1075
13.5M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1076
11.3M
      int k = i - 2 + j;
1077
11.3M
      k = (k < 0) ? 0 : k;
1078
11.3M
      k = (k > sz - 1) ? sz - 1 : k;
1079
11.3M
      s += edge[k] * kernel[filt][j];
1080
11.3M
    }
1081
2.26M
    s = (s + 8) >> 4;
1082
2.26M
    p[i] = s;
1083
2.26M
  }
1084
130k
}
1085
1086
#if CONFIG_AV1_HIGHBITDEPTH
1087
21.7k
// Smooths the shared top-left corner sample (16-bit samples).
//
// The corner p_above[-1] is replaced by a rounded [5 6 5] / 16 weighted
// average of p_left[0], the corner itself, and p_above[0]. The result is
// stored in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int weighted =
      5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1096
#endif
1097
1098
8.83M
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1099
  // interpolate half-sample positions
1100
8.83M
  assert(sz <= MAX_UPSAMPLE_SZ);
1101
1102
8.83M
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1103
  // copy p[-1..(sz-1)] and extend first and last samples
1104
8.83M
  in[0] = p[-1];
1105
8.83M
  in[1] = p[-1];
1106
66.1M
  for (int i = 0; i < sz; i++) {
1107
57.2M
    in[i + 2] = p[i];
1108
57.2M
  }
1109
8.83M
  in[sz + 2] = p[sz - 1];
1110
1111
  // interpolate half-sample edge positions
1112
8.83M
  p[-2] = in[0];
1113
66.0M
  for (int i = 0; i < sz; i++) {
1114
57.1M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1115
57.1M
    s = clip_pixel((s + 8) >> 4);
1116
57.1M
    p[2 * i - 1] = s;
1117
57.1M
    p[2 * i] = in[i + 2];
1118
57.1M
  }
1119
8.83M
}
1120
1121
88.6k
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1122
  // interpolate half-sample positions
1123
88.6k
  assert(sz <= MAX_UPSAMPLE_SZ);
1124
1125
88.6k
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1126
  // copy p[-1..(sz-1)] and extend first and last samples
1127
88.6k
  in[0] = p[-1];
1128
88.6k
  in[1] = p[-1];
1129
841k
  for (int i = 0; i < sz; i++) {
1130
752k
    in[i + 2] = p[i];
1131
752k
  }
1132
88.6k
  in[sz + 2] = p[sz - 1];
1133
1134
  // interpolate half-sample edge positions
1135
88.6k
  p[-2] = in[0];
1136
841k
  for (int i = 0; i < sz; i++) {
1137
752k
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1138
752k
    s = (s + 8) >> 4;
1139
752k
    s = clip_pixel_highbd(s, bd);
1140
752k
    p[2 * i - 1] = s;
1141
752k
    p[2 * i] = in[i + 2];
1142
752k
  }
1143
88.6k
}
1144
#if CONFIG_AV1_HIGHBITDEPTH
1145
static void build_intra_predictors_high(
1146
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1147
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
1148
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1149
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1150
3.62M
    int bit_depth) {
1151
3.62M
  int i;
1152
3.62M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1153
3.62M
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
1154
3.62M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1155
3.62M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1156
3.62M
  uint16_t *const above_row = above_data + 16;
1157
3.62M
  uint16_t *const left_col = left_data + 16;
1158
3.62M
  const int txwpx = tx_size_wide[tx_size];
1159
3.62M
  const int txhpx = tx_size_high[tx_size];
1160
3.62M
  int need_left = extend_modes[mode] & NEED_LEFT;
1161
3.62M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1162
3.62M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1163
3.62M
  const uint16_t *above_ref = ref - ref_stride;
1164
3.62M
  const uint16_t *left_ref = ref - 1;
1165
3.62M
  int p_angle = 0;
1166
3.62M
  const int is_dr_mode = av1_is_directional_mode(mode);
1167
3.62M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1168
3.62M
  int base = 128 << (bit_depth - 8);
1169
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1170
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1171
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1172
  // seen to be the potential reason for this issue.
1173
3.62M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1174
3.62M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1175
1176
  // The default values if ref pixels are not available:
1177
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1178
  // base+1   A      B  ..     Y      Z
1179
  // base+1   C      D  ..     W      X
1180
  // base+1   E      F  ..     U      V
1181
  // base+1   G      H  ..     S      T      T      T      T      T
1182
1183
3.62M
  if (is_dr_mode) {
1184
1.13M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1185
1.13M
    if (p_angle <= 90)
1186
352k
      need_above = 1, need_left = 0, need_above_left = 1;
1187
780k
    else if (p_angle < 180)
1188
364k
      need_above = 1, need_left = 1, need_above_left = 1;
1189
416k
    else
1190
416k
      need_above = 0, need_left = 1, need_above_left = 1;
1191
1.13M
  }
1192
3.62M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1193
1194
3.62M
  assert(n_top_px >= 0);
1195
3.62M
  assert(n_topright_px >= 0);
1196
3.62M
  assert(n_left_px >= 0);
1197
3.62M
  assert(n_bottomleft_px >= 0);
1198
1199
3.62M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1200
16.8k
    int val;
1201
16.8k
    if (need_left) {
1202
5.37k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1203
11.5k
    } else {
1204
11.5k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1205
11.5k
    }
1206
488k
    for (i = 0; i < txhpx; ++i) {
1207
471k
      aom_memset16(dst, val, txwpx);
1208
471k
      dst += dst_stride;
1209
471k
    }
1210
16.8k
    return;
1211
16.8k
  }
1212
1213
  // NEED_LEFT
1214
3.60M
  if (need_left) {
1215
3.26M
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
1216
3.26M
    if (use_filter_intra) need_bottom = 0;
1217
3.26M
    if (is_dr_mode) need_bottom = p_angle > 180;
1218
3.26M
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
1219
3.26M
    i = 0;
1220
3.26M
    if (n_left_px > 0) {
1221
42.4M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1222
3.20M
      if (need_bottom && n_bottomleft_px > 0) {
1223
83.7k
        assert(i == txhpx);
1224
926k
        for (; i < txhpx + n_bottomleft_px; i++)
1225
842k
          left_col[i] = left_ref[i * ref_stride];
1226
83.7k
      }
1227
3.20M
      if (i < num_left_pixels_needed)
1228
211k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1229
3.20M
    } else if (n_top_px > 0) {
1230
51.7k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1231
51.7k
    }
1232
3.26M
  }
1233
1234
  // NEED_ABOVE
1235
3.60M
  if (need_above) {
1236
3.19M
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
1237
3.19M
    if (use_filter_intra) need_right = 0;
1238
3.19M
    if (is_dr_mode) need_right = p_angle < 90;
1239
3.19M
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
1240
3.19M
    if (n_top_px > 0) {
1241
3.07M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1242
3.07M
      i = n_top_px;
1243
3.07M
      if (need_right && n_topright_px > 0) {
1244
123k
        assert(n_top_px == txwpx);
1245
123k
        memcpy(above_row + txwpx, above_ref + txwpx,
1246
123k
               n_topright_px * sizeof(above_ref[0]));
1247
123k
        i += n_topright_px;
1248
123k
      }
1249
3.07M
      if (i < num_top_pixels_needed)
1250
114k
        aom_memset16(&above_row[i], above_row[i - 1],
1251
114k
                     num_top_pixels_needed - i);
1252
3.07M
    } else if (n_left_px > 0) {
1253
109k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1254
109k
    }
1255
3.19M
  }
1256
1257
3.60M
  if (need_above_left) {
1258
1.56M
    if (n_top_px > 0 && n_left_px > 0) {
1259
1.51M
      above_row[-1] = above_ref[-1];
1260
1.51M
    } else if (n_top_px > 0) {
1261
17.5k
      above_row[-1] = above_ref[0];
1262
37.5k
    } else if (n_left_px > 0) {
1263
35.5k
      above_row[-1] = left_ref[0];
1264
35.5k
    } else {
1265
2.01k
      above_row[-1] = base;
1266
2.01k
    }
1267
1.56M
    left_col[-1] = above_row[-1];
1268
1.56M
  }
1269
1270
3.60M
  if (use_filter_intra) {
1271
83.2k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1272
83.2k
                                  filter_intra_mode, bit_depth);
1273
83.2k
    return;
1274
83.2k
  }
1275
1276
3.52M
  if (is_dr_mode) {
1277
1.11M
    int upsample_above = 0;
1278
1.11M
    int upsample_left = 0;
1279
1.11M
    if (!disable_edge_filter) {
1280
247k
      const int need_right = p_angle < 90;
1281
247k
      const int need_bottom = p_angle > 180;
1282
247k
      if (p_angle != 90 && p_angle != 180) {
1283
172k
        const int ab_le = need_above_left ? 1 : 0;
1284
172k
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1285
21.7k
          filter_intra_edge_corner_high(above_row, left_col);
1286
21.7k
        }
1287
172k
        if (need_above && n_top_px > 0) {
1288
116k
          const int strength = intra_edge_filter_strength(
1289
116k
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1290
116k
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1291
116k
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
1292
116k
        }
1293
172k
        if (need_left && n_left_px > 0) {
1294
124k
          const int strength = intra_edge_filter_strength(
1295
124k
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1296
124k
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1297
124k
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
1298
124k
        }
1299
172k
      }
1300
247k
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1301
247k
                                                   intra_edge_filter_type);
1302
247k
      if (need_above && upsample_above) {
1303
34.3k
        const int n_px = txwpx + (need_right ? txhpx : 0);
1304
34.3k
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
1305
34.3k
      }
1306
247k
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1307
247k
                                                  intra_edge_filter_type);
1308
247k
      if (need_left && upsample_left) {
1309
54.2k
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1310
54.2k
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
1311
54.2k
      }
1312
247k
    }
1313
1.11M
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1314
1.11M
                        upsample_above, upsample_left, p_angle, bit_depth);
1315
1.11M
    return;
1316
1.11M
  }
1317
1318
  // predict
1319
2.41M
  if (mode == DC_PRED) {
1320
1.44M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1321
1.44M
        dst, dst_stride, above_row, left_col, bit_depth);
1322
1.44M
  } else {
1323
962k
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1324
962k
  }
1325
2.41M
}
1326
#endif  // CONFIG_AV1_HIGHBITDEPTH
1327
1328
static void build_intra_predictors(
1329
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1330
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
1331
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1332
23.1M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1333
23.1M
  int i;
1334
23.1M
  const uint8_t *above_ref = ref - ref_stride;
1335
23.1M
  const uint8_t *left_ref = ref - 1;
1336
23.1M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1337
23.1M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1338
23.1M
  uint8_t *const above_row = above_data + 16;
1339
23.1M
  uint8_t *const left_col = left_data + 16;
1340
23.1M
  const int txwpx = tx_size_wide[tx_size];
1341
23.1M
  const int txhpx = tx_size_high[tx_size];
1342
23.1M
  int need_left = extend_modes[mode] & NEED_LEFT;
1343
23.1M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1344
23.1M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1345
23.1M
  int p_angle = 0;
1346
23.1M
  const int is_dr_mode = av1_is_directional_mode(mode);
1347
23.1M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1348
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1349
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1350
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1351
  // be the potential reason for this issue.
1352
23.1M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1353
23.1M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1354
1355
  // The default values if ref pixels are not available:
1356
  // 128 127 127 .. 127 127 127 127 127 127
1357
  // 129  A   B  ..  Y   Z
1358
  // 129  C   D  ..  W   X
1359
  // 129  E   F  ..  U   V
1360
  // 129  G   H  ..  S   T   T   T   T   T
1361
  // ..
1362
1363
23.1M
  if (is_dr_mode) {
1364
12.9M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1365
12.9M
    if (p_angle <= 90)
1366
3.77M
      need_above = 1, need_left = 0, need_above_left = 1;
1367
9.13M
    else if (p_angle < 180)
1368
5.30M
      need_above = 1, need_left = 1, need_above_left = 1;
1369
3.83M
    else
1370
3.83M
      need_above = 0, need_left = 1, need_above_left = 1;
1371
12.9M
  }
1372
23.1M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1373
1374
23.1M
  assert(n_top_px >= 0);
1375
23.1M
  assert(n_topright_px >= 0);
1376
23.1M
  assert(n_left_px >= 0);
1377
23.1M
  assert(n_bottomleft_px >= 0);
1378
1379
23.1M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1380
299k
    int val;
1381
299k
    if (need_left) {
1382
130k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1383
168k
    } else {
1384
168k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1385
168k
    }
1386
4.10M
    for (i = 0; i < txhpx; ++i) {
1387
3.80M
      memset(dst, val, txwpx);
1388
3.80M
      dst += dst_stride;
1389
3.80M
    }
1390
299k
    return;
1391
299k
  }
1392
1393
  // NEED_LEFT
1394
22.8M
  if (need_left) {
1395
19.3M
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
1396
19.3M
    if (use_filter_intra) need_bottom = 0;
1397
19.3M
    if (is_dr_mode) need_bottom = p_angle > 180;
1398
19.3M
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
1399
19.3M
    i = 0;
1400
19.3M
    if (n_left_px > 0) {
1401
149M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1402
18.8M
      if (need_bottom && n_bottomleft_px > 0) {
1403
513k
        assert(i == txhpx);
1404
4.39M
        for (; i < txhpx + n_bottomleft_px; i++)
1405
3.88M
          left_col[i] = left_ref[i * ref_stride];
1406
513k
      }
1407
18.8M
      if (i < num_left_pixels_needed)
1408
2.28M
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1409
18.8M
    } else if (n_top_px > 0) {
1410
439k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1411
439k
    }
1412
19.3M
  }
1413
1414
  // NEED_ABOVE
1415
22.8M
  if (need_above) {
1416
19.1M
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
1417
19.1M
    if (use_filter_intra) need_right = 0;
1418
19.1M
    if (is_dr_mode) need_right = p_angle < 90;
1419
19.1M
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
1420
19.1M
    if (n_top_px > 0) {
1421
18.5M
      memcpy(above_row, above_ref, n_top_px);
1422
18.5M
      i = n_top_px;
1423
18.5M
      if (need_right && n_topright_px > 0) {
1424
2.09M
        assert(n_top_px == txwpx);
1425
2.09M
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1426
2.09M
        i += n_topright_px;
1427
2.09M
      }
1428
18.5M
      if (i < num_top_pixels_needed)
1429
653k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1430
18.5M
    } else if (n_left_px > 0) {
1431
562k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1432
562k
    }
1433
19.1M
  }
1434
1435
22.8M
  if (need_above_left) {
1436
14.6M
    if (n_top_px > 0 && n_left_px > 0) {
1437
13.8M
      above_row[-1] = above_ref[-1];
1438
13.8M
    } else if (n_top_px > 0) {
1439
336k
      above_row[-1] = above_ref[0];
1440
423k
    } else if (n_left_px > 0) {
1441
405k
      above_row[-1] = left_ref[0];
1442
405k
    } else {
1443
18.1k
      above_row[-1] = 128;
1444
18.1k
    }
1445
14.6M
    left_col[-1] = above_row[-1];
1446
14.6M
  }
1447
1448
22.8M
  if (use_filter_intra) {
1449
635k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1450
635k
                               filter_intra_mode);
1451
635k
    return;
1452
635k
  }
1453
1454
22.2M
  if (is_dr_mode) {
1455
12.6M
    int upsample_above = 0;
1456
12.6M
    int upsample_left = 0;
1457
12.6M
    if (!disable_edge_filter) {
1458
12.6M
      const int need_right = p_angle < 90;
1459
12.6M
      const int need_bottom = p_angle > 180;
1460
12.6M
      if (p_angle != 90 && p_angle != 180) {
1461
10.5M
        const int ab_le = need_above_left ? 1 : 0;
1462
10.5M
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1463
638k
          filter_intra_edge_corner(above_row, left_col);
1464
638k
        }
1465
10.5M
        if (need_above && n_top_px > 0) {
1466
7.70M
          const int strength = intra_edge_filter_strength(
1467
7.70M
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1468
7.70M
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1469
7.70M
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1470
7.70M
        }
1471
10.5M
        if (need_left && n_left_px > 0) {
1472
7.73M
          const int strength = intra_edge_filter_strength(
1473
7.73M
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1474
7.73M
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1475
7.73M
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1476
7.73M
        }
1477
10.5M
      }
1478
12.6M
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1479
12.6M
                                                   intra_edge_filter_type);
1480
12.6M
      if (need_above && upsample_above) {
1481
4.38M
        const int n_px = txwpx + (need_right ? txhpx : 0);
1482
4.38M
        av1_upsample_intra_edge(above_row, n_px);
1483
4.38M
      }
1484
12.6M
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1485
12.6M
                                                  intra_edge_filter_type);
1486
12.6M
      if (need_left && upsample_left) {
1487
4.48M
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1488
4.48M
        av1_upsample_intra_edge(left_col, n_px);
1489
4.48M
      }
1490
12.6M
    }
1491
12.6M
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1492
12.6M
                 upsample_left, p_angle);
1493
12.6M
    return;
1494
12.6M
  }
1495
1496
  // predict
1497
9.59M
  if (mode == DC_PRED) {
1498
5.97M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1499
5.97M
                                                  left_col);
1500
5.97M
  } else {
1501
3.62M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1502
3.62M
  }
1503
9.59M
}
1504
1505
// Maps a luma block size that has a 4-sample dimension to the block size used
// for the chroma planes. A 4-wide block is widened to 8 when the plane is
// horizontally subsampled, and a 4-high block is made 8 high when the plane is
// vertically subsampled. Every other block size is returned unchanged.
//
// subsampling_x / subsampling_y must each be 0 or 1 (asserted).
static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);

  const int widen = (subsampling_x == 1);
  const int heighten = (subsampling_y == 1);
  BLOCK_SIZE scaled = bsize;

  switch (bsize) {
    case BLOCK_4X4:
      // Both dimensions are 4, so each one is scaled independently.
      if (widen) {
        if (heighten) {
          scaled = BLOCK_8X8;
        } else {
          scaled = BLOCK_8X4;
        }
      } else if (heighten) {
        scaled = BLOCK_4X8;
      }
      break;
    case BLOCK_4X8:
      // Only the width is 4; vertical subsampling alone changes nothing.
      if (widen) scaled = BLOCK_8X8;
      break;
    case BLOCK_8X4:
      // Only the height is 4; horizontal subsampling alone changes nothing.
      if (heighten) scaled = BLOCK_8X8;
      break;
    case BLOCK_4X16:
      if (widen) scaled = BLOCK_8X16;
      break;
    case BLOCK_16X4:
      if (heighten) scaled = BLOCK_16X8;
      break;
    default: break;
  }
  return scaled;
}
1555
1556
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1557
                             int enable_intra_edge_filter, int wpx, int hpx,
1558
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1559
                             int angle_delta, int use_palette,
1560
                             FILTER_INTRA_MODE filter_intra_mode,
1561
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1562
                             int dst_stride, int col_off, int row_off,
1563
26.9M
                             int plane) {
1564
26.9M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1565
26.9M
  const int txwpx = tx_size_wide[tx_size];
1566
26.9M
  const int txhpx = tx_size_high[tx_size];
1567
26.9M
  const int x = col_off << MI_SIZE_LOG2;
1568
26.9M
  const int y = row_off << MI_SIZE_LOG2;
1569
1570
26.9M
  if (use_palette) {
1571
59.7k
    int r, c;
1572
59.7k
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1573
59.7k
                               xd->color_index_map_offset[plane != 0];
1574
59.7k
    const uint16_t *const palette =
1575
59.7k
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1576
59.7k
    if (is_cur_buf_hbd(xd)) {
1577
12.1k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1578
121k
      for (r = 0; r < txhpx; ++r) {
1579
1.69M
        for (c = 0; c < txwpx; ++c) {
1580
1.58M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1581
1.58M
        }
1582
109k
      }
1583
47.5k
    } else {
1584
408k
      for (r = 0; r < txhpx; ++r) {
1585
4.08M
        for (c = 0; c < txwpx; ++c) {
1586
3.72M
          dst[r * dst_stride + c] =
1587
3.72M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1588
3.72M
        }
1589
361k
      }
1590
47.5k
    }
1591
59.7k
    return;
1592
59.7k
  }
1593
1594
26.8M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1595
26.8M
  const int txw = tx_size_wide_unit[tx_size];
1596
26.8M
  const int txh = tx_size_high_unit[tx_size];
1597
26.8M
  const int ss_x = pd->subsampling_x;
1598
26.8M
  const int ss_y = pd->subsampling_y;
1599
26.8M
  const int have_top =
1600
26.8M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1601
26.8M
  const int have_left =
1602
26.8M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1603
26.8M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1604
26.8M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1605
1606
  // Distance between the right edge of this prediction block to
1607
  // the frame right edge
1608
26.8M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1609
  // Distance between the bottom edge of this prediction block to
1610
  // the frame bottom edge
1611
26.8M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1612
26.8M
  const int right_available =
1613
26.8M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1614
26.8M
  const int bottom_available =
1615
26.8M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1616
1617
26.8M
  const PARTITION_TYPE partition = mbmi->partition;
1618
1619
26.8M
  BLOCK_SIZE bsize = mbmi->bsize;
1620
  // force 4x4 chroma component block size.
1621
26.8M
  if (ss_x || ss_y) {
1622
7.40M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1623
7.40M
  }
1624
1625
26.8M
  const int have_top_right =
1626
26.8M
      has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
1627
26.8M
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
1628
26.8M
  const int have_bottom_left = has_bottom_left(
1629
26.8M
      sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
1630
26.8M
      tx_size, row_off, col_off, ss_x, ss_y);
1631
1632
26.8M
  const int disable_edge_filter = !enable_intra_edge_filter;
1633
26.8M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1634
26.8M
#if CONFIG_AV1_HIGHBITDEPTH
1635
26.8M
  if (is_cur_buf_hbd(xd)) {
1636
3.62M
    build_intra_predictors_high(
1637
3.62M
        ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
1638
3.62M
        tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1639
3.62M
        have_top_right ? AOMMIN(txwpx, xr) : 0,
1640
3.62M
        have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1641
3.62M
        have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type,
1642
3.62M
        xd->bd);
1643
3.62M
    return;
1644
3.62M
  }
1645
23.2M
#endif
1646
23.2M
  build_intra_predictors(
1647
23.2M
      ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
1648
23.2M
      tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1649
23.2M
      have_top_right ? AOMMIN(txwpx, xr) : 0,
1650
23.2M
      have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1651
23.2M
      have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type);
1652
23.2M
}
1653
1654
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1655
                                    int plane, int blk_col, int blk_row,
1656
27.1M
                                    TX_SIZE tx_size) {
1657
27.1M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1658
27.1M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1659
27.1M
  const int dst_stride = pd->dst.stride;
1660
27.1M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1661
27.1M
  const PREDICTION_MODE mode =
1662
27.1M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1663
27.1M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1664
27.1M
  const FILTER_INTRA_MODE filter_intra_mode =
1665
27.1M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1666
27.1M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1667
27.1M
          : FILTER_INTRA_MODES;
1668
27.1M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1669
27.1M
  const SequenceHeader *seq_params = cm->seq_params;
1670
1671
27.1M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1672
#if CONFIG_DEBUG
1673
    assert(is_cfl_allowed(xd));
1674
    const BLOCK_SIZE plane_bsize =
1675
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1676
    (void)plane_bsize;
1677
    assert(plane_bsize < BLOCK_SIZES_ALL);
1678
    if (!xd->lossless[mbmi->segment_id]) {
1679
      assert(blk_col == 0);
1680
      assert(blk_row == 0);
1681
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1682
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1683
    }
1684
#endif
1685
1.49M
    CFL_CTX *const cfl = &xd->cfl;
1686
1.49M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1687
1.49M
    if (cfl->dc_pred_is_cached[pred_plane] == 0) {
1688
1.26M
      av1_predict_intra_block(xd, seq_params->sb_size,
1689
1.26M
                              seq_params->enable_intra_edge_filter, pd->width,
1690
1.26M
                              pd->height, tx_size, mode, angle_delta,
1691
1.26M
                              use_palette, filter_intra_mode, dst, dst_stride,
1692
1.26M
                              dst, dst_stride, blk_col, blk_row, plane);
1693
1.26M
      if (cfl->use_dc_pred_cache) {
1694
76.8k
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1695
76.8k
        cfl->dc_pred_is_cached[pred_plane] = 1;
1696
76.8k
      }
1697
1.26M
    } else {
1698
230k
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1699
230k
    }
1700
1.49M
    cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1701
1.49M
    return;
1702
1.49M
  }
1703
25.6M
  av1_predict_intra_block(
1704
25.6M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1705
25.6M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1706
25.6M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1707
25.6M
}
1708
1709
2
void av1_init_intra_predictors(void) {
1710
2
  aom_once(init_intra_predictors_internal);
1711
2
}