Coverage Report

Created: 2025-07-23 06:32

/src/aom/av1/common/reconintra.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
15
#include "config/aom_config.h"
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/av1_rtcd.h"
18
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/aom_once.h"
22
#include "aom_ports/mem.h"
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/cfl.h"
25
#include "av1/common/reconintra.h"
26
27
// Bit flags naming the neighbouring reference edges an intra prediction mode
// reads. They are OR-ed together per mode in extend_modes[]. Bit 0 is unused.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};
34
35
// Size constants for intra edge handling.
// NOTE(review): none of these is used in the part of the file visible here;
// the per-constant notes are inferred from the names only - confirm against
// their uses before relying on them.
#define INTRA_EDGE_FILT 3   // presumably the number of edge filter kernels
#define INTRA_EDGE_TAPS 5   // presumably the taps per edge filter kernel
#define MAX_UPSAMPLE_SZ 16  // presumably the largest edge that is upsampled
// Two maximum-size transform edges plus 32 extra pixels.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
40
static const uint8_t extend_modes[INTRA_MODES] = {
41
  NEED_ABOVE | NEED_LEFT,                   // DC
42
  NEED_ABOVE,                               // V
43
  NEED_LEFT,                                // H
44
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54
};
55
56
// Tables recording whether the top-right reference pixels are available.
// There is one table per block size; each holds one flag per block position,
// with the flags packed as bits into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data, so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130
  // 4X4
131
  has_tr_4x4,
132
  // 4X8,       8X4,            8X8
133
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
134
  // 8X16,      16X8,           16X16
135
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
136
  // 16X32,     32X16,          32X32
137
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
138
  // 32X64,     64X32,          64X64
139
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
140
  // 64x128,    128x64,         128x128
141
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
142
  // 4x16,      16x4,            8x32
143
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
144
  // 32x8,      16x64,           64x16
145
  has_tr_32x8, has_tr_16x64, has_tr_64x16
146
};
147
148
// Top-right availability bit tables for square blocks that belong to a
// PARTITION_VERT_A / PARTITION_VERT_B partition. Same bit packing as the
// has_tr_* tables: flag i is (table[i / 8] >> (i % 8)) & 1.
// Read-only lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158
// The _vert_* tables are like the ordinary tables above, but describe the
159
// order we visit square blocks when doing a PARTITION_VERT_A or
160
// PARTITION_VERT_B. This is the same order as normal except for on the last
161
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162
// as a pair of squares, which means that these tables work correctly for both
163
// mixed vertical partition types.
164
//
165
// There are tables for each of the square sizes. Vertical rectangles (like
166
// BLOCK_16X32) use their respective "non-vert" table
167
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168
  // 4X4
169
  NULL,
170
  // 4X8,      8X4,         8X8
171
  has_tr_4x8, NULL, has_tr_vert_8x8,
172
  // 8X16,     16X8,        16X16
173
  has_tr_8x16, NULL, has_tr_vert_16x16,
174
  // 16X32,    32X16,       32X32
175
  has_tr_16x32, NULL, has_tr_vert_32x32,
176
  // 32X64,    64X32,       64X64
177
  has_tr_32x64, NULL, has_tr_vert_64x64,
178
  // 64x128,   128x64,      128x128
179
  has_tr_64x128, NULL, has_tr_128x128
180
};
181
182
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183
635k
                                       BLOCK_SIZE bsize) {
184
635k
  const uint8_t *ret = NULL;
185
  // If this is a mixed vertical partition, look up bsize in orders_vert.
186
635k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187
57.3k
    assert(bsize < BLOCK_SIZES);
188
57.3k
    ret = has_tr_vert_tables[bsize];
189
577k
  } else {
190
577k
    ret = has_tr_tables[bsize];
191
577k
  }
192
635k
  assert(ret);
193
635k
  return ret;
194
635k
}
195
196
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197
                         int mi_col, int top_available, int right_available,
198
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199
1.20M
                         int col_off, int ss_x, int ss_y) {
200
1.20M
  if (!top_available || !right_available) return 0;
201
202
1.12M
  const int bw_unit = mi_size_wide[bsize];
203
1.12M
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204
1.12M
  const int top_right_count_unit = tx_size_wide_unit[txsz];
205
206
1.12M
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
207
211k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208
      // Special case: For 128x128 blocks, the transform unit whose
209
      // top-right corner is at the center of the block does in fact have
210
      // pixels available at its top-right corner.
211
123k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212
123k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213
15.1k
        return 1;
214
15.1k
      }
215
108k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216
108k
      const int col_off_64 = col_off % plane_bw_unit_64;
217
108k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218
123k
    }
219
87.8k
    return col_off + top_right_count_unit < plane_bw_unit;
220
911k
  } else {
221
    // All top-right pixels are in the block above, which is already available.
222
911k
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223
224
862k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225
862k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226
862k
    const int sb_mi_size = mi_size_high[sb_size];
227
862k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228
862k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229
230
    // Top row of superblock: so top-right pixels are in the top and/or
231
    // top-right superblocks, both of which are already available.
232
862k
    if (blk_row_in_sb == 0) return 1;
233
234
    // Rightmost column of superblock (and not the top row): so top-right pixels
235
    // fall in the right superblock, which is not available yet.
236
760k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237
125k
      return 0;
238
125k
    }
239
240
    // General case (neither top row nor rightmost column): check if the
241
    // top-right block is coded before the current block.
242
635k
    const int this_blk_index =
243
635k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244
635k
        blk_col_in_sb + 0;
245
635k
    const int idx1 = this_blk_index / 8;
246
635k
    const int idx2 = this_blk_index % 8;
247
635k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248
635k
    return (has_tr_table[idx1] >> idx2) & 1;
249
760k
  }
250
1.12M
}
251
252
// Similar to the has_tr_* tables, but these record whether the bottom-left
// reference pixels are available. There is one table per block size, with one
// flag per block position packed as bits into 8-bit integers: flag i is
// (table[i / 8] >> (i % 8)) & 1.
// The tables are read-only lookup data, so they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315
  // 4X4
316
  has_bl_4x4,
317
  // 4X8,         8X4,         8X8
318
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
319
  // 8X16,        16X8,        16X16
320
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
321
  // 16X32,       32X16,       32X32
322
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
323
  // 32X64,       64X32,       64X64
324
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
325
  // 64x128,      128x64,      128x128
326
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
327
  // 4x16,        16x4,        8x32
328
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
329
  // 32x8,        16x64,       64x16
330
  has_bl_32x8, has_bl_16x64, has_bl_64x16
331
};
332
333
// Bottom-left availability bit tables for square blocks that belong to a
// PARTITION_VERT_A / PARTITION_VERT_B partition. Same bit packing as the
// has_bl_* tables: flag i is (table[i / 8] >> (i % 8)) & 1.
// Read-only lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343
// The _vert_* tables are like the ordinary tables above, but describe the
344
// order we visit square blocks when doing a PARTITION_VERT_A or
345
// PARTITION_VERT_B. This is the same order as normal except for on the last
346
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347
// as a pair of squares, which means that these tables work correctly for both
348
// mixed vertical partition types.
349
//
350
// There are tables for each of the square sizes. Vertical rectangles (like
351
// BLOCK_16X32) use their respective "non-vert" table
352
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353
  // 4X4
354
  NULL,
355
  // 4X8,     8X4,         8X8
356
  has_bl_4x8, NULL, has_bl_vert_8x8,
357
  // 8X16,    16X8,        16X16
358
  has_bl_8x16, NULL, has_bl_vert_16x16,
359
  // 16X32,   32X16,       32X32
360
  has_bl_16x32, NULL, has_bl_vert_32x32,
361
  // 32X64,   64X32,       64X64
362
  has_bl_32x64, NULL, has_bl_vert_64x64,
363
  // 64x128,  128x64,      128x128
364
  has_bl_64x128, NULL, has_bl_128x128
365
};
366
367
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368
869k
                                       BLOCK_SIZE bsize) {
369
869k
  const uint8_t *ret = NULL;
370
  // If this is a mixed vertical partition, look up bsize in orders_vert.
371
869k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372
68.4k
    assert(bsize < BLOCK_SIZES);
373
68.4k
    ret = has_bl_vert_tables[bsize];
374
800k
  } else {
375
800k
    ret = has_bl_tables[bsize];
376
800k
  }
377
869k
  assert(ret);
378
869k
  return ret;
379
869k
}
380
381
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382
                           int mi_col, int bottom_available, int left_available,
383
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384
1.93M
                           int col_off, int ss_x, int ss_y) {
385
1.93M
  if (!bottom_available || !left_available) return 0;
386
387
  // Special case for 128x* blocks, when col_off is half the block width.
388
  // This is needed because 128x* superblocks are divided into 64x* blocks in
389
  // raster order
390
1.67M
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391
156k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392
156k
    const int col_off_64 = col_off % plane_bw_unit_64;
393
156k
    if (col_off_64 == 0) {
394
      // We are at the left edge of top-right or bottom-right 64x* block.
395
71.1k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396
71.1k
      const int row_off_64 = row_off % plane_bh_unit_64;
397
71.1k
      const int plane_bh_unit =
398
71.1k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399
      // Check if all bottom-left pixels are in the left 64x* block (which is
400
      // already coded).
401
71.1k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402
71.1k
    }
403
156k
  }
404
405
1.59M
  if (col_off > 0) {
406
    // Bottom-left pixels are in the bottom-left block, which is not available.
407
326k
    return 0;
408
1.27M
  } else {
409
1.27M
    const int bh_unit = mi_size_high[bsize];
410
1.27M
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411
1.27M
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
412
413
    // All bottom-left pixels are in the left block, which is already available.
414
1.27M
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415
416
1.20M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417
1.20M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418
1.20M
    const int sb_mi_size = mi_size_high[sb_size];
419
1.20M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420
1.20M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421
422
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
423
    // and/or bottom-left superblocks. But only the left superblock is
424
    // available, so check if all required pixels fall in that superblock.
425
1.20M
    if (blk_col_in_sb == 0) {
426
190k
      const int blk_start_row_off =
427
190k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428
190k
          ss_y;
429
190k
      const int row_off_in_sb = blk_start_row_off + row_off;
430
190k
      const int sb_height_unit = sb_mi_size >> ss_y;
431
190k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432
190k
    }
433
434
    // Bottom row of superblock (and not the leftmost column): so bottom-left
435
    // pixels fall in the bottom superblock, which is not available yet.
436
1.01M
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437
438
    // General case (neither leftmost column nor bottom row): check if the
439
    // bottom-left block is coded before the current block.
440
869k
    const int this_blk_index =
441
869k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442
869k
        blk_col_in_sb + 0;
443
869k
    const int idx1 = this_blk_index / 8;
444
869k
    const int idx2 = this_blk_index % 8;
445
869k
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446
869k
    return (has_bl_table[idx1] >> idx2) & 1;
447
1.01M
  }
448
1.59M
}
449
450
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451
                              const uint8_t *above, const uint8_t *left);
452
453
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456
#if CONFIG_AV1_HIGHBITDEPTH
457
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458
                                   const uint16_t *above, const uint16_t *left,
459
                                   int bd);
460
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462
#endif
463
464
1
static void init_intra_predictors_internal(void) {
465
1
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468
#define INIT_RECTANGULAR(p, type)             \
469
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
470
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
471
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
472
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
473
  p[TX_16X32] = aom_##type##_predictor_16x32; \
474
  p[TX_32X16] = aom_##type##_predictor_32x16; \
475
  p[TX_32X64] = aom_##type##_predictor_32x64; \
476
  p[TX_64X32] = aom_##type##_predictor_64x32;
477
#else
478
1
#define INIT_RECTANGULAR(p, type)             \
479
20
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
480
20
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
481
20
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
482
20
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
483
20
  p[TX_16X32] = aom_##type##_predictor_16x32; \
484
20
  p[TX_32X16] = aom_##type##_predictor_32x16; \
485
20
  p[TX_32X64] = aom_##type##_predictor_32x64; \
486
20
  p[TX_64X32] = aom_##type##_predictor_64x32; \
487
20
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
488
20
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
489
20
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
490
20
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
491
20
  p[TX_16X64] = aom_##type##_predictor_16x64; \
492
20
  p[TX_64X16] = aom_##type##_predictor_64x16;
493
1
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494
495
1
#define INIT_NO_4X4(p, type)                  \
496
20
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
497
20
  p[TX_16X16] = aom_##type##_predictor_16x16; \
498
20
  p[TX_32X32] = aom_##type##_predictor_32x32; \
499
20
  p[TX_64X64] = aom_##type##_predictor_64x64; \
500
20
  INIT_RECTANGULAR(p, type)
501
502
1
#define INIT_ALL_SIZES(p, type)           \
503
20
  p[TX_4X4] = aom_##type##_predictor_4x4; \
504
20
  INIT_NO_4X4(p, type)
505
506
1
  INIT_ALL_SIZES(pred[V_PRED], v)
507
1
  INIT_ALL_SIZES(pred[H_PRED], h)
508
1
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509
1
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510
1
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511
1
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512
1
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513
1
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514
1
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515
1
  INIT_ALL_SIZES(dc_pred[1][1], dc)
516
1
#if CONFIG_AV1_HIGHBITDEPTH
517
1
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518
1
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519
1
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520
1
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521
1
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522
1
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523
1
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524
1
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525
1
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526
1
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527
1
#endif
528
1
#undef intra_pred_allsizes
529
1
}
530
531
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a |bw| x |bh| block at |dst| (row pitch |stride|) from the |above|
// edge only; |left| is unused and |dy| must be 1. Row r samples the edge at
// position (r + 1) * |dx|, in 1/64 units (1/32 units when |upsample_above|
// is 1), and blends the two nearest edge pixels with 5-bit weights. Positions
// at or beyond the last edge pixel, above[max_base_x], replicate that pixel.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_step = 1 << upsample_above;

  uint8_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r, row += stride, pos += dx) {
    int idx = pos >> frac_bits;

    if (idx >= max_base_x) {
      // This row and every later one lie entirely past the edge: flood-fill.
      for (int rest = r; rest < bh; ++rest) {
        memset(row, above[max_base_x], bw * sizeof(row[0]));
        row += stride;
      }
      return;
    }

    const int w_next = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;
    for (int c = 0; c < bw; ++c, idx += base_step) {
      if (idx >= max_base_x) {
        row[c] = above[max_base_x];
        continue;
      }
      const int blend = above[idx] * w_cur + above[idx + 1] * w_next;
      // Round to nearest: add half (16) before dropping the 5 weight bits.
      row[c] = (uint8_t)((blend + 16) >> 5);
    }
  }
}
568
569
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a |bw| x |bh| block at |dst| (row pitch |stride|). Each pixel is
// projected onto the |above| edge; if the projection falls left of
// above[min_base_x] it is projected onto the |left| edge instead. Both edges
// are read at negative indices (down to -(1 << upsample)), so the pointers
// must have valid storage in front of them. The two nearest edge pixels are
// blended with 5-bit weights.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      const uint8_t *edge;
      int base;
      int scaled_pos;

      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        edge = above;
        base = base_x;
        // Multiply rather than shift: x may be negative.
        scaled_pos = x * (1 << upsample_above);
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        base = base_y;
        scaled_pos = y * (1 << upsample_left);
      }

      const int w_next = (scaled_pos & 0x3F) >> 1;
      const int blend = edge[base] * (32 - w_next) + edge[base + 1] * w_next;
      // Round to nearest: add half (16) before dropping the 5 weight bits.
      row[c] = (uint8_t)((blend + 16) >> 5);
    }
  }
}
607
608
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a |bw| x |bh| block at |dst| (row pitch |stride|) from the |left|
// edge only; |above| is unused and |dx| must be 1. Column c samples the edge
// at position (c + 1) * |dy|, in 1/64 units (1/32 units when |upsample_left|
// is 1), and blends the two nearest edge pixels with 5-bit weights. Positions
// at or beyond the last edge pixel, left[max_base_y], replicate that pixel.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_step = 1 << upsample_left;

  int pos = dy;
  for (int c = 0; c < bw; ++c, pos += dy) {
    int idx = pos >> frac_bits;
    const int w_next = ((pos << upsample_left) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    // Interpolated part of the column. idx only grows, so once it reaches the
    // end of the edge the rest of the column is a flat fill.
    int r = 0;
    for (; r < bh && idx < max_base_y; ++r, idx += base_step) {
      const int blend = left[idx] * w_cur + left[idx + 1] * w_next;
      // Round to nearest: add half (16) before dropping the 5 weight bits.
      dst[r * stride + c] = (uint8_t)((blend + 16) >> 5);
    }
    for (; r < bh; ++r) {
      dst[r * stride + c] = left[max_base_y];
    }
  }
}
639
640
// Dispatches an 8-bit directional prediction of the |tx_size| block at |dst|
// to the predictor matching |angle| (in degrees, asserted to be in (0, 270)):
//   90         -> pred[V_PRED]
//   180        -> pred[H_PRED]
//   (0, 90)    -> av1_dr_prediction_z1
//   (90, 180)  -> av1_dr_prediction_z2
//   (180, 270) -> av1_dr_prediction_z3
// Any other angle writes nothing when asserts are compiled out.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
  } else if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
  } else if (angle <= 0 || angle >= 270) {
    // Out of range: nothing to predict.
  } else if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
664
665
#if CONFIG_AV1_HIGHBITDEPTH
666
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
//
// Predicts a |bw| x |bh| block at |dst| (row pitch |stride|, in samples) from
// the |above| edge only; |left| and |bd| are unused and |dy| must be 1. Row r
// samples the edge at position (r + 1) * |dx|, in 1/64 units (1/32 units when
// |upsample_above| is 1), and blends the two nearest edge pixels with 5-bit
// weights. Positions at or beyond the last edge pixel, above[max_base_x],
// replicate that pixel.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_step = 1 << upsample_above;

  uint16_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r, row += stride, pos += dx) {
    int idx = pos >> frac_bits;

    if (idx >= max_base_x) {
      // This row and every later one lie entirely past the edge: flood-fill.
      for (int rest = r; rest < bh; ++rest) {
        aom_memset16(row, above[max_base_x], bw);
        row += stride;
      }
      return;
    }

    const int w_next = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;
    for (int c = 0; c < bw; ++c, idx += base_step) {
      if (idx >= max_base_x) {
        row[c] = above[max_base_x];
        continue;
      }
      const int blend = above[idx] * w_cur + above[idx + 1] * w_next;
      // Round to nearest: add half (16) before dropping the 5 weight bits.
      row[c] = (uint16_t)((blend + 16) >> 5);
    }
  }
}
705
706
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
//
// Predicts a |bw| x |bh| block at |dst| (row pitch |stride|, in samples).
// Each pixel is projected onto the |above| edge; if the projection falls left
// of above[min_base_x] it is projected onto the |left| edge instead. Both
// edges are read at negative indices (down to -(1 << upsample)), so the
// pointers must have valid storage in front of them. The two nearest edge
// pixels are blended with 5-bit weights. |bd| is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      const uint16_t *edge;
      int base;
      int scaled_pos;

      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        edge = above;
        base = base_x;
        // Multiply rather than shift: x may be negative.
        scaled_pos = x * (1 << upsample_above);
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        base = base_y;
        scaled_pos = y * (1 << upsample_left);
      }

      const int w_next = (scaled_pos & 0x3F) >> 1;
      const int blend = edge[base] * (32 - w_next) + edge[base + 1] * w_next;
      // Round to nearest: add half (16) before dropping the 5 weight bits.
      row[c] = (uint16_t)((blend + 16) >> 5);
    }
  }
}
745
746
// Directional prediction, zone 3: 180 < angle < 270.
//
// Column-wise counterpart of zone 1: each column is predicted from the `left`
// edge, whose position advances by `dy` (1/64-sample units) per column. Once
// the edge index reaches the last usable sample, the rest of the column is
// filled with left[max index].
//
// dst/stride:     output block and its row pitch (in samples).
// bw, bh:         block width and height.
// left:           left edge samples (read up to index (bw + bh - 1), doubled
//                 when upsample_left is set).
// upsample_left:  1 if `left` holds a 2x upsampled edge, else 0.
// above, dx, bd:  unused by this zone (dx is asserted to be 1).
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int int_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col) {
    int idx = pos >> int_bits;
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;

    int row = 0;
    // Interpolated part of the column.
    while (row < bh && idx < last_idx) {
      const int blend = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blend, 5);
      idx += idx_step;
      ++row;
    }
    // Remainder of the column replicates the last edge sample.
    while (row < bh) {
      dst[row * stride + col] = left[last_idx];
      ++row;
    }

    pos += dy;
  }
}
778
779
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780
                                TX_SIZE tx_size, const uint16_t *above,
781
                                const uint16_t *left, int upsample_above,
782
5.43M
                                int upsample_left, int angle, int bd) {
783
5.43M
  const int dx = av1_get_dx(angle);
784
5.43M
  const int dy = av1_get_dy(angle);
785
5.43M
  const int bw = tx_size_wide[tx_size];
786
5.43M
  const int bh = tx_size_high[tx_size];
787
5.43M
  assert(angle > 0 && angle < 270);
788
789
5.43M
  if (angle > 0 && angle < 90) {
790
736k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791
736k
                                upsample_above, dx, dy, bd);
792
4.69M
  } else if (angle > 90 && angle < 180) {
793
1.49M
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794
1.49M
                                upsample_above, upsample_left, dx, dy, bd);
795
3.20M
  } else if (angle > 180 && angle < 270) {
796
1.06M
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797
1.06M
                                dx, dy, bd);
798
2.14M
  } else if (angle == 90) {
799
609k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800
1.53M
  } else if (angle == 180) {
801
1.53M
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802
1.53M
  }
803
5.43M
}
804
#endif  // CONFIG_AV1_HIGHBITDEPTH
805
806
// Filter-intra tap table, indexed as av1_filter_intra_taps[mode][k][tap].
//
// As used by the C predictors in this file:
//  - `mode` is the filter-intra mode passed to the predictor.
//  - `k` (0..7) selects one output sample of a 4-wide x 2-high patch, at row
//    offset (k >> 2) and column offset (k & 3).
//  - taps 0..6 weight the seven neighbours p0..p6 (p0 = above-left, p1..p4 =
//    the four samples above, p5/p6 = the two samples to the left).
//  - tap 7 is zero in every row and is not read by the C predictors; it pads
//    each row to 8 entries.
DECLARE_ALIGNED(16, const int8_t,
807
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808
  {
809
      { -6, 10, 0, 0, 0, 12, 0, 0 },
810
      { -5, 2, 10, 0, 0, 9, 0, 0 },
811
      { -3, 1, 1, 10, 0, 7, 0, 0 },
812
      { -3, 1, 1, 2, 10, 5, 0, 0 },
813
      { -4, 6, 0, 0, 0, 2, 12, 0 },
814
      { -3, 2, 6, 0, 0, 2, 9, 0 },
815
      { -3, 2, 2, 6, 0, 2, 7, 0 },
816
      { -3, 1, 2, 2, 6, 3, 5, 0 },
817
  },
818
  {
819
      { -10, 16, 0, 0, 0, 10, 0, 0 },
820
      { -6, 0, 16, 0, 0, 6, 0, 0 },
821
      { -4, 0, 0, 16, 0, 4, 0, 0 },
822
      { -2, 0, 0, 0, 16, 2, 0, 0 },
823
      { -10, 16, 0, 0, 0, 0, 10, 0 },
824
      { -6, 0, 16, 0, 0, 0, 6, 0 },
825
      { -4, 0, 0, 16, 0, 0, 4, 0 },
826
      { -2, 0, 0, 0, 16, 0, 2, 0 },
827
  },
828
  {
829
      { -8, 8, 0, 0, 0, 16, 0, 0 },
830
      { -8, 0, 8, 0, 0, 16, 0, 0 },
831
      { -8, 0, 0, 8, 0, 16, 0, 0 },
832
      { -8, 0, 0, 0, 8, 16, 0, 0 },
833
      { -4, 4, 0, 0, 0, 0, 16, 0 },
834
      { -4, 0, 4, 0, 0, 0, 16, 0 },
835
      { -4, 0, 0, 4, 0, 0, 16, 0 },
836
      { -4, 0, 0, 0, 4, 0, 16, 0 },
837
  },
838
  {
839
      { -2, 8, 0, 0, 0, 10, 0, 0 },
840
      { -1, 3, 8, 0, 0, 6, 0, 0 },
841
      { -1, 2, 3, 8, 0, 4, 0, 0 },
842
      { 0, 1, 2, 3, 8, 2, 0, 0 },
843
      { -1, 4, 0, 0, 0, 3, 10, 0 },
844
      { -1, 3, 4, 0, 0, 4, 6, 0 },
845
      { -1, 2, 3, 4, 0, 4, 4, 0 },
846
      { -1, 2, 2, 3, 4, 3, 3, 0 },
847
  },
848
  {
849
      { -12, 14, 0, 0, 0, 14, 0, 0 },
850
      { -10, 0, 14, 0, 0, 12, 0, 0 },
851
      { -9, 0, 0, 14, 0, 11, 0, 0 },
852
      { -8, 0, 0, 0, 14, 10, 0, 0 },
853
      { -10, 12, 0, 0, 0, 0, 14, 0 },
854
      { -9, 1, 12, 0, 0, 0, 12, 0 },
855
      { -8, 0, 0, 12, 0, 1, 11, 0 },
856
      { -7, 0, 0, 1, 12, 1, 9, 0 },
857
  },
858
};
859
860
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861
                                  TX_SIZE tx_size, const uint8_t *above,
862
0
                                  const uint8_t *left, int mode) {
863
0
  int r, c;
864
0
  uint8_t buffer[33][33];
865
0
  const int bw = tx_size_wide[tx_size];
866
0
  const int bh = tx_size_high[tx_size];
867
868
0
  assert(bw <= 32 && bh <= 32);
869
870
0
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871
0
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872
873
0
  for (r = 1; r < bh + 1; r += 2)
874
0
    for (c = 1; c < bw + 1; c += 4) {
875
0
      const uint8_t p0 = buffer[r - 1][c - 1];
876
0
      const uint8_t p1 = buffer[r - 1][c];
877
0
      const uint8_t p2 = buffer[r - 1][c + 1];
878
0
      const uint8_t p3 = buffer[r - 1][c + 2];
879
0
      const uint8_t p4 = buffer[r - 1][c + 3];
880
0
      const uint8_t p5 = buffer[r][c - 1];
881
0
      const uint8_t p6 = buffer[r + 1][c - 1];
882
0
      for (int k = 0; k < 8; ++k) {
883
0
        int r_offset = k >> 2;
884
0
        int c_offset = k & 0x03;
885
0
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886
0
                 av1_filter_intra_taps[mode][k][1] * p1 +
887
0
                 av1_filter_intra_taps[mode][k][2] * p2 +
888
0
                 av1_filter_intra_taps[mode][k][3] * p3 +
889
0
                 av1_filter_intra_taps[mode][k][4] * p4 +
890
0
                 av1_filter_intra_taps[mode][k][5] * p5 +
891
0
                 av1_filter_intra_taps[mode][k][6] * p6;
892
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
893
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894
        // Since Clip1() clips a negative value to 0, it is safe to replace
895
        // Round2Signed() with Round2().
896
0
        buffer[r + r_offset][c + c_offset] =
897
0
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898
0
      }
899
0
    }
900
901
0
  for (r = 0; r < bh; ++r) {
902
0
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903
0
    dst += stride;
904
0
  }
905
0
}
906
907
#if CONFIG_AV1_HIGHBITDEPTH
908
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909
                                          TX_SIZE tx_size,
910
                                          const uint16_t *above,
911
                                          const uint16_t *left, int mode,
912
603k
                                          int bd) {
913
603k
  int r, c;
914
603k
  uint16_t buffer[33][33];
915
603k
  const int bw = tx_size_wide[tx_size];
916
603k
  const int bh = tx_size_high[tx_size];
917
918
603k
  assert(bw <= 32 && bh <= 32);
919
920
6.45M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921
603k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922
923
3.52M
  for (r = 1; r < bh + 1; r += 2)
924
12.9M
    for (c = 1; c < bw + 1; c += 4) {
925
9.99M
      const uint16_t p0 = buffer[r - 1][c - 1];
926
9.99M
      const uint16_t p1 = buffer[r - 1][c];
927
9.99M
      const uint16_t p2 = buffer[r - 1][c + 1];
928
9.99M
      const uint16_t p3 = buffer[r - 1][c + 2];
929
9.99M
      const uint16_t p4 = buffer[r - 1][c + 3];
930
9.99M
      const uint16_t p5 = buffer[r][c - 1];
931
9.99M
      const uint16_t p6 = buffer[r + 1][c - 1];
932
89.9M
      for (int k = 0; k < 8; ++k) {
933
79.9M
        int r_offset = k >> 2;
934
79.9M
        int c_offset = k & 0x03;
935
79.9M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936
79.9M
                 av1_filter_intra_taps[mode][k][1] * p1 +
937
79.9M
                 av1_filter_intra_taps[mode][k][2] * p2 +
938
79.9M
                 av1_filter_intra_taps[mode][k][3] * p3 +
939
79.9M
                 av1_filter_intra_taps[mode][k][4] * p4 +
940
79.9M
                 av1_filter_intra_taps[mode][k][5] * p5 +
941
79.9M
                 av1_filter_intra_taps[mode][k][6] * p6;
942
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
943
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944
        // Since Clip1() clips a negative value to 0, it is safe to replace
945
        // Round2Signed() with Round2().
946
79.9M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947
79.9M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948
79.9M
      }
949
9.99M
    }
950
951
6.45M
  for (r = 0; r < bh; ++r) {
952
5.85M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953
5.85M
    dst += stride;
954
5.85M
  }
955
603k
}
956
#endif  // CONFIG_AV1_HIGHBITDEPTH
957
958
18.0M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959
18.0M
  if (plane == 0) {
960
8.77M
    const PREDICTION_MODE mode = mbmi->mode;
961
8.77M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962
8.77M
            mode == SMOOTH_H_PRED);
963
9.23M
  } else {
964
    // uv_mode is not set for inter blocks, so need to explicitly
965
    // detect that case.
966
9.23M
    if (is_inter_block(mbmi)) return 0;
967
968
8.32M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969
8.32M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970
8.32M
            uv_mode == UV_SMOOTH_H_PRED);
971
9.23M
  }
972
18.0M
}
973
974
10.7M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975
10.7M
  const MB_MODE_INFO *above;
976
10.7M
  const MB_MODE_INFO *left;
977
978
10.7M
  if (plane == 0) {
979
5.44M
    above = xd->above_mbmi;
980
5.44M
    left = xd->left_mbmi;
981
5.44M
  } else {
982
5.25M
    above = xd->chroma_above_mbmi;
983
5.25M
    left = xd->chroma_left_mbmi;
984
5.25M
  }
985
986
10.7M
  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987
10.7M
}
988
989
5.90M
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
// bs0, bs1:  the two block dimensions; only their sum is used.
// delta:     angle offset; only its magnitude is used.
// type:      edge filter type; 0 selects the first threshold table, any other
//            value the second.
//
// Larger blocks and larger |delta| lead to stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type == 0) {
    if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
    // Sums in (8, 12] and (12, 16] share the same threshold.
    if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return (mag >= 8) ? 1 : 0;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return (mag >= 1) ? 1 : 0;
    }
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return (mag >= 40) ? 1 : 0;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return (mag >= 20) ? 1 : 0;
  }
  if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
  return (mag >= 1) ? 3 : 0;
}
1027
1028
0
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029
0
  if (!strength) return;
1030
1031
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032
0
                                                         { 0, 5, 6, 5, 0 },
1033
0
                                                         { 2, 4, 4, 4, 2 } };
1034
0
  const int filt = strength - 1;
1035
0
  uint8_t edge[129];
1036
1037
0
  memcpy(edge, p, sz * sizeof(*p));
1038
0
  for (int i = 1; i < sz; i++) {
1039
0
    int s = 0;
1040
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041
0
      int k = i - 2 + j;
1042
0
      k = (k < 0) ? 0 : k;
1043
0
      k = (k > sz - 1) ? sz - 1 : k;
1044
0
      s += edge[k] * kernel[filt][j];
1045
0
    }
1046
0
    s = (s + 8) >> 4;
1047
0
    p[i] = s;
1048
0
  }
1049
0
}
1050
1051
371k
// Smooths the above-left corner sample with a {5, 6, 5} / 16 kernel applied
// to (first left sample, corner, first above sample), and stores the result
// in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int neighbours = p_left[0] + p_above[0];
  const int corner = p_above[-1];
  const int smoothed = (5 * neighbours + 6 * corner + 8) >> 4;

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1060
1061
0
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062
  // interpolate half-sample positions
1063
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1064
1065
0
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066
  // copy p[-1..(sz-1)] and extend first and last samples
1067
0
  in[0] = p[-1];
1068
0
  in[1] = p[-1];
1069
0
  for (int i = 0; i < sz; i++) {
1070
0
    in[i + 2] = p[i];
1071
0
  }
1072
0
  in[sz + 2] = p[sz - 1];
1073
1074
  // interpolate half-sample edge positions
1075
0
  p[-2] = in[0];
1076
0
  for (int i = 0; i < sz; i++) {
1077
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078
0
    s = clip_pixel((s + 8) >> 4);
1079
0
    p[2 * i - 1] = s;
1080
0
    p[2 * i] = in[i + 2];
1081
0
  }
1082
0
}
1083
1084
static void build_directional_and_filter_intra_predictors(
1085
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088
4.37M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089
4.37M
  int i;
1090
4.37M
  const uint8_t *above_ref = ref - ref_stride;
1091
4.37M
  const uint8_t *left_ref = ref - 1;
1092
4.37M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093
4.37M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094
4.37M
  uint8_t *const above_row = above_data + 16;
1095
4.37M
  uint8_t *const left_col = left_data + 16;
1096
4.37M
  const int txwpx = tx_size_wide[tx_size];
1097
4.37M
  const int txhpx = tx_size_high[tx_size];
1098
4.37M
  int need_left = extend_modes[mode] & NEED_LEFT;
1099
4.37M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1100
4.37M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101
4.37M
  const int is_dr_mode = av1_is_directional_mode(mode);
1102
4.37M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103
4.37M
  assert(use_filter_intra || is_dr_mode);
1104
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1105
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1106
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1107
  // be the potential reason for this issue.
1108
4.37M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109
4.37M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110
1111
  // The default values if ref pixels are not available:
1112
  // 128 127 127 .. 127 127 127 127 127 127
1113
  // 129  A   B  ..  Y   Z
1114
  // 129  C   D  ..  W   X
1115
  // 129  E   F  ..  U   V
1116
  // 129  G   H  ..  S   T   T   T   T   T
1117
  // ..
1118
1119
4.37M
  if (is_dr_mode) {
1120
3.80M
    if (p_angle <= 90)
1121
885k
      need_above = 1, need_left = 0, need_above_left = 1;
1122
2.92M
    else if (p_angle < 180)
1123
992k
      need_above = 1, need_left = 1, need_above_left = 1;
1124
1.93M
    else
1125
1.93M
      need_above = 0, need_left = 1, need_above_left = 1;
1126
3.80M
  }
1127
4.37M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128
1129
4.37M
  assert(n_top_px >= 0);
1130
4.37M
  assert(n_topright_px >= -1);
1131
4.37M
  assert(n_left_px >= 0);
1132
4.37M
  assert(n_bottomleft_px >= -1);
1133
1134
4.37M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135
66.5k
    int val;
1136
66.5k
    if (need_left) {
1137
38.9k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1138
38.9k
    } else {
1139
27.5k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1140
27.5k
    }
1141
2.11M
    for (i = 0; i < txhpx; ++i) {
1142
2.04M
      memset(dst, val, txwpx);
1143
2.04M
      dst += dst_stride;
1144
2.04M
    }
1145
66.5k
    return;
1146
66.5k
  }
1147
1148
  // NEED_LEFT
1149
4.31M
  if (need_left) {
1150
3.45M
    const int num_left_pixels_needed =
1151
3.45M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152
3.45M
    i = 0;
1153
3.45M
    if (n_left_px > 0) {
1154
47.0M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155
3.41M
      if (n_bottomleft_px > 0) {
1156
255k
        assert(i == txhpx);
1157
3.22M
        for (; i < txhpx + n_bottomleft_px; i++)
1158
2.96M
          left_col[i] = left_ref[i * ref_stride];
1159
255k
      }
1160
3.41M
      if (i < num_left_pixels_needed)
1161
528k
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162
3.41M
    } else if (n_top_px > 0) {
1163
26.9k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1164
26.9k
    }
1165
3.45M
  }
1166
1167
  // NEED_ABOVE
1168
4.31M
  if (need_above) {
1169
2.41M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170
2.41M
    if (n_top_px > 0) {
1171
2.36M
      memcpy(above_row, above_ref, n_top_px);
1172
2.36M
      i = n_top_px;
1173
2.36M
      if (n_topright_px > 0) {
1174
252k
        assert(n_top_px == txwpx);
1175
252k
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176
252k
        i += n_topright_px;
1177
252k
      }
1178
2.36M
      if (i < num_top_pixels_needed)
1179
226k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180
2.36M
    } else if (n_left_px > 0) {
1181
37.5k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1182
37.5k
    }
1183
2.41M
  }
1184
1185
4.31M
  if (need_above_left) {
1186
4.31M
    if (n_top_px > 0 && n_left_px > 0) {
1187
4.15M
      above_row[-1] = above_ref[-1];
1188
4.15M
    } else if (n_top_px > 0) {
1189
53.4k
      above_row[-1] = above_ref[0];
1190
100k
    } else if (n_left_px > 0) {
1191
86.9k
      above_row[-1] = left_ref[0];
1192
86.9k
    } else {
1193
13.3k
      above_row[-1] = 128;
1194
13.3k
    }
1195
4.31M
    left_col[-1] = above_row[-1];
1196
4.31M
  }
1197
1198
4.31M
  if (use_filter_intra) {
1199
569k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200
569k
                               filter_intra_mode);
1201
569k
    return;
1202
569k
  }
1203
1204
3.74M
  assert(is_dr_mode);
1205
3.74M
  int upsample_above = 0;
1206
3.74M
  int upsample_left = 0;
1207
3.74M
  if (!disable_edge_filter) {
1208
3.56M
    const int need_right = p_angle < 90;
1209
3.56M
    const int need_bottom = p_angle > 180;
1210
3.56M
    if (p_angle != 90 && p_angle != 180) {
1211
1.93M
      assert(need_above_left);
1212
1.93M
      const int ab_le = 1;
1213
1.93M
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214
371k
        filter_intra_edge_corner(above_row, left_col);
1215
371k
      }
1216
1.93M
      if (need_above && n_top_px > 0) {
1217
1.31M
        const int strength = intra_edge_filter_strength(
1218
1.31M
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219
1.31M
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220
1.31M
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221
1.31M
      }
1222
1.93M
      if (need_left && n_left_px > 0) {
1223
1.53M
        const int strength = intra_edge_filter_strength(
1224
1.53M
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225
1.53M
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226
1.53M
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227
1.53M
      }
1228
1.93M
    }
1229
3.56M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230
3.56M
                                                 intra_edge_filter_type);
1231
3.56M
    if (need_above && upsample_above) {
1232
223k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1233
223k
      av1_upsample_intra_edge(above_row, n_px);
1234
223k
    }
1235
3.56M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236
3.56M
                                                intra_edge_filter_type);
1237
3.56M
    if (need_left && upsample_left) {
1238
481k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239
481k
      av1_upsample_intra_edge(left_col, n_px);
1240
481k
    }
1241
3.56M
  }
1242
3.74M
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243
3.74M
               upsample_left, p_angle);
1244
3.74M
}
1245
1246
// This function generates the pred data of a given block for non-directional
1247
// intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
1248
static void build_non_directional_intra_predictors(
1249
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250
10.7M
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251
10.7M
  const uint8_t *above_ref = ref - ref_stride;
1252
10.7M
  const uint8_t *left_ref = ref - 1;
1253
10.7M
  const int txwpx = tx_size_wide[tx_size];
1254
10.7M
  const int txhpx = tx_size_high[tx_size];
1255
10.7M
  const int need_left = extend_modes[mode] & NEED_LEFT;
1256
10.7M
  const int need_above = extend_modes[mode] & NEED_ABOVE;
1257
10.7M
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258
10.7M
  int i = 0;
1259
10.7M
  assert(n_top_px >= 0);
1260
10.7M
  assert(n_left_px >= 0);
1261
10.7M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262
10.7M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263
1264
10.7M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265
0
    int val = 0;
1266
0
    if (need_left) {
1267
0
      val = (n_top_px > 0) ? above_ref[0] : 129;
1268
0
    } else {
1269
0
      val = (n_left_px > 0) ? left_ref[0] : 127;
1270
0
    }
1271
0
    for (i = 0; i < txhpx; ++i) {
1272
0
      memset(dst, val, txwpx);
1273
0
      dst += dst_stride;
1274
0
    }
1275
0
    return;
1276
0
  }
1277
1278
10.7M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279
10.7M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280
10.7M
  uint8_t *const above_row = above_data + 16;
1281
10.7M
  uint8_t *const left_col = left_data + 16;
1282
1283
10.7M
  if (need_left) {
1284
10.7M
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285
10.7M
    if (n_left_px > 0) {
1286
188M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287
10.4M
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288
10.4M
    } else if (n_top_px > 0) {
1289
247k
      memset(left_col, above_ref[0], txhpx);
1290
247k
    }
1291
10.7M
  }
1292
1293
10.7M
  if (need_above) {
1294
10.7M
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295
10.7M
    if (n_top_px > 0) {
1296
10.3M
      memcpy(above_row, above_ref, n_top_px);
1297
10.3M
      i = n_top_px;
1298
10.3M
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299
10.3M
    } else if (n_left_px > 0) {
1300
323k
      memset(above_row, left_ref[0], txwpx);
1301
323k
    }
1302
10.7M
  }
1303
1304
10.7M
  if (need_above_left) {
1305
2.43M
    if (n_top_px > 0 && n_left_px > 0) {
1306
2.31M
      above_row[-1] = above_ref[-1];
1307
2.31M
    } else if (n_top_px > 0) {
1308
59.0k
      above_row[-1] = above_ref[0];
1309
59.0k
    } else if (n_left_px > 0) {
1310
52.7k
      above_row[-1] = left_ref[0];
1311
52.7k
    } else {
1312
2.22k
      above_row[-1] = 128;
1313
2.22k
    }
1314
2.43M
    left_col[-1] = above_row[-1];
1315
2.43M
  }
1316
1317
10.7M
  if (mode == DC_PRED) {
1318
6.73M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319
6.73M
                                                  left_col);
1320
6.73M
  } else {
1321
4.01M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322
4.01M
  }
1323
10.7M
}
1324
1325
#if CONFIG_AV1_HIGHBITDEPTH
1326
0
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327
0
  if (!strength) return;
1328
1329
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330
0
                                                         { 0, 5, 6, 5, 0 },
1331
0
                                                         { 2, 4, 4, 4, 2 } };
1332
0
  const int filt = strength - 1;
1333
0
  uint16_t edge[129];
1334
1335
0
  memcpy(edge, p, sz * sizeof(*p));
1336
0
  for (int i = 1; i < sz; i++) {
1337
0
    int s = 0;
1338
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339
0
      int k = i - 2 + j;
1340
0
      k = (k < 0) ? 0 : k;
1341
0
      k = (k > sz - 1) ? sz - 1 : k;
1342
0
      s += edge[k] * kernel[filt][j];
1343
0
    }
1344
0
    s = (s + 8) >> 4;
1345
0
    p[i] = s;
1346
0
  }
1347
0
}
1348
1349
// High-bitdepth variant of the corner filter: smooths the above-left corner
// sample with a {5, 6, 5} / 16 kernel applied to (first left sample, corner,
// first above sample), and stores the result in both p_above[-1] and
// p_left[-1].
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
                                            uint16_t *p_left) {
  const int neighbours = p_left[0] + p_above[0];
  const int corner = p_above[-1];
  const int smoothed = (5 * neighbours + 6 * corner + 8) >> 4;

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1359
1360
0
void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361
  // interpolate half-sample positions
1362
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1363
1364
0
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365
  // copy p[-1..(sz-1)] and extend first and last samples
1366
0
  in[0] = p[-1];
1367
0
  in[1] = p[-1];
1368
0
  for (int i = 0; i < sz; i++) {
1369
0
    in[i + 2] = p[i];
1370
0
  }
1371
0
  in[sz + 2] = p[sz - 1];
1372
1373
  // interpolate half-sample edge positions
1374
0
  p[-2] = in[0];
1375
0
  for (int i = 0; i < sz; i++) {
1376
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377
0
    s = (s + 8) >> 4;
1378
0
    s = clip_pixel_highbd(s, bd);
1379
0
    p[2 * i - 1] = s;
1380
0
    p[2 * i] = in[i + 2];
1381
0
  }
1382
0
}
1383
1384
static void highbd_build_directional_and_filter_intra_predictors(
1385
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389
6.32M
    int bit_depth) {
1390
6.32M
  int i;
1391
6.32M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392
6.32M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393
6.32M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394
6.32M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395
6.32M
  uint16_t *const above_row = above_data + 16;
1396
6.32M
  uint16_t *const left_col = left_data + 16;
1397
6.32M
  const int txwpx = tx_size_wide[tx_size];
1398
6.32M
  const int txhpx = tx_size_high[tx_size];
1399
6.32M
  int need_left = extend_modes[mode] & NEED_LEFT;
1400
6.32M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1401
6.32M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402
6.32M
  const uint16_t *above_ref = ref - ref_stride;
1403
6.32M
  const uint16_t *left_ref = ref - 1;
1404
6.32M
  const int is_dr_mode = av1_is_directional_mode(mode);
1405
6.32M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406
6.32M
  assert(use_filter_intra || is_dr_mode);
1407
6.32M
  const int base = 128 << (bit_depth - 8);
1408
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1409
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1410
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1411
  // seen to be the potential reason for this issue.
1412
6.32M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413
6.32M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414
1415
  // The default values if ref pixels are not available:
1416
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417
  // base+1   A      B  ..     Y      Z
1418
  // base+1   C      D  ..     W      X
1419
  // base+1   E      F  ..     U      V
1420
  // base+1   G      H  ..     S      T      T      T      T      T
1421
1422
6.32M
  if (is_dr_mode) {
1423
5.71M
    if (p_angle <= 90)
1424
1.40M
      need_above = 1, need_left = 0, need_above_left = 1;
1425
4.31M
    else if (p_angle < 180)
1426
1.49M
      need_above = 1, need_left = 1, need_above_left = 1;
1427
2.82M
    else
1428
2.82M
      need_above = 0, need_left = 1, need_above_left = 1;
1429
5.71M
  }
1430
6.32M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431
1432
6.32M
  assert(n_top_px >= 0);
1433
6.32M
  assert(n_topright_px >= -1);
1434
6.32M
  assert(n_left_px >= 0);
1435
6.32M
  assert(n_bottomleft_px >= -1);
1436
1437
6.32M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438
286k
    int val;
1439
286k
    if (need_left) {
1440
230k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441
230k
    } else {
1442
55.3k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443
55.3k
    }
1444
5.10M
    for (i = 0; i < txhpx; ++i) {
1445
4.81M
      aom_memset16(dst, val, txwpx);
1446
4.81M
      dst += dst_stride;
1447
4.81M
    }
1448
286k
    return;
1449
286k
  }
1450
1451
  // NEED_LEFT
1452
6.03M
  if (need_left) {
1453
4.69M
    const int num_left_pixels_needed =
1454
4.69M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1455
4.69M
    i = 0;
1456
4.69M
    if (n_left_px > 0) {
1457
59.2M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458
4.62M
      if (n_bottomleft_px > 0) {
1459
356k
        assert(i == txhpx);
1460
4.23M
        for (; i < txhpx + n_bottomleft_px; i++)
1461
3.87M
          left_col[i] = left_ref[i * ref_stride];
1462
356k
      }
1463
4.62M
      if (i < num_left_pixels_needed)
1464
913k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465
4.62M
    } else if (n_top_px > 0) {
1466
60.9k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467
60.9k
    }
1468
4.69M
  }
1469
1470
  // NEED_ABOVE
1471
6.03M
  if (need_above) {
1472
3.43M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473
3.43M
    if (n_top_px > 0) {
1474
3.37M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475
3.37M
      i = n_top_px;
1476
3.37M
      if (n_topright_px > 0) {
1477
397k
        assert(n_top_px == txwpx);
1478
397k
        memcpy(above_row + txwpx, above_ref + txwpx,
1479
397k
               n_topright_px * sizeof(above_ref[0]));
1480
397k
        i += n_topright_px;
1481
397k
      }
1482
3.37M
      if (i < num_top_pixels_needed)
1483
450k
        aom_memset16(&above_row[i], above_row[i - 1],
1484
450k
                     num_top_pixels_needed - i);
1485
3.37M
    } else if (n_left_px > 0) {
1486
55.5k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487
55.5k
    }
1488
3.43M
  }
1489
1490
6.03M
  if (need_above_left) {
1491
6.03M
    if (n_top_px > 0 && n_left_px > 0) {
1492
5.77M
      above_row[-1] = above_ref[-1];
1493
5.77M
    } else if (n_top_px > 0) {
1494
101k
      above_row[-1] = above_ref[0];
1495
154k
    } else if (n_left_px > 0) {
1496
149k
      above_row[-1] = left_ref[0];
1497
149k
    } else {
1498
5.10k
      above_row[-1] = base;
1499
5.10k
    }
1500
6.03M
    left_col[-1] = above_row[-1];
1501
6.03M
  }
1502
1503
6.03M
  if (use_filter_intra) {
1504
603k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505
603k
                                  filter_intra_mode, bit_depth);
1506
603k
    return;
1507
603k
  }
1508
1509
5.43M
  assert(is_dr_mode);
1510
5.43M
  int upsample_above = 0;
1511
5.43M
  int upsample_left = 0;
1512
5.43M
  if (!disable_edge_filter) {
1513
4.00M
    const int need_right = p_angle < 90;
1514
4.00M
    const int need_bottom = p_angle > 180;
1515
4.00M
    if (p_angle != 90 && p_angle != 180) {
1516
2.19M
      assert(need_above_left);
1517
2.19M
      const int ab_le = 1;
1518
2.19M
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519
399k
        highbd_filter_intra_edge_corner(above_row, left_col);
1520
399k
      }
1521
2.19M
      if (need_above && n_top_px > 0) {
1522
1.38M
        const int strength = intra_edge_filter_strength(
1523
1.38M
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524
1.38M
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525
1.38M
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526
1.38M
      }
1527
2.19M
      if (need_left && n_left_px > 0) {
1528
1.66M
        const int strength = intra_edge_filter_strength(
1529
1.66M
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530
1.66M
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531
1.66M
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532
1.66M
      }
1533
2.19M
    }
1534
4.00M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535
4.00M
                                                 intra_edge_filter_type);
1536
4.00M
    if (need_above && upsample_above) {
1537
257k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1538
257k
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539
257k
    }
1540
4.00M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541
4.00M
                                                intra_edge_filter_type);
1542
4.00M
    if (need_left && upsample_left) {
1543
495k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544
495k
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545
495k
    }
1546
4.00M
  }
1547
5.43M
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548
5.43M
                      upsample_above, upsample_left, p_angle, bit_depth);
1549
5.43M
}
1550
1551
// For HBD encode/decode, this function generates the pred data of a given
1552
// block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553
// SMOOTH_V and PAETH).
1554
static void highbd_build_non_directional_intra_predictors(
1555
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557
74.9M
    int bit_depth) {
1558
74.9M
  int i = 0;
1559
74.9M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560
74.9M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561
74.9M
  const int txwpx = tx_size_wide[tx_size];
1562
74.9M
  const int txhpx = tx_size_high[tx_size];
1563
74.9M
  int need_left = extend_modes[mode] & NEED_LEFT;
1564
74.9M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1565
74.9M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566
74.9M
  const uint16_t *above_ref = ref - ref_stride;
1567
74.9M
  const uint16_t *left_ref = ref - 1;
1568
74.9M
  const int base = 128 << (bit_depth - 8);
1569
1570
74.9M
  assert(n_top_px >= 0);
1571
74.9M
  assert(n_left_px >= 0);
1572
74.9M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573
74.9M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574
1575
74.9M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576
0
    int val = 0;
1577
0
    if (need_left) {
1578
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579
0
    } else {
1580
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581
0
    }
1582
0
    for (i = 0; i < txhpx; ++i) {
1583
0
      aom_memset16(dst, val, txwpx);
1584
0
      dst += dst_stride;
1585
0
    }
1586
0
    return;
1587
0
  }
1588
1589
74.9M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590
74.9M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591
74.9M
  uint16_t *const above_row = above_data + 16;
1592
74.9M
  uint16_t *const left_col = left_data + 16;
1593
1594
74.9M
  if (need_left) {
1595
74.9M
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596
74.9M
    if (n_left_px > 0) {
1597
544M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598
73.3M
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599
73.3M
    } else if (n_top_px > 0) {
1600
1.52M
      aom_memset16(left_col, above_ref[0], txhpx);
1601
1.52M
    }
1602
74.9M
  }
1603
1604
74.9M
  if (need_above) {
1605
74.9M
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606
74.9M
    if (n_top_px > 0) {
1607
74.3M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608
74.3M
      i = n_top_px;
1609
74.3M
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610
74.3M
    } else if (n_left_px > 0) {
1611
561k
      aom_memset16(above_row, left_ref[0], txwpx);
1612
561k
    }
1613
74.9M
  }
1614
1615
74.9M
  if (need_above_left) {
1616
3.43M
    if (n_top_px > 0 && n_left_px > 0) {
1617
3.25M
      above_row[-1] = above_ref[-1];
1618
3.25M
    } else if (n_top_px > 0) {
1619
86.0k
      above_row[-1] = above_ref[0];
1620
92.4k
    } else if (n_left_px > 0) {
1621
90.2k
      above_row[-1] = left_ref[0];
1622
90.2k
    } else {
1623
2.19k
      above_row[-1] = base;
1624
2.19k
    }
1625
3.43M
    left_col[-1] = above_row[-1];
1626
3.43M
  }
1627
1628
74.9M
  if (mode == DC_PRED) {
1629
68.8M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630
68.8M
        dst, dst_stride, above_row, left_col, bit_depth);
1631
68.8M
  } else {
1632
6.07M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633
6.07M
  }
1634
74.9M
}
1635
#endif  // CONFIG_AV1_HIGHBITDEPTH
1636
1637
// Maps a block size with a 4-sample dimension to the block size used for
// chroma neighbour-availability decisions under the given subsampling.
// Only BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_4X16 and BLOCK_16X4 are ever
// changed; every other size is returned as is. Each subsampling flag must be
// 0 or 1.
static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);

  const int sub_x = subsampling_x == 1;
  const int sub_y = subsampling_y == 1;

  switch (bsize) {
    case BLOCK_4X4:
      if (sub_x) return sub_y ? BLOCK_8X8 : BLOCK_8X4;
      return sub_y ? BLOCK_4X8 : bsize;
    // 4-wide blocks only change when the width is subsampled.
    case BLOCK_4X8: return sub_x ? BLOCK_8X8 : bsize;
    case BLOCK_4X16: return sub_x ? BLOCK_8X16 : bsize;
    // 4-high blocks only change when the height is subsampled.
    case BLOCK_8X4: return sub_y ? BLOCK_8X8 : bsize;
    case BLOCK_16X4: return sub_y ? BLOCK_16X8 : bsize;
    default: return bsize;
  }
}
1687
1688
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689
                             int enable_intra_edge_filter, int wpx, int hpx,
1690
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1691
                             int angle_delta, int use_palette,
1692
                             FILTER_INTRA_MODE filter_intra_mode,
1693
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1694
                             int dst_stride, int col_off, int row_off,
1695
96.6M
                             int plane) {
1696
96.6M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1697
96.6M
  const int txwpx = tx_size_wide[tx_size];
1698
96.6M
  const int txhpx = tx_size_high[tx_size];
1699
96.6M
  const int x = col_off << MI_SIZE_LOG2;
1700
96.6M
  const int y = row_off << MI_SIZE_LOG2;
1701
96.6M
  const int is_hbd = is_cur_buf_hbd(xd);
1702
1703
96.6M
  assert(mode < INTRA_MODES);
1704
1705
96.6M
  if (use_palette) {
1706
264k
    int r, c;
1707
264k
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708
264k
                               xd->color_index_map_offset[plane != 0];
1709
264k
    const uint16_t *const palette =
1710
264k
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1711
264k
    if (is_hbd) {
1712
139k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713
1.38M
      for (r = 0; r < txhpx; ++r) {
1714
16.0M
        for (c = 0; c < txwpx; ++c) {
1715
14.7M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716
14.7M
        }
1717
1.25M
      }
1718
139k
    } else {
1719
1.15M
      for (r = 0; r < txhpx; ++r) {
1720
11.8M
        for (c = 0; c < txwpx; ++c) {
1721
10.8M
          dst[r * dst_stride + c] =
1722
10.8M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723
10.8M
        }
1724
1.03M
      }
1725
124k
    }
1726
264k
    return;
1727
264k
  }
1728
1729
96.4M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1730
96.4M
  const int ss_x = pd->subsampling_x;
1731
96.4M
  const int ss_y = pd->subsampling_y;
1732
96.4M
  const int have_top =
1733
96.4M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734
96.4M
  const int have_left =
1735
96.4M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736
1737
  // Distance between the right edge of this prediction block to
1738
  // the frame right edge
1739
96.4M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740
  // Distance between the bottom edge of this prediction block to
1741
  // the frame bottom edge
1742
96.4M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743
96.4M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744
96.4M
  const int is_dr_mode = av1_is_directional_mode(mode);
1745
1746
  // The computations in this function, as well as in build_intra_predictors(),
1747
  // are generalized for all intra modes. Some of these operations are not
1748
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
1750
  // separate function build_non_directional_intra_predictors() is introduced
1751
  // for these modes to avoid redundant computations while generating pred data.
1752
1753
96.4M
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754
96.4M
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
1755
96.4M
  if (!use_filter_intra && !is_dr_mode) {
1756
85.7M
#if CONFIG_AV1_HIGHBITDEPTH
1757
85.7M
    if (is_hbd) {
1758
74.9M
      highbd_build_non_directional_intra_predictors(
1759
74.9M
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760
74.9M
          xd->bd);
1761
74.9M
      return;
1762
74.9M
    }
1763
10.7M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1764
10.7M
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765
10.7M
                                           mode, tx_size, n_top_px, n_left_px);
1766
10.7M
    return;
1767
85.7M
  }
1768
1769
10.6M
  const int txw = tx_size_wide_unit[tx_size];
1770
10.6M
  const int txh = tx_size_high_unit[tx_size];
1771
10.6M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772
10.6M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773
10.6M
  const int right_available =
1774
10.6M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775
10.6M
  const int bottom_available =
1776
10.6M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777
1778
10.6M
  const PARTITION_TYPE partition = mbmi->partition;
1779
1780
10.6M
  BLOCK_SIZE bsize = mbmi->bsize;
1781
  // force 4x4 chroma component block size.
1782
10.6M
  if (ss_x || ss_y) {
1783
2.71M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784
2.71M
  }
1785
1786
10.6M
  int p_angle = 0;
1787
10.6M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788
10.6M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789
1790
10.6M
  if (use_filter_intra) {
1791
1.17M
    need_top_right = 0;
1792
1.17M
    need_bottom_left = 0;
1793
1.17M
  }
1794
10.6M
  if (is_dr_mode) {
1795
9.52M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1796
9.52M
    need_top_right = p_angle < 90;
1797
9.52M
    need_bottom_left = p_angle > 180;
1798
9.52M
  }
1799
1800
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1801
  // -1 : TR and BL are not needed
1802
  //  0 : TR and BL are needed but not available
1803
  // > 0 : TR and BL are needed and pixels are available
1804
10.6M
  const int have_top_right =
1805
10.6M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806
1.20M
                                     right_available, partition, tx_size,
1807
1.20M
                                     row_off, col_off, ss_x, ss_y)
1808
10.6M
                     : -1;
1809
10.6M
  const int have_bottom_left =
1810
10.6M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811
1.93M
                                         bottom_available, have_left, partition,
1812
1.93M
                                         tx_size, row_off, col_off, ss_x, ss_y)
1813
10.6M
                       : -1;
1814
1815
10.6M
  const int disable_edge_filter = !enable_intra_edge_filter;
1816
10.6M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817
10.6M
  const int n_topright_px =
1818
10.6M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819
10.6M
  const int n_bottomleft_px =
1820
10.6M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
1821
10.6M
#if CONFIG_AV1_HIGHBITDEPTH
1822
10.6M
  if (is_hbd) {
1823
6.32M
    highbd_build_directional_and_filter_intra_predictors(
1824
6.32M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825
6.32M
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826
6.32M
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827
6.32M
    return;
1828
6.32M
  }
1829
4.36M
#endif
1830
4.36M
  build_directional_and_filter_intra_predictors(
1831
4.36M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832
4.36M
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833
4.36M
      n_bottomleft_px, intra_edge_filter_type);
1834
4.36M
}
1835
1836
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837
                                    int plane, int blk_col, int blk_row,
1838
95.8M
                                    TX_SIZE tx_size) {
1839
95.8M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1840
95.8M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1841
95.8M
  const int dst_stride = pd->dst.stride;
1842
95.8M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843
95.8M
  const PREDICTION_MODE mode =
1844
95.8M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845
95.8M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846
95.8M
  const FILTER_INTRA_MODE filter_intra_mode =
1847
95.8M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848
95.8M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1849
95.8M
          : FILTER_INTRA_MODES;
1850
95.8M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851
95.8M
  const SequenceHeader *seq_params = cm->seq_params;
1852
1853
95.8M
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854
95.8M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855
#if CONFIG_DEBUG
1856
    assert(is_cfl_allowed(xd));
1857
    const BLOCK_SIZE plane_bsize =
1858
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859
    (void)plane_bsize;
1860
    assert(plane_bsize < BLOCK_SIZES_ALL);
1861
    if (!xd->lossless[mbmi->segment_id]) {
1862
      assert(blk_col == 0);
1863
      assert(blk_row == 0);
1864
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866
    }
1867
#endif
1868
2.77M
    CFL_CTX *const cfl = &xd->cfl;
1869
2.77M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870
2.77M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1871
2.77M
      av1_predict_intra_block(xd, seq_params->sb_size,
1872
2.77M
                              seq_params->enable_intra_edge_filter, pd->width,
1873
2.77M
                              pd->height, tx_size, mode, angle_delta,
1874
2.77M
                              use_palette, filter_intra_mode, dst, dst_stride,
1875
2.77M
                              dst, dst_stride, blk_col, blk_row, plane);
1876
2.77M
      if (cfl->use_dc_pred_cache) {
1877
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1879
0
      }
1880
2.77M
    } else {
1881
1
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882
1
    }
1883
2.77M
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884
2.77M
    return;
1885
2.77M
  }
1886
93.1M
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887
93.1M
  av1_predict_intra_block(
1888
93.1M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889
93.1M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890
93.1M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891
93.1M
}
1892
1893
17.3k
void av1_init_intra_predictors(void) {
1894
17.3k
  aom_once(init_intra_predictors_internal);
1895
17.3k
}