Coverage Report

Created: 2025-12-31 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconintra.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
15
#include "config/aom_config.h"
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/av1_rtcd.h"
18
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/aom_once.h"
22
#include "aom_ports/mem.h"
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/cfl.h"
25
#include "av1/common/reconintra.h"
26
27
// Bit flags naming the neighbouring edges an intra prediction mode reads.
// They are OR-ed together in extend_modes[]. The lowest flag occupies bit 1;
// bit 0 is not assigned here.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};

// Intra edge processing constants.
// NOTE(review): the precise meaning of each value is defined by the code that
// uses it elsewhere in this file; confirm there before changing them.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Two maximum-size transform edges plus 32 extra entries.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
40
static const uint8_t extend_modes[INTRA_MODES] = {
41
  NEED_ABOVE | NEED_LEFT,                   // DC
42
  NEED_ABOVE,                               // V
43
  NEED_LEFT,                                // H
44
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54
};
55
56
// Tables to store if the top-right reference pixels are available. The flags
57
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
58
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
59
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
60
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
61
//       . . . .
62
//       . . . .
63
//       . . o .
64
//       . . . .
65
// Top-right availability bitmasks, one array per block size. Bit
// (index % 8) of entry (index / 8) is the flag for the block at raster
// position `index` inside the superblock. The arrays are lookup data that is
// only ever read (through has_tr_tables / has_tr_vert_tables), so they are
// declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130
  // 4X4
131
  has_tr_4x4,
132
  // 4X8,       8X4,            8X8
133
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
134
  // 8X16,      16X8,           16X16
135
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
136
  // 16X32,     32X16,          32X32
137
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
138
  // 32X64,     64X32,          64X64
139
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
140
  // 64x128,    128x64,         128x128
141
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
142
  // 4x16,      16x4,            8x32
143
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
144
  // 32x8,      16x64,           64x16
145
  has_tr_32x8, has_tr_16x64, has_tr_64x16
146
};
147
148
// Top-right availability bitmasks for square blocks visited in the order used
// by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_tr_*
// arrays. Read-only lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158
// The _vert_* tables are like the ordinary tables above, but describe the
159
// order we visit square blocks when doing a PARTITION_VERT_A or
160
// PARTITION_VERT_B. This is the same order as normal except for on the last
161
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162
// as a pair of squares, which means that these tables work correctly for both
163
// mixed vertical partition types.
164
//
165
// There are tables for each of the square sizes. Vertical rectangles (like
166
// BLOCK_16X32) use their respective "non-vert" table
167
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168
  // 4X4
169
  NULL,
170
  // 4X8,      8X4,         8X8
171
  has_tr_4x8, NULL, has_tr_vert_8x8,
172
  // 8X16,     16X8,        16X16
173
  has_tr_8x16, NULL, has_tr_vert_16x16,
174
  // 16X32,    32X16,       32X32
175
  has_tr_16x32, NULL, has_tr_vert_32x32,
176
  // 32X64,    64X32,       64X64
177
  has_tr_32x64, NULL, has_tr_vert_64x64,
178
  // 64x128,   128x64,      128x128
179
  has_tr_64x128, NULL, has_tr_128x128
180
};
181
182
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183
356k
                                       BLOCK_SIZE bsize) {
184
356k
  const uint8_t *ret = NULL;
185
  // If this is a mixed vertical partition, look up bsize in orders_vert.
186
356k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187
36.7k
    assert(bsize < BLOCK_SIZES);
188
36.7k
    ret = has_tr_vert_tables[bsize];
189
319k
  } else {
190
319k
    ret = has_tr_tables[bsize];
191
319k
  }
192
356k
  assert(ret);
193
356k
  return ret;
194
356k
}
195
196
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197
                         int mi_col, int top_available, int right_available,
198
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199
582k
                         int col_off, int ss_x, int ss_y) {
200
582k
  if (!top_available || !right_available) return 0;
201
202
553k
  const int bw_unit = mi_size_wide[bsize];
203
553k
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204
553k
  const int top_right_count_unit = tx_size_wide_unit[txsz];
205
206
553k
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
207
63.7k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208
      // Special case: For 128x128 blocks, the transform unit whose
209
      // top-right corner is at the center of the block does in fact have
210
      // pixels available at its top-right corner.
211
34.4k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212
15.0k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213
7.01k
        return 1;
214
7.01k
      }
215
27.4k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216
27.4k
      const int col_off_64 = col_off % plane_bw_unit_64;
217
27.4k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218
34.4k
    }
219
29.2k
    return col_off + top_right_count_unit < plane_bw_unit;
220
489k
  } else {
221
    // All top-right pixels are in the block above, which is already available.
222
489k
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223
224
473k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225
473k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226
473k
    const int sb_mi_size = mi_size_high[sb_size];
227
473k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228
473k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229
230
    // Top row of superblock: so top-right pixels are in the top and/or
231
    // top-right superblocks, both of which are already available.
232
473k
    if (blk_row_in_sb == 0) return 1;
233
234
    // Rightmost column of superblock (and not the top row): so top-right pixels
235
    // fall in the right superblock, which is not available yet.
236
410k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237
53.7k
      return 0;
238
53.7k
    }
239
240
    // General case (neither top row nor rightmost column): check if the
241
    // top-right block is coded before the current block.
242
356k
    const int this_blk_index =
243
356k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244
356k
        blk_col_in_sb + 0;
245
356k
    const int idx1 = this_blk_index / 8;
246
356k
    const int idx2 = this_blk_index % 8;
247
356k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248
356k
    return (has_tr_table[idx1] >> idx2) & 1;
249
410k
  }
250
553k
}
251
252
// Similar to the has_tr_* tables, but store if the bottom-left reference
253
// pixels are available.
254
// Bottom-left availability bitmasks, one array per block size. Bit
// (index % 8) of entry (index / 8) is the flag for the block at raster
// position `index` inside the superblock. The arrays are lookup data that is
// only ever read (through has_bl_tables / has_bl_vert_tables), so they are
// declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315
  // 4X4
316
  has_bl_4x4,
317
  // 4X8,         8X4,         8X8
318
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
319
  // 8X16,        16X8,        16X16
320
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
321
  // 16X32,       32X16,       32X32
322
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
323
  // 32X64,       64X32,       64X64
324
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
325
  // 64x128,      128x64,      128x128
326
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
327
  // 4x16,        16x4,        8x32
328
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
329
  // 32x8,        16x64,       64x16
330
  has_bl_32x8, has_bl_16x64, has_bl_64x16
331
};
332
333
// Bottom-left availability bitmasks for square blocks visited in the order
// used by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the
// has_bl_* arrays. Read-only lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343
// The _vert_* tables are like the ordinary tables above, but describe the
344
// order we visit square blocks when doing a PARTITION_VERT_A or
345
// PARTITION_VERT_B. This is the same order as normal except for on the last
346
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347
// as a pair of squares, which means that these tables work correctly for both
348
// mixed vertical partition types.
349
//
350
// There are tables for each of the square sizes. Vertical rectangles (like
351
// BLOCK_16X32) use their respective "non-vert" table
352
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353
  // 4X4
354
  NULL,
355
  // 4X8,     8X4,         8X8
356
  has_bl_4x8, NULL, has_bl_vert_8x8,
357
  // 8X16,    16X8,        16X16
358
  has_bl_8x16, NULL, has_bl_vert_16x16,
359
  // 16X32,   32X16,       32X32
360
  has_bl_16x32, NULL, has_bl_vert_32x32,
361
  // 32X64,   64X32,       64X64
362
  has_bl_32x64, NULL, has_bl_vert_64x64,
363
  // 64x128,  128x64,      128x128
364
  has_bl_64x128, NULL, has_bl_128x128
365
};
366
367
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368
519k
                                       BLOCK_SIZE bsize) {
369
519k
  const uint8_t *ret = NULL;
370
  // If this is a mixed vertical partition, look up bsize in orders_vert.
371
519k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372
43.9k
    assert(bsize < BLOCK_SIZES);
373
43.9k
    ret = has_bl_vert_tables[bsize];
374
475k
  } else {
375
475k
    ret = has_bl_tables[bsize];
376
475k
  }
377
519k
  assert(ret);
378
519k
  return ret;
379
519k
}
380
381
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382
                           int mi_col, int bottom_available, int left_available,
383
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384
952k
                           int col_off, int ss_x, int ss_y) {
385
952k
  if (!bottom_available || !left_available) return 0;
386
387
  // Special case for 128x* blocks, when col_off is half the block width.
388
  // This is needed because 128x* superblocks are divided into 64x* blocks in
389
  // raster order
390
870k
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391
40.5k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392
40.5k
    const int col_off_64 = col_off % plane_bw_unit_64;
393
40.5k
    if (col_off_64 == 0) {
394
      // We are at the left edge of top-right or bottom-right 64x* block.
395
26.4k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396
26.4k
      const int row_off_64 = row_off % plane_bh_unit_64;
397
26.4k
      const int plane_bh_unit =
398
26.4k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399
      // Check if all bottom-left pixels are in the left 64x* block (which is
400
      // already coded).
401
26.4k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402
26.4k
    }
403
40.5k
  }
404
405
843k
  if (col_off > 0) {
406
    // Bottom-left pixels are in the bottom-left block, which is not available.
407
89.9k
    return 0;
408
753k
  } else {
409
753k
    const int bh_unit = mi_size_high[bsize];
410
753k
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411
753k
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
412
413
    // All bottom-left pixels are in the left block, which is already available.
414
753k
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415
416
726k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417
726k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418
726k
    const int sb_mi_size = mi_size_high[sb_size];
419
726k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420
726k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421
422
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
423
    // and/or bottom-left superblocks. But only the left superblock is
424
    // available, so check if all required pixels fall in that superblock.
425
726k
    if (blk_col_in_sb == 0) {
426
124k
      const int blk_start_row_off =
427
124k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428
124k
          ss_y;
429
124k
      const int row_off_in_sb = blk_start_row_off + row_off;
430
124k
      const int sb_height_unit = sb_mi_size >> ss_y;
431
124k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432
124k
    }
433
434
    // Bottom row of superblock (and not the leftmost column): so bottom-left
435
    // pixels fall in the bottom superblock, which is not available yet.
436
601k
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437
438
    // General case (neither leftmost column nor bottom row): check if the
439
    // bottom-left block is coded before the current block.
440
519k
    const int this_blk_index =
441
519k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442
519k
        blk_col_in_sb + 0;
443
519k
    const int idx1 = this_blk_index / 8;
444
519k
    const int idx2 = this_blk_index % 8;
445
519k
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446
519k
    return (has_bl_table[idx1] >> idx2) & 1;
447
601k
  }
448
843k
}
449
450
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451
                              const uint8_t *above, const uint8_t *left);
452
453
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456
#if CONFIG_AV1_HIGHBITDEPTH
457
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458
                                   const uint16_t *above, const uint16_t *left,
459
                                   int bd);
460
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462
#endif
463
464
1
static void init_intra_predictors_internal(void) {
465
1
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468
#define INIT_RECTANGULAR(p, type)             \
469
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
470
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
471
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
472
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
473
  p[TX_16X32] = aom_##type##_predictor_16x32; \
474
  p[TX_32X16] = aom_##type##_predictor_32x16; \
475
  p[TX_32X64] = aom_##type##_predictor_32x64; \
476
  p[TX_64X32] = aom_##type##_predictor_64x32;
477
#else
478
1
#define INIT_RECTANGULAR(p, type)             \
479
20
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
480
20
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
481
20
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
482
20
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
483
20
  p[TX_16X32] = aom_##type##_predictor_16x32; \
484
20
  p[TX_32X16] = aom_##type##_predictor_32x16; \
485
20
  p[TX_32X64] = aom_##type##_predictor_32x64; \
486
20
  p[TX_64X32] = aom_##type##_predictor_64x32; \
487
20
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
488
20
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
489
20
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
490
20
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
491
20
  p[TX_16X64] = aom_##type##_predictor_16x64; \
492
20
  p[TX_64X16] = aom_##type##_predictor_64x16;
493
1
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494
495
1
#define INIT_NO_4X4(p, type)                  \
496
20
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
497
20
  p[TX_16X16] = aom_##type##_predictor_16x16; \
498
20
  p[TX_32X32] = aom_##type##_predictor_32x32; \
499
20
  p[TX_64X64] = aom_##type##_predictor_64x64; \
500
20
  INIT_RECTANGULAR(p, type)
501
502
1
#define INIT_ALL_SIZES(p, type)           \
503
20
  p[TX_4X4] = aom_##type##_predictor_4x4; \
504
20
  INIT_NO_4X4(p, type)
505
506
1
  INIT_ALL_SIZES(pred[V_PRED], v)
507
1
  INIT_ALL_SIZES(pred[H_PRED], h)
508
1
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509
1
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510
1
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511
1
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512
1
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513
1
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514
1
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515
1
  INIT_ALL_SIZES(dc_pred[1][1], dc)
516
1
#if CONFIG_AV1_HIGHBITDEPTH
517
1
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518
1
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519
1
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520
1
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521
1
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522
1
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523
1
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524
1
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525
1
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526
1
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527
1
#endif
528
1
#undef intra_pred_allsizes
529
1
}
530
531
// Directional prediction, zone 1: 0 < angle < 90
532
// Directional prediction for zone 1 (0 < angle < 90), reference C version.
//
// Fills a bw x bh block at dst (row pitch `stride`) by projecting each pixel
// onto the `above` edge with a step of dx (1/64 units per row) and
// interpolating between the two nearest reference samples. Positions that
// project past the last usable sample are filled with that sample.
// `left` and `dy` are unused (dy must be 1).
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint8_t *row = dst;
  int pos = dx;
  int r = 0;
  while (r < bh) {
    int idx = pos >> frac_bits;
    // Once a row starts beyond the edge, so does every later row.
    if (idx >= last_idx) break;

    const int w1 = ((pos << upsample_above) & 0x3F) >> 1;
    const int w0 = 32 - w1;
    for (int c = 0; c < bw; ++c) {
      if (idx < last_idx) {
        // Weights sum to 32: add 16 and shift by 5 to round to nearest.
        row[c] = (uint8_t)((above[idx] * w0 + above[idx + 1] * w1 + 16) >> 5);
      } else {
        row[c] = above[last_idx];
      }
      idx += idx_step;
    }

    ++r;
    row += stride;
    pos += dx;
  }

  // Remaining rows lie entirely past the edge: replicate the last sample.
  for (; r < bh; ++r) {
    memset(row, above[last_idx], bw * sizeof(row[0]));
    row += stride;
  }
}
568
569
// Directional prediction, zone 2: 90 < angle < 180
570
// Directional prediction for zone 2 (90 < angle < 180), reference C version.
//
// Each pixel of the bw x bh block is projected onto the `above` edge using
// dx; if the projection falls to the left of the first usable above sample
// (index -(1 << upsample_above)), it is projected onto the `left` edge using
// dy instead. The result interpolates the two nearest reference samples.
// Both edges are read at negative indices.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    const int row_dx = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const uint8_t *ref;
      int w1;
      const int x = (c << 6) - row_dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        // Multiply instead of shifting: x may be negative.
        w1 = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        ref = above + base_x;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        w1 = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        ref = left + base_y;
      }
      // Weights sum to 32: add 16 and shift by 5 to round to nearest.
      dst[c] = (uint8_t)((ref[0] * (32 - w1) + ref[1] * w1 + 16) >> 5);
    }
  }
}
607
608
// Directional prediction, zone 3: 180 < angle < 270
609
// Directional prediction for zone 3 (180 < angle < 270), reference C version.
//
// Fills a bw x bh block at dst (row pitch `stride`) column by column,
// projecting each pixel onto the `left` edge with a step of dy (1/64 units
// per column) and interpolating between the two nearest reference samples.
// Positions that project past the last usable sample are filled with that
// sample. `above` and `dx` are unused (dx must be 1).
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  for (int c = 0; c < bw; ++c) {
    const int pos = (c + 1) * dy;
    const int w1 = ((pos << upsample_left) & 0x3F) >> 1;
    const int w0 = 32 - w1;
    int idx = pos >> frac_bits;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && idx < last_idx; ++r, idx += idx_step) {
      // Weights sum to 32: add 16 and shift by 5 to round to nearest.
      dst[r * stride + c] =
          (uint8_t)((left[idx] * w0 + left[idx + 1] * w1 + 16) >> 5);
    }
    // Rest of the column lies past the edge: replicate the last sample.
    for (; r < bh; ++r) dst[r * stride + c] = left[last_idx];
  }
}
639
640
// Dispatches an 8-bit directional prediction by angle (0 < angle < 270).
//
// Exactly vertical (90) and horizontal (180) angles use the V_PRED / H_PRED
// entries of the pred table; every other angle goes to the zone 1, 2 or 3
// directional predictor for its range.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
    return;
  }
  if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
    return;
  }
  if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
  // Angles outside (0, 270) leave dst untouched, as rejected by the assert.
}
664
665
#if CONFIG_AV1_HIGHBITDEPTH
666
// Directional prediction, zone 1: 0 < angle < 90
667
// High bit-depth directional prediction for zone 1 (0 < angle < 90),
// reference C version.
//
// Fills a bw x bh block at dst (row pitch `stride`) by projecting each pixel
// onto the `above` edge with a step of dx (1/64 units per row) and
// interpolating between the two nearest reference samples. Positions that
// project past the last usable sample are filled with that sample.
// `left`, `dy` and `bd` are unused (dy must be 1).
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint16_t *row = dst;
  int pos = dx;
  int r = 0;
  while (r < bh) {
    int idx = pos >> frac_bits;
    // Once a row starts beyond the edge, so does every later row.
    if (idx >= last_idx) break;

    const int w1 = ((pos << upsample_above) & 0x3F) >> 1;
    const int w0 = 32 - w1;
    for (int c = 0; c < bw; ++c) {
      if (idx < last_idx) {
        // Weights sum to 32: add 16 and shift by 5 to round to nearest.
        row[c] = (uint16_t)((above[idx] * w0 + above[idx + 1] * w1 + 16) >> 5);
      } else {
        row[c] = above[last_idx];
      }
      idx += idx_step;
    }

    ++r;
    row += stride;
    pos += dx;
  }

  // Remaining rows lie entirely past the edge: replicate the last sample.
  for (; r < bh; ++r) {
    aom_memset16(row, above[last_idx], bw);
    row += stride;
  }
}
705
706
// Directional prediction, zone 2: 90 < angle < 180 (high bit-depth, C
// version).
//
// Each sample is first projected onto the `above` edge. If that projection
// falls left of the first usable above sample, the sample is projected onto
// the `left` edge instead. Either way the result is a 2-tap interpolation
// with 5-bit weights. `bd` is not used.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;
  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const uint16_t *edge;
      int idx;
      int frac;

      const int pos_x = (c << 6) - (r + 1) * dx;
      const int base_x = pos_x >> frac_bits_x;
      if (base_x >= min_base_x) {
        edge = above;
        idx = base_x;
        // Multiply rather than shift: pos_x may be negative here.
        frac = ((pos_x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int pos_y = (r << 6) - (c + 1) * dy;
        const int base_y = pos_y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        idx = base_y;
        frac = ((pos_y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int sum = edge[idx] * (32 - frac) + edge[idx + 1] * frac;
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
745
746
// Directional prediction, zone 3: 180 < angle < 270 (high bit-depth, C
// version).
//
// Mirror image of zone 1: the block is produced column by column from the
// `left` edge. Each column advances the projected position by `dy`, and
// samples projecting past the last usable edge sample replicate that sample.
// `above`, `dx` and `bd` are not used by this zone.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    int idx = pos >> frac_bits;
    const int w1 = ((pos << upsample_left) & 0x3F) >> 1;
    const int w0 = 32 - w1;
    int row = 0;

    // Interpolated part of the column.
    while (row < bh && idx < last_idx) {
      const int sum = left[idx] * w0 + left[idx + 1] * w1;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
      idx += step;
      ++row;
    }

    // Remainder of the column lies past the edge: replicate the last sample.
    for (; row < bh; ++row) dst[row * stride + col] = left[last_idx];
  }
}
778
779
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780
                                TX_SIZE tx_size, const uint16_t *above,
781
                                const uint16_t *left, int upsample_above,
782
3.34M
                                int upsample_left, int angle, int bd) {
783
3.34M
  const int dx = av1_get_dx(angle);
784
3.34M
  const int dy = av1_get_dy(angle);
785
3.34M
  const int bw = tx_size_wide[tx_size];
786
3.34M
  const int bh = tx_size_high[tx_size];
787
3.34M
  assert(angle > 0 && angle < 270);
788
789
3.34M
  if (angle > 0 && angle < 90) {
790
350k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791
350k
                                upsample_above, dx, dy, bd);
792
2.99M
  } else if (angle > 90 && angle < 180) {
793
646k
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794
646k
                                upsample_above, upsample_left, dx, dy, bd);
795
2.34M
  } else if (angle > 180 && angle < 270) {
796
563k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797
563k
                                dx, dy, bd);
798
1.78M
  } else if (angle == 90) {
799
326k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800
1.46M
  } else if (angle == 180) {
801
1.46M
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802
1.46M
  }
803
3.34M
}
804
#endif  // CONFIG_AV1_HIGHBITDEPTH
805
806
DECLARE_ALIGNED(16, const int8_t,
807
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808
  {
809
      { -6, 10, 0, 0, 0, 12, 0, 0 },
810
      { -5, 2, 10, 0, 0, 9, 0, 0 },
811
      { -3, 1, 1, 10, 0, 7, 0, 0 },
812
      { -3, 1, 1, 2, 10, 5, 0, 0 },
813
      { -4, 6, 0, 0, 0, 2, 12, 0 },
814
      { -3, 2, 6, 0, 0, 2, 9, 0 },
815
      { -3, 2, 2, 6, 0, 2, 7, 0 },
816
      { -3, 1, 2, 2, 6, 3, 5, 0 },
817
  },
818
  {
819
      { -10, 16, 0, 0, 0, 10, 0, 0 },
820
      { -6, 0, 16, 0, 0, 6, 0, 0 },
821
      { -4, 0, 0, 16, 0, 4, 0, 0 },
822
      { -2, 0, 0, 0, 16, 2, 0, 0 },
823
      { -10, 16, 0, 0, 0, 0, 10, 0 },
824
      { -6, 0, 16, 0, 0, 0, 6, 0 },
825
      { -4, 0, 0, 16, 0, 0, 4, 0 },
826
      { -2, 0, 0, 0, 16, 0, 2, 0 },
827
  },
828
  {
829
      { -8, 8, 0, 0, 0, 16, 0, 0 },
830
      { -8, 0, 8, 0, 0, 16, 0, 0 },
831
      { -8, 0, 0, 8, 0, 16, 0, 0 },
832
      { -8, 0, 0, 0, 8, 16, 0, 0 },
833
      { -4, 4, 0, 0, 0, 0, 16, 0 },
834
      { -4, 0, 4, 0, 0, 0, 16, 0 },
835
      { -4, 0, 0, 4, 0, 0, 16, 0 },
836
      { -4, 0, 0, 0, 4, 0, 16, 0 },
837
  },
838
  {
839
      { -2, 8, 0, 0, 0, 10, 0, 0 },
840
      { -1, 3, 8, 0, 0, 6, 0, 0 },
841
      { -1, 2, 3, 8, 0, 4, 0, 0 },
842
      { 0, 1, 2, 3, 8, 2, 0, 0 },
843
      { -1, 4, 0, 0, 0, 3, 10, 0 },
844
      { -1, 3, 4, 0, 0, 4, 6, 0 },
845
      { -1, 2, 3, 4, 0, 4, 4, 0 },
846
      { -1, 2, 2, 3, 4, 3, 3, 0 },
847
  },
848
  {
849
      { -12, 14, 0, 0, 0, 14, 0, 0 },
850
      { -10, 0, 14, 0, 0, 12, 0, 0 },
851
      { -9, 0, 0, 14, 0, 11, 0, 0 },
852
      { -8, 0, 0, 0, 14, 10, 0, 0 },
853
      { -10, 12, 0, 0, 0, 0, 14, 0 },
854
      { -9, 1, 12, 0, 0, 0, 12, 0 },
855
      { -8, 0, 0, 12, 0, 1, 11, 0 },
856
      { -7, 0, 0, 1, 12, 1, 9, 0 },
857
  },
858
};
859
860
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861
                                  TX_SIZE tx_size, const uint8_t *above,
862
0
                                  const uint8_t *left, int mode) {
863
0
  int r, c;
864
0
  uint8_t buffer[33][33];
865
0
  const int bw = tx_size_wide[tx_size];
866
0
  const int bh = tx_size_high[tx_size];
867
868
0
  assert(bw <= 32 && bh <= 32);
869
870
0
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871
0
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872
873
0
  for (r = 1; r < bh + 1; r += 2)
874
0
    for (c = 1; c < bw + 1; c += 4) {
875
0
      const uint8_t p0 = buffer[r - 1][c - 1];
876
0
      const uint8_t p1 = buffer[r - 1][c];
877
0
      const uint8_t p2 = buffer[r - 1][c + 1];
878
0
      const uint8_t p3 = buffer[r - 1][c + 2];
879
0
      const uint8_t p4 = buffer[r - 1][c + 3];
880
0
      const uint8_t p5 = buffer[r][c - 1];
881
0
      const uint8_t p6 = buffer[r + 1][c - 1];
882
0
      for (int k = 0; k < 8; ++k) {
883
0
        int r_offset = k >> 2;
884
0
        int c_offset = k & 0x03;
885
0
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886
0
                 av1_filter_intra_taps[mode][k][1] * p1 +
887
0
                 av1_filter_intra_taps[mode][k][2] * p2 +
888
0
                 av1_filter_intra_taps[mode][k][3] * p3 +
889
0
                 av1_filter_intra_taps[mode][k][4] * p4 +
890
0
                 av1_filter_intra_taps[mode][k][5] * p5 +
891
0
                 av1_filter_intra_taps[mode][k][6] * p6;
892
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
893
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894
        // Since Clip1() clips a negative value to 0, it is safe to replace
895
        // Round2Signed() with Round2().
896
0
        buffer[r + r_offset][c + c_offset] =
897
0
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898
0
      }
899
0
    }
900
901
0
  for (r = 0; r < bh; ++r) {
902
0
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903
0
    dst += stride;
904
0
  }
905
0
}
906
907
#if CONFIG_AV1_HIGHBITDEPTH
908
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909
                                          TX_SIZE tx_size,
910
                                          const uint16_t *above,
911
                                          const uint16_t *left, int mode,
912
521k
                                          int bd) {
913
521k
  int r, c;
914
521k
  uint16_t buffer[33][33];
915
521k
  const int bw = tx_size_wide[tx_size];
916
521k
  const int bh = tx_size_high[tx_size];
917
918
521k
  assert(bw <= 32 && bh <= 32);
919
920
5.87M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921
521k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922
923
3.18M
  for (r = 1; r < bh + 1; r += 2)
924
12.5M
    for (c = 1; c < bw + 1; c += 4) {
925
9.86M
      const uint16_t p0 = buffer[r - 1][c - 1];
926
9.86M
      const uint16_t p1 = buffer[r - 1][c];
927
9.86M
      const uint16_t p2 = buffer[r - 1][c + 1];
928
9.86M
      const uint16_t p3 = buffer[r - 1][c + 2];
929
9.86M
      const uint16_t p4 = buffer[r - 1][c + 3];
930
9.86M
      const uint16_t p5 = buffer[r][c - 1];
931
9.86M
      const uint16_t p6 = buffer[r + 1][c - 1];
932
88.8M
      for (int k = 0; k < 8; ++k) {
933
78.9M
        int r_offset = k >> 2;
934
78.9M
        int c_offset = k & 0x03;
935
78.9M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936
78.9M
                 av1_filter_intra_taps[mode][k][1] * p1 +
937
78.9M
                 av1_filter_intra_taps[mode][k][2] * p2 +
938
78.9M
                 av1_filter_intra_taps[mode][k][3] * p3 +
939
78.9M
                 av1_filter_intra_taps[mode][k][4] * p4 +
940
78.9M
                 av1_filter_intra_taps[mode][k][5] * p5 +
941
78.9M
                 av1_filter_intra_taps[mode][k][6] * p6;
942
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
943
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944
        // Since Clip1() clips a negative value to 0, it is safe to replace
945
        // Round2Signed() with Round2().
946
78.9M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947
78.9M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948
78.9M
      }
949
9.86M
    }
950
951
5.87M
  for (r = 0; r < bh; ++r) {
952
5.35M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953
5.35M
    dst += stride;
954
5.35M
  }
955
521k
}
956
#endif  // CONFIG_AV1_HIGHBITDEPTH
957
958
11.5M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959
11.5M
  if (plane == 0) {
960
5.83M
    const PREDICTION_MODE mode = mbmi->mode;
961
5.83M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962
5.29M
            mode == SMOOTH_H_PRED);
963
5.83M
  } else {
964
    // uv_mode is not set for inter blocks, so need to explicitly
965
    // detect that case.
966
5.67M
    if (is_inter_block(mbmi)) return 0;
967
968
4.92M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969
4.92M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970
4.65M
            uv_mode == UV_SMOOTH_H_PRED);
971
5.67M
  }
972
11.5M
}
973
974
6.46M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975
6.46M
  const MB_MODE_INFO *above;
976
6.46M
  const MB_MODE_INFO *left;
977
978
6.46M
  if (plane == 0) {
979
3.36M
    above = xd->above_mbmi;
980
3.36M
    left = xd->left_mbmi;
981
3.36M
  } else {
982
3.09M
    above = xd->chroma_above_mbmi;
983
3.09M
    left = xd->chroma_left_mbmi;
984
3.09M
  }
985
986
6.46M
  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987
6.46M
}
988
989
3.29M
// Returns the intra edge filter strength (0 = no filtering, otherwise 1..3).
//
// bs0 + bs1 : sum of the two block dimensions; selects the threshold set.
// delta     : angle offset; only its magnitude is used.
// type      : zero or non-zero selects one of two threshold tables.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int blk_wh = bs0 + bs1;
  const int d = abs(delta);

  if (type != 0) {
    if (blk_wh <= 8) return (d >= 64) ? 2 : (d >= 40) ? 1 : 0;
    if (blk_wh <= 16) return (d >= 48) ? 2 : (d >= 20) ? 1 : 0;
    if (blk_wh <= 24) return (d >= 4) ? 3 : 0;
    return (d >= 1) ? 3 : 0;
  }

  if (blk_wh <= 8) return (d >= 56) ? 1 : 0;
  // Sizes up to 12 and up to 16 share the same threshold.
  if (blk_wh <= 16) return (d >= 40) ? 1 : 0;
  if (blk_wh <= 24) return (d >= 32) ? 3 : (d >= 16) ? 2 : (d >= 8) ? 1 : 0;
  if (blk_wh <= 32) return (d >= 32) ? 3 : (d >= 4) ? 2 : (d >= 1) ? 1 : 0;
  return (d >= 1) ? 3 : 0;
}
1027
1028
0
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029
0
  if (!strength) return;
1030
1031
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032
0
                                                         { 0, 5, 6, 5, 0 },
1033
0
                                                         { 2, 4, 4, 4, 2 } };
1034
0
  const int filt = strength - 1;
1035
0
  uint8_t edge[129];
1036
1037
0
  memcpy(edge, p, sz * sizeof(*p));
1038
0
  for (int i = 1; i < sz; i++) {
1039
0
    int s = 0;
1040
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041
0
      int k = i - 2 + j;
1042
0
      k = (k < 0) ? 0 : k;
1043
0
      k = (k > sz - 1) ? sz - 1 : k;
1044
0
      s += edge[k] * kernel[filt][j];
1045
0
    }
1046
0
    s = (s + 8) >> 4;
1047
0
    p[i] = s;
1048
0
  }
1049
0
}
1050
1051
182k
// Smooths the above-left corner sample with a 3-tap (5, 6, 5) / 16 filter
// across left[0], the corner itself (p_above[-1]) and above[0]. The result is
// stored into the corner slot of both edge buffers.
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted = 5 * (p_left[0] + p_above[0]) + 6 * p_above[-1];
  const uint8_t corner = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1060
1061
0
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062
  // interpolate half-sample positions
1063
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1064
1065
0
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066
  // copy p[-1..(sz-1)] and extend first and last samples
1067
0
  in[0] = p[-1];
1068
0
  in[1] = p[-1];
1069
0
  for (int i = 0; i < sz; i++) {
1070
0
    in[i + 2] = p[i];
1071
0
  }
1072
0
  in[sz + 2] = p[sz - 1];
1073
1074
  // interpolate half-sample edge positions
1075
0
  p[-2] = in[0];
1076
0
  for (int i = 0; i < sz; i++) {
1077
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078
0
    s = clip_pixel((s + 8) >> 4);
1079
0
    p[2 * i - 1] = s;
1080
0
    p[2 * i] = in[i + 2];
1081
0
  }
1082
0
}
1083
1084
static void build_directional_and_filter_intra_predictors(
1085
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088
2.51M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089
2.51M
  int i;
1090
2.51M
  const uint8_t *above_ref = ref - ref_stride;
1091
2.51M
  const uint8_t *left_ref = ref - 1;
1092
2.51M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093
2.51M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094
2.51M
  uint8_t *const above_row = above_data + 16;
1095
2.51M
  uint8_t *const left_col = left_data + 16;
1096
2.51M
  const int txwpx = tx_size_wide[tx_size];
1097
2.51M
  const int txhpx = tx_size_high[tx_size];
1098
2.51M
  int need_left = extend_modes[mode] & NEED_LEFT;
1099
2.51M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1100
2.51M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101
2.51M
  const int is_dr_mode = av1_is_directional_mode(mode);
1102
2.51M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103
2.51M
  assert(use_filter_intra || is_dr_mode);
1104
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1105
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1106
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1107
  // be the potential reason for this issue.
1108
2.51M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109
2.51M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110
1111
  // The default values if ref pixels are not available:
1112
  // 128 127 127 .. 127 127 127 127 127 127
1113
  // 129  A   B  ..  Y   Z
1114
  // 129  C   D  ..  W   X
1115
  // 129  E   F  ..  U   V
1116
  // 129  G   H  ..  S   T   T   T   T   T
1117
  // ..
1118
1119
2.51M
  if (is_dr_mode) {
1120
2.15M
    if (p_angle <= 90)
1121
490k
      need_above = 1, need_left = 0, need_above_left = 1;
1122
1.66M
    else if (p_angle < 180)
1123
519k
      need_above = 1, need_left = 1, need_above_left = 1;
1124
1.14M
    else
1125
1.14M
      need_above = 0, need_left = 1, need_above_left = 1;
1126
2.15M
  }
1127
2.51M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128
1129
2.51M
  assert(n_top_px >= 0);
1130
2.51M
  assert(n_topright_px >= -1);
1131
2.51M
  assert(n_left_px >= 0);
1132
2.51M
  assert(n_bottomleft_px >= -1);
1133
1134
2.51M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135
27.5k
    int val;
1136
27.5k
    if (need_left) {
1137
17.2k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1138
17.2k
    } else {
1139
10.3k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1140
10.3k
    }
1141
872k
    for (i = 0; i < txhpx; ++i) {
1142
845k
      memset(dst, val, txwpx);
1143
845k
      dst += dst_stride;
1144
845k
    }
1145
27.5k
    return;
1146
27.5k
  }
1147
1148
  // NEED_LEFT
1149
2.49M
  if (need_left) {
1150
2.01M
    const int num_left_pixels_needed =
1151
2.01M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152
2.01M
    i = 0;
1153
2.01M
    if (n_left_px > 0) {
1154
26.3M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155
1.99M
      if (n_bottomleft_px > 0) {
1156
137k
        assert(i == txhpx);
1157
1.65M
        for (; i < txhpx + n_bottomleft_px; i++)
1158
1.51M
          left_col[i] = left_ref[i * ref_stride];
1159
137k
      }
1160
1.99M
      if (i < num_left_pixels_needed)
1161
269k
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162
1.99M
    } else if (n_top_px > 0) {
1163
14.2k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1164
14.2k
    }
1165
2.01M
  }
1166
1167
  // NEED_ABOVE
1168
2.49M
  if (need_above) {
1169
1.36M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170
1.36M
    if (n_top_px > 0) {
1171
1.33M
      memcpy(above_row, above_ref, n_top_px);
1172
1.33M
      i = n_top_px;
1173
1.33M
      if (n_topright_px > 0) {
1174
126k
        assert(n_top_px == txwpx);
1175
126k
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176
126k
        i += n_topright_px;
1177
126k
      }
1178
1.33M
      if (i < num_top_pixels_needed)
1179
126k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180
1.33M
    } else if (n_left_px > 0) {
1181
20.5k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1182
20.5k
    }
1183
1.36M
  }
1184
1185
2.49M
  if (need_above_left) {
1186
2.49M
    if (n_top_px > 0 && n_left_px > 0) {
1187
2.40M
      above_row[-1] = above_ref[-1];
1188
2.40M
    } else if (n_top_px > 0) {
1189
28.7k
      above_row[-1] = above_ref[0];
1190
56.7k
    } else if (n_left_px > 0) {
1191
49.5k
      above_row[-1] = left_ref[0];
1192
49.5k
    } else {
1193
7.10k
      above_row[-1] = 128;
1194
7.10k
    }
1195
2.49M
    left_col[-1] = above_row[-1];
1196
2.49M
  }
1197
1198
2.49M
  if (use_filter_intra) {
1199
362k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200
362k
                               filter_intra_mode);
1201
362k
    return;
1202
362k
  }
1203
1204
2.49M
  assert(is_dr_mode);
1205
2.13M
  int upsample_above = 0;
1206
2.13M
  int upsample_left = 0;
1207
2.13M
  if (!disable_edge_filter) {
1208
1.99M
    const int need_right = p_angle < 90;
1209
1.99M
    const int need_bottom = p_angle > 180;
1210
1.99M
    if (p_angle != 90 && p_angle != 180) {
1211
983k
      assert(need_above_left);
1212
983k
      const int ab_le = 1;
1213
983k
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214
182k
        filter_intra_edge_corner(above_row, left_col);
1215
182k
      }
1216
983k
      if (need_above && n_top_px > 0) {
1217
674k
        const int strength = intra_edge_filter_strength(
1218
674k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219
674k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220
674k
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221
674k
      }
1222
983k
      if (need_left && n_left_px > 0) {
1223
783k
        const int strength = intra_edge_filter_strength(
1224
783k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225
783k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226
783k
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227
783k
      }
1228
983k
    }
1229
1.99M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230
1.99M
                                                 intra_edge_filter_type);
1231
1.99M
    if (need_above && upsample_above) {
1232
119k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1233
119k
      av1_upsample_intra_edge(above_row, n_px);
1234
119k
    }
1235
1.99M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236
1.99M
                                                intra_edge_filter_type);
1237
1.99M
    if (need_left && upsample_left) {
1238
247k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239
247k
      av1_upsample_intra_edge(left_col, n_px);
1240
247k
    }
1241
1.99M
  }
1242
2.13M
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243
2.13M
               upsample_left, p_angle);
1244
2.13M
}
1245
1246
// This function generates the pred data of a given block for non-directional
1247
// intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
1248
static void build_non_directional_intra_predictors(
1249
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250
4.59M
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251
4.59M
  const uint8_t *above_ref = ref - ref_stride;
1252
4.59M
  const uint8_t *left_ref = ref - 1;
1253
4.59M
  const int txwpx = tx_size_wide[tx_size];
1254
4.59M
  const int txhpx = tx_size_high[tx_size];
1255
4.59M
  const int need_left = extend_modes[mode] & NEED_LEFT;
1256
4.59M
  const int need_above = extend_modes[mode] & NEED_ABOVE;
1257
4.59M
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258
4.59M
  int i = 0;
1259
4.59M
  assert(n_top_px >= 0);
1260
4.59M
  assert(n_left_px >= 0);
1261
4.59M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262
4.59M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263
1264
4.59M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265
0
    int val = 0;
1266
0
    if (need_left) {
1267
0
      val = (n_top_px > 0) ? above_ref[0] : 129;
1268
0
    } else {
1269
0
      val = (n_left_px > 0) ? left_ref[0] : 127;
1270
0
    }
1271
0
    for (i = 0; i < txhpx; ++i) {
1272
0
      memset(dst, val, txwpx);
1273
0
      dst += dst_stride;
1274
0
    }
1275
0
    return;
1276
0
  }
1277
1278
4.59M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279
4.59M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280
4.59M
  uint8_t *const above_row = above_data + 16;
1281
4.59M
  uint8_t *const left_col = left_data + 16;
1282
1283
4.59M
  if (need_left) {
1284
4.59M
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285
4.59M
    if (n_left_px > 0) {
1286
73.7M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287
4.45M
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288
4.45M
    } else if (n_top_px > 0) {
1289
102k
      memset(left_col, above_ref[0], txhpx);
1290
102k
    }
1291
4.59M
  }
1292
1293
4.59M
  if (need_above) {
1294
4.59M
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295
4.59M
    if (n_top_px > 0) {
1296
4.40M
      memcpy(above_row, above_ref, n_top_px);
1297
4.40M
      i = n_top_px;
1298
4.40M
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299
4.40M
    } else if (n_left_px > 0) {
1300
153k
      memset(above_row, left_ref[0], txwpx);
1301
153k
    }
1302
4.59M
  }
1303
1304
4.59M
  if (need_above_left) {
1305
872k
    if (n_top_px > 0 && n_left_px > 0) {
1306
832k
      above_row[-1] = above_ref[-1];
1307
832k
    } else if (n_top_px > 0) {
1308
18.0k
      above_row[-1] = above_ref[0];
1309
22.3k
    } else if (n_left_px > 0) {
1310
21.6k
      above_row[-1] = left_ref[0];
1311
21.6k
    } else {
1312
769
      above_row[-1] = 128;
1313
769
    }
1314
872k
    left_col[-1] = above_row[-1];
1315
872k
  }
1316
1317
4.59M
  if (mode == DC_PRED) {
1318
2.87M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319
2.87M
                                                  left_col);
1320
2.87M
  } else {
1321
1.71M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322
1.71M
  }
1323
4.59M
}
1324
1325
#if CONFIG_AV1_HIGHBITDEPTH
1326
0
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327
0
  if (!strength) return;
1328
1329
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330
0
                                                         { 0, 5, 6, 5, 0 },
1331
0
                                                         { 2, 4, 4, 4, 2 } };
1332
0
  const int filt = strength - 1;
1333
0
  uint16_t edge[129];
1334
1335
0
  memcpy(edge, p, sz * sizeof(*p));
1336
0
  for (int i = 1; i < sz; i++) {
1337
0
    int s = 0;
1338
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339
0
      int k = i - 2 + j;
1340
0
      k = (k < 0) ? 0 : k;
1341
0
      k = (k > sz - 1) ? sz - 1 : k;
1342
0
      s += edge[k] * kernel[filt][j];
1343
0
    }
1344
0
    s = (s + 8) >> 4;
1345
0
    p[i] = s;
1346
0
  }
1347
0
}
1348
1349
// High bit-depth corner smoothing: applies a 3-tap (5, 6, 5) / 16 filter
// across left[0], the above-left corner (p_above[-1]) and above[0], then
// stores the result into the corner slot of both edge buffers.
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
                                            uint16_t *p_left) {
  const int weighted = 5 * (p_left[0] + p_above[0]) + 6 * p_above[-1];
  const uint16_t corner = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1359
1360
0
void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361
  // interpolate half-sample positions
1362
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1363
1364
0
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365
  // copy p[-1..(sz-1)] and extend first and last samples
1366
0
  in[0] = p[-1];
1367
0
  in[1] = p[-1];
1368
0
  for (int i = 0; i < sz; i++) {
1369
0
    in[i + 2] = p[i];
1370
0
  }
1371
0
  in[sz + 2] = p[sz - 1];
1372
1373
  // interpolate half-sample edge positions
1374
0
  p[-2] = in[0];
1375
0
  for (int i = 0; i < sz; i++) {
1376
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377
0
    s = (s + 8) >> 4;
1378
0
    s = clip_pixel_highbd(s, bd);
1379
0
    p[2 * i - 1] = s;
1380
0
    p[2 * i] = in[i + 2];
1381
0
  }
1382
0
}
1383
1384
static void highbd_build_directional_and_filter_intra_predictors(
1385
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389
3.94M
    int bit_depth) {
1390
3.94M
  int i;
1391
3.94M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392
3.94M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393
3.94M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394
3.94M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395
3.94M
  uint16_t *const above_row = above_data + 16;
1396
3.94M
  uint16_t *const left_col = left_data + 16;
1397
3.94M
  const int txwpx = tx_size_wide[tx_size];
1398
3.94M
  const int txhpx = tx_size_high[tx_size];
1399
3.94M
  int need_left = extend_modes[mode] & NEED_LEFT;
1400
3.94M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1401
3.94M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402
3.94M
  const uint16_t *above_ref = ref - ref_stride;
1403
3.94M
  const uint16_t *left_ref = ref - 1;
1404
3.94M
  const int is_dr_mode = av1_is_directional_mode(mode);
1405
3.94M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406
3.94M
  assert(use_filter_intra || is_dr_mode);
1407
3.94M
  const int base = 128 << (bit_depth - 8);
1408
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1409
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1410
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1411
  // seen to be the potential reason for this issue.
1412
3.94M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413
3.94M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414
1415
  // The default values if ref pixels are not available:
1416
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417
  // base+1   A      B  ..     Y      Z
1418
  // base+1   C      D  ..     W      X
1419
  // base+1   E      F  ..     U      V
1420
  // base+1   G      H  ..     S      T      T      T      T      T
1421
1422
3.94M
  if (is_dr_mode) {
1423
3.42M
    if (p_angle <= 90)
1424
696k
      need_above = 1, need_left = 0, need_above_left = 1;
1425
2.73M
    else if (p_angle < 180)
1426
646k
      need_above = 1, need_left = 1, need_above_left = 1;
1427
2.08M
    else
1428
2.08M
      need_above = 0, need_left = 1, need_above_left = 1;
1429
3.42M
  }
1430
3.94M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431
1432
3.94M
  assert(n_top_px >= 0);
1433
3.94M
  assert(n_topright_px >= -1);
1434
3.94M
  assert(n_left_px >= 0);
1435
3.94M
  assert(n_bottomleft_px >= -1);
1436
1437
3.94M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438
85.6k
    int val;
1439
85.6k
    if (need_left) {
1440
66.4k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441
66.4k
    } else {
1442
19.2k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443
19.2k
    }
1444
1.73M
    for (i = 0; i < txhpx; ++i) {
1445
1.64M
      aom_memset16(dst, val, txwpx);
1446
1.64M
      dst += dst_stride;
1447
1.64M
    }
1448
85.6k
    return;
1449
85.6k
  }
1450
1451
  // NEED_LEFT
1452
3.86M
  if (need_left) {
1453
3.18M
    const int num_left_pixels_needed =
1454
3.18M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1455
3.18M
    i = 0;
1456
3.18M
    if (n_left_px > 0) {
1457
44.2M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458
3.17M
      if (n_bottomleft_px > 0) {
1459
228k
        assert(i == txhpx);
1460
2.76M
        for (; i < txhpx + n_bottomleft_px; i++)
1461
2.53M
          left_col[i] = left_ref[i * ref_stride];
1462
228k
      }
1463
3.17M
      if (i < num_left_pixels_needed)
1464
487k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465
3.17M
    } else if (n_top_px > 0) {
1466
12.6k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467
12.6k
    }
1468
3.18M
  }
1469
1470
  // NEED_ABOVE
1471
3.86M
  if (need_above) {
1472
1.84M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473
1.84M
    if (n_top_px > 0) {
1474
1.82M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475
1.82M
      i = n_top_px;
1476
1.82M
      if (n_topright_px > 0) {
1477
202k
        assert(n_top_px == txwpx);
1478
202k
        memcpy(above_row + txwpx, above_ref + txwpx,
1479
202k
               n_topright_px * sizeof(above_ref[0]));
1480
202k
        i += n_topright_px;
1481
202k
      }
1482
1.82M
      if (i < num_top_pixels_needed)
1483
207k
        aom_memset16(&above_row[i], above_row[i - 1],
1484
207k
                     num_top_pixels_needed - i);
1485
1.82M
    } else if (n_left_px > 0) {
1486
19.5k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487
19.5k
    }
1488
1.84M
  }
1489
1490
3.86M
  if (need_above_left) {
1491
3.86M
    if (n_top_px > 0 && n_left_px > 0) {
1492
3.76M
      above_row[-1] = above_ref[-1];
1493
3.76M
    } else if (n_top_px > 0) {
1494
23.4k
      above_row[-1] = above_ref[0];
1495
76.7k
    } else if (n_left_px > 0) {
1496
74.0k
      above_row[-1] = left_ref[0];
1497
74.0k
    } else {
1498
2.67k
      above_row[-1] = base;
1499
2.67k
    }
1500
3.86M
    left_col[-1] = above_row[-1];
1501
3.86M
  }
1502
1503
3.86M
  if (use_filter_intra) {
1504
521k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505
521k
                                  filter_intra_mode, bit_depth);
1506
521k
    return;
1507
521k
  }
1508
1509
3.86M
  assert(is_dr_mode);
1510
3.34M
  int upsample_above = 0;
1511
3.34M
  int upsample_left = 0;
1512
3.34M
  if (!disable_edge_filter) {
1513
3.02M
    const int need_right = p_angle < 90;
1514
3.02M
    const int need_bottom = p_angle > 180;
1515
3.02M
    if (p_angle != 90 && p_angle != 180) {
1516
1.31M
      assert(need_above_left);
1517
1.31M
      const int ab_le = 1;
1518
1.31M
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519
272k
        highbd_filter_intra_edge_corner(above_row, left_col);
1520
272k
      }
1521
1.31M
      if (need_above && n_top_px > 0) {
1522
815k
        const int strength = intra_edge_filter_strength(
1523
815k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524
815k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525
815k
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526
815k
      }
1527
1.31M
      if (need_left && n_left_px > 0) {
1528
1.02M
        const int strength = intra_edge_filter_strength(
1529
1.02M
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530
1.02M
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531
1.02M
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532
1.02M
      }
1533
1.31M
    }
1534
3.02M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535
3.02M
                                                 intra_edge_filter_type);
1536
3.02M
    if (need_above && upsample_above) {
1537
135k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1538
135k
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539
135k
    }
1540
3.02M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541
3.02M
                                                intra_edge_filter_type);
1542
3.02M
    if (need_left && upsample_left) {
1543
236k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544
236k
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545
236k
    }
1546
3.02M
  }
1547
3.34M
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548
3.34M
                      upsample_above, upsample_left, p_angle, bit_depth);
1549
3.34M
}
1550
1551
// For HBD encode/decode, this function generates the pred data of a given
1552
// block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553
// SMOOTH_V and PAETH).
1554
static void highbd_build_non_directional_intra_predictors(
1555
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557
52.7M
    int bit_depth) {
1558
52.7M
  int i = 0;
1559
52.7M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560
52.7M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561
52.7M
  const int txwpx = tx_size_wide[tx_size];
1562
52.7M
  const int txhpx = tx_size_high[tx_size];
1563
52.7M
  int need_left = extend_modes[mode] & NEED_LEFT;
1564
52.7M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1565
52.7M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566
52.7M
  const uint16_t *above_ref = ref - ref_stride;
1567
52.7M
  const uint16_t *left_ref = ref - 1;
1568
52.7M
  const int base = 128 << (bit_depth - 8);
1569
1570
52.7M
  assert(n_top_px >= 0);
1571
52.7M
  assert(n_left_px >= 0);
1572
52.7M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573
52.7M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574
1575
52.7M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576
0
    int val = 0;
1577
0
    if (need_left) {
1578
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579
0
    } else {
1580
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581
0
    }
1582
0
    for (i = 0; i < txhpx; ++i) {
1583
0
      aom_memset16(dst, val, txwpx);
1584
0
      dst += dst_stride;
1585
0
    }
1586
0
    return;
1587
0
  }
1588
1589
52.7M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590
52.7M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591
52.7M
  uint16_t *const above_row = above_data + 16;
1592
52.7M
  uint16_t *const left_col = left_data + 16;
1593
1594
52.7M
  if (need_left) {
1595
52.7M
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596
52.7M
    if (n_left_px > 0) {
1597
327M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598
51.7M
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599
51.7M
    } else if (n_top_px > 0) {
1600
944k
      aom_memset16(left_col, above_ref[0], txhpx);
1601
944k
    }
1602
52.7M
  }
1603
1604
52.7M
  if (need_above) {
1605
52.7M
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606
52.7M
    if (n_top_px > 0) {
1607
52.5M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608
52.5M
      i = n_top_px;
1609
52.5M
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610
52.5M
    } else if (n_left_px > 0) {
1611
205k
      aom_memset16(above_row, left_ref[0], txwpx);
1612
205k
    }
1613
52.7M
  }
1614
1615
52.7M
  if (need_above_left) {
1616
1.08M
    if (n_top_px > 0 && n_left_px > 0) {
1617
1.03M
      above_row[-1] = above_ref[-1];
1618
1.03M
    } else if (n_top_px > 0) {
1619
21.3k
      above_row[-1] = above_ref[0];
1620
31.8k
    } else if (n_left_px > 0) {
1621
30.6k
      above_row[-1] = left_ref[0];
1622
30.6k
    } else {
1623
1.25k
      above_row[-1] = base;
1624
1.25k
    }
1625
1.08M
    left_col[-1] = above_row[-1];
1626
1.08M
  }
1627
1628
52.7M
  if (mode == DC_PRED) {
1629
50.4M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630
50.4M
        dst, dst_stride, above_row, left_col, bit_depth);
1631
50.4M
  } else {
1632
2.27M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633
2.27M
  }
1634
52.7M
}
1635
#endif  // CONFIG_AV1_HIGHBITDEPTH
1636
1637
// Maps a block size with a 4-pixel dimension to the size used for chroma
// neighbour availability checks: a width of 4 is doubled when the plane is
// horizontally subsampled, a height of 4 is doubled when it is vertically
// subsampled. Every other block size is returned unchanged.
static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);

  const int sub_x = (subsampling_x == 1);
  const int sub_y = (subsampling_y == 1);

  switch (bsize) {
    case BLOCK_4X4:
      if (sub_x) return sub_y ? BLOCK_8X8 : BLOCK_8X4;
      return sub_y ? BLOCK_4X8 : BLOCK_4X4;
    case BLOCK_4X8: return sub_x ? BLOCK_8X8 : BLOCK_4X8;
    case BLOCK_8X4: return sub_y ? BLOCK_8X8 : BLOCK_8X4;
    case BLOCK_4X16: return sub_x ? BLOCK_8X16 : BLOCK_4X16;
    case BLOCK_16X4: return sub_y ? BLOCK_16X8 : BLOCK_16X4;
    default: return bsize;
  }
}
1687
1688
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689
                             int enable_intra_edge_filter, int wpx, int hpx,
1690
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1691
                             int angle_delta, int use_palette,
1692
                             FILTER_INTRA_MODE filter_intra_mode,
1693
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1694
                             int dst_stride, int col_off, int row_off,
1695
63.8M
                             int plane) {
1696
63.8M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1697
63.8M
  const int txwpx = tx_size_wide[tx_size];
1698
63.8M
  const int txhpx = tx_size_high[tx_size];
1699
63.8M
  const int x = col_off << MI_SIZE_LOG2;
1700
63.8M
  const int y = row_off << MI_SIZE_LOG2;
1701
63.8M
  const int is_hbd = is_cur_buf_hbd(xd);
1702
1703
63.8M
  assert(mode < INTRA_MODES);
1704
1705
63.8M
  if (use_palette) {
1706
77.2k
    int r, c;
1707
77.2k
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708
77.2k
                               xd->color_index_map_offset[plane != 0];
1709
77.2k
    const uint16_t *const palette =
1710
77.2k
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1711
77.2k
    if (is_hbd) {
1712
9.81k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713
110k
      for (r = 0; r < txhpx; ++r) {
1714
1.46M
        for (c = 0; c < txwpx; ++c) {
1715
1.36M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716
1.36M
        }
1717
100k
      }
1718
67.4k
    } else {
1719
629k
      for (r = 0; r < txhpx; ++r) {
1720
6.77M
        for (c = 0; c < txwpx; ++c) {
1721
6.21M
          dst[r * dst_stride + c] =
1722
6.21M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723
6.21M
        }
1724
561k
      }
1725
67.4k
    }
1726
77.2k
    return;
1727
77.2k
  }
1728
1729
63.7M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1730
63.7M
  const int ss_x = pd->subsampling_x;
1731
63.7M
  const int ss_y = pd->subsampling_y;
1732
63.7M
  const int have_top =
1733
63.7M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734
63.7M
  const int have_left =
1735
63.7M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736
1737
  // Distance between the right edge of this prediction block to
1738
  // the frame right edge
1739
63.7M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740
  // Distance between the bottom edge of this prediction block to
1741
  // the frame bottom edge
1742
63.7M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743
63.7M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744
63.7M
  const int is_dr_mode = av1_is_directional_mode(mode);
1745
1746
  // The computations in this function, as well as in build_intra_predictors(),
1747
  // are generalized for all intra modes. Some of these operations are not
1748
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
1750
  // separate function build_non_directional_intra_predictors() is introduced
1751
  // for these modes to avoid redundant computations while generating pred data.
1752
1753
63.7M
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754
63.7M
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
1755
63.7M
  if (!use_filter_intra && !is_dr_mode) {
1756
57.3M
#if CONFIG_AV1_HIGHBITDEPTH
1757
57.3M
    if (is_hbd) {
1758
52.7M
      highbd_build_non_directional_intra_predictors(
1759
52.7M
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760
52.7M
          xd->bd);
1761
52.7M
      return;
1762
52.7M
    }
1763
4.59M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1764
4.59M
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765
4.59M
                                           mode, tx_size, n_top_px, n_left_px);
1766
4.59M
    return;
1767
57.3M
  }
1768
1769
6.41M
  const int txw = tx_size_wide_unit[tx_size];
1770
6.41M
  const int txh = tx_size_high_unit[tx_size];
1771
6.41M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772
6.41M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773
6.41M
  const int right_available =
1774
6.41M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775
6.41M
  const int bottom_available =
1776
6.41M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777
1778
6.41M
  const PARTITION_TYPE partition = mbmi->partition;
1779
1780
6.41M
  BLOCK_SIZE bsize = mbmi->bsize;
1781
  // force 4x4 chroma component block size.
1782
6.41M
  if (ss_x || ss_y) {
1783
2.32M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784
2.32M
  }
1785
1786
6.41M
  int p_angle = 0;
1787
6.41M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788
6.41M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789
1790
6.41M
  if (use_filter_intra) {
1791
883k
    need_top_right = 0;
1792
883k
    need_bottom_left = 0;
1793
883k
  }
1794
6.41M
  if (is_dr_mode) {
1795
5.58M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1796
5.58M
    need_top_right = p_angle < 90;
1797
5.58M
    need_bottom_left = p_angle > 180;
1798
5.58M
  }
1799
1800
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1801
  // -1 : TR and BL are not needed
1802
  //  0 : TR and BL are needed but not available
1803
  // > 0 : TR and BL are needed and pixels are available
1804
6.41M
  const int have_top_right =
1805
6.41M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806
582k
                                     right_available, partition, tx_size,
1807
582k
                                     row_off, col_off, ss_x, ss_y)
1808
6.41M
                     : -1;
1809
6.41M
  const int have_bottom_left =
1810
6.41M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811
952k
                                         bottom_available, have_left, partition,
1812
952k
                                         tx_size, row_off, col_off, ss_x, ss_y)
1813
6.41M
                       : -1;
1814
1815
6.41M
  const int disable_edge_filter = !enable_intra_edge_filter;
1816
6.41M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817
6.41M
  const int n_topright_px =
1818
6.41M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819
6.41M
  const int n_bottomleft_px =
1820
6.41M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
1821
6.41M
#if CONFIG_AV1_HIGHBITDEPTH
1822
6.41M
  if (is_hbd) {
1823
3.94M
    highbd_build_directional_and_filter_intra_predictors(
1824
3.94M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825
3.94M
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826
3.94M
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827
3.94M
    return;
1828
3.94M
  }
1829
2.47M
#endif
1830
2.47M
  build_directional_and_filter_intra_predictors(
1831
2.47M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832
2.47M
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833
2.47M
      n_bottomleft_px, intra_edge_filter_type);
1834
2.47M
}
1835
1836
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837
                                    int plane, int blk_col, int blk_row,
1838
63.1M
                                    TX_SIZE tx_size) {
1839
63.1M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1840
63.1M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1841
63.1M
  const int dst_stride = pd->dst.stride;
1842
63.1M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843
63.1M
  const PREDICTION_MODE mode =
1844
63.1M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845
63.1M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846
63.1M
  const FILTER_INTRA_MODE filter_intra_mode =
1847
63.1M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848
63.1M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1849
63.1M
          : FILTER_INTRA_MODES;
1850
63.1M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851
63.1M
  const SequenceHeader *seq_params = cm->seq_params;
1852
1853
63.1M
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854
63.1M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855
#if CONFIG_DEBUG
1856
    assert(is_cfl_allowed(xd));
1857
    const BLOCK_SIZE plane_bsize =
1858
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859
    (void)plane_bsize;
1860
    assert(plane_bsize < BLOCK_SIZES_ALL);
1861
    if (!xd->lossless[mbmi->segment_id]) {
1862
      assert(blk_col == 0);
1863
      assert(blk_row == 0);
1864
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866
    }
1867
#endif
1868
1.30M
    CFL_CTX *const cfl = &xd->cfl;
1869
1.30M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870
1.30M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1871
1.30M
      av1_predict_intra_block(xd, seq_params->sb_size,
1872
1.30M
                              seq_params->enable_intra_edge_filter, pd->width,
1873
1.30M
                              pd->height, tx_size, mode, angle_delta,
1874
1.30M
                              use_palette, filter_intra_mode, dst, dst_stride,
1875
1.30M
                              dst, dst_stride, blk_col, blk_row, plane);
1876
1.30M
      if (cfl->use_dc_pred_cache) {
1877
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1879
0
      }
1880
1.30M
    } else {
1881
0
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882
0
    }
1883
1.30M
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884
1.30M
    return;
1885
1.30M
  }
1886
61.8M
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887
61.8M
  av1_predict_intra_block(
1888
61.8M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889
61.8M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890
61.8M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891
61.8M
}
1892
1893
5.81k
void av1_init_intra_predictors(void) {
1894
5.81k
  aom_once(init_intra_predictors_internal);
1895
5.81k
}