Coverage Report

Created: 2023-06-07 06:31

/src/aom/av1/common/reconintra.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
14
#include "config/aom_config.h"
15
#include "config/aom_dsp_rtcd.h"
16
#include "config/av1_rtcd.h"
17
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_mem/aom_mem.h"
20
#include "aom_ports/aom_once.h"
21
#include "aom_ports/mem.h"
22
#include "av1/common/av1_common_int.h"
23
#include "av1/common/cfl.h"
24
#include "av1/common/reconintra.h"
25
26
// Bit flags naming the neighbouring edges an intra prediction mode reads.
// They are OR-ed together to form the entries of extend_modes[].
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};
33
34
// Intra edge filter / upsample limits.
// NOTE(review): presumably the number of edge filter kernels, the taps per
// kernel and the largest edge length eligible for upsampling -- confirm
// against the edge filter code later in this file.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Size of a neighbour (above/left) pixel buffer: twice the largest transform
// dimension plus 32 extra entries.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38
39
static const uint8_t extend_modes[INTRA_MODES] = {
40
  NEED_ABOVE | NEED_LEFT,                   // DC
41
  NEED_ABOVE,                               // V
42
  NEED_LEFT,                                // H
43
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53
};
54
55
// Tables to store if the top-right reference pixels are available. The flags
56
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
57
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
58
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
59
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
60
//       . . . .
61
//       . . . .
62
//       . . o .
63
//       . . . .
64
// Top-right availability bitmaps, one per block size. Bit (i % 8) of entry
// (i / 8) holds the flag for the block with index i inside the superblock.
// The tables are never written, so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129
  // 4X4
130
  has_tr_4x4,
131
  // 4X8,       8X4,            8X8
132
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
133
  // 8X16,      16X8,           16X16
134
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
135
  // 16X32,     32X16,          32X32
136
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
137
  // 32X64,     64X32,          64X64
138
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
139
  // 64x128,    128x64,         128x128
140
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
141
  // 4x16,      16x4,            8x32
142
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
143
  // 32x8,      16x64,           64x16
144
  has_tr_32x8, has_tr_16x64, has_tr_64x16
145
};
146
147
// Top-right availability bitmaps for square blocks visited in the order used
// by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the regular
// has_tr_* tables. Read-only, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157
// The _vert_* tables are like the ordinary tables above, but describe the
158
// order we visit square blocks when doing a PARTITION_VERT_A or
159
// PARTITION_VERT_B. This is the same order as normal except for on the last
160
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161
// as a pair of squares, which means that these tables work correctly for both
162
// mixed vertical partition types.
163
//
164
// There are tables for each of the square sizes. Vertical rectangles (like
165
// BLOCK_16X32) use their respective "non-vert" table
166
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167
  // 4X4
168
  NULL,
169
  // 4X8,      8X4,         8X8
170
  has_tr_4x8, NULL, has_tr_vert_8x8,
171
  // 8X16,     16X8,        16X16
172
  has_tr_8x16, NULL, has_tr_vert_16x16,
173
  // 16X32,    32X16,       32X32
174
  has_tr_16x32, NULL, has_tr_vert_32x32,
175
  // 32X64,    64X32,       64X64
176
  has_tr_32x64, NULL, has_tr_vert_64x64,
177
  // 64x128,   128x64,      128x128
178
  has_tr_64x128, NULL, has_tr_128x128
179
};
180
181
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
182
610k
                                       BLOCK_SIZE bsize) {
183
610k
  const uint8_t *ret = NULL;
184
  // If this is a mixed vertical partition, look up bsize in orders_vert.
185
610k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
186
50.7k
    assert(bsize < BLOCK_SIZES);
187
0
    ret = has_tr_vert_tables[bsize];
188
560k
  } else {
189
560k
    ret = has_tr_tables[bsize];
190
560k
  }
191
0
  assert(ret);
192
0
  return ret;
193
610k
}
194
195
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
196
                         int mi_col, int top_available, int right_available,
197
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
198
1.13M
                         int col_off, int ss_x, int ss_y) {
199
1.13M
  if (!top_available || !right_available) return 0;
200
201
1.06M
  const int bw_unit = mi_size_wide[bsize];
202
1.06M
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
203
1.06M
  const int top_right_count_unit = tx_size_wide_unit[txsz];
204
205
1.06M
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
206
184k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
207
      // Special case: For 128x128 blocks, the transform unit whose
208
      // top-right corner is at the center of the block does in fact have
209
      // pixels available at its top-right corner.
210
87.4k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
211
87.4k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
212
11.2k
        return 1;
213
11.2k
      }
214
76.1k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
215
76.1k
      const int col_off_64 = col_off % plane_bw_unit_64;
216
76.1k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
217
87.4k
    }
218
96.9k
    return col_off + top_right_count_unit < plane_bw_unit;
219
882k
  } else {
220
    // All top-right pixels are in the block above, which is already available.
221
882k
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
222
223
832k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
224
832k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
225
832k
    const int sb_mi_size = mi_size_high[sb_size];
226
832k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
227
832k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
228
229
    // Top row of superblock: so top-right pixels are in the top and/or
230
    // top-right superblocks, both of which are already available.
231
832k
    if (blk_row_in_sb == 0) return 1;
232
233
    // Rightmost column of superblock (and not the top row): so top-right pixels
234
    // fall in the right superblock, which is not available yet.
235
737k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
236
126k
      return 0;
237
126k
    }
238
239
    // General case (neither top row nor rightmost column): check if the
240
    // top-right block is coded before the current block.
241
610k
    const int this_blk_index =
242
610k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
243
610k
        blk_col_in_sb + 0;
244
610k
    const int idx1 = this_blk_index / 8;
245
610k
    const int idx2 = this_blk_index % 8;
246
610k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
247
610k
    return (has_tr_table[idx1] >> idx2) & 1;
248
737k
  }
249
1.06M
}
250
251
// Similar to the has_tr_* tables, but store if the bottom-left reference
252
// pixels are available.
253
// Bottom-left availability bitmaps, one per block size. Bit (i % 8) of entry
// (i / 8) holds the flag for the block with index i inside the superblock.
// The tables are never written, so they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314
  // 4X4
315
  has_bl_4x4,
316
  // 4X8,         8X4,         8X8
317
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
318
  // 8X16,        16X8,        16X16
319
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
320
  // 16X32,       32X16,       32X32
321
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
322
  // 32X64,       64X32,       64X64
323
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
324
  // 64x128,      128x64,      128x128
325
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
326
  // 4x16,        16x4,        8x32
327
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
328
  // 32x8,        16x64,       64x16
329
  has_bl_32x8, has_bl_16x64, has_bl_64x16
330
};
331
332
// Bottom-left availability bitmaps for square blocks visited in the order
// used by PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the
// regular has_bl_* tables. Read-only, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342
// The _vert_* tables are like the ordinary tables above, but describe the
343
// order we visit square blocks when doing a PARTITION_VERT_A or
344
// PARTITION_VERT_B. This is the same order as normal except for on the last
345
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346
// as a pair of squares, which means that these tables work correctly for both
347
// mixed vertical partition types.
348
//
349
// There are tables for each of the square sizes. Vertical rectangles (like
350
// BLOCK_16X32) use their respective "non-vert" table
351
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352
  // 4X4
353
  NULL,
354
  // 4X8,     8X4,         8X8
355
  has_bl_4x8, NULL, has_bl_vert_8x8,
356
  // 8X16,    16X8,        16X16
357
  has_bl_8x16, NULL, has_bl_vert_16x16,
358
  // 16X32,   32X16,       32X32
359
  has_bl_16x32, NULL, has_bl_vert_32x32,
360
  // 32X64,   64X32,       64X64
361
  has_bl_32x64, NULL, has_bl_vert_64x64,
362
  // 64x128,  128x64,      128x128
363
  has_bl_64x128, NULL, has_bl_128x128
364
};
365
366
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
367
946k
                                       BLOCK_SIZE bsize) {
368
946k
  const uint8_t *ret = NULL;
369
  // If this is a mixed vertical partition, look up bsize in orders_vert.
370
946k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
371
75.4k
    assert(bsize < BLOCK_SIZES);
372
0
    ret = has_bl_vert_tables[bsize];
373
870k
  } else {
374
870k
    ret = has_bl_tables[bsize];
375
870k
  }
376
0
  assert(ret);
377
0
  return ret;
378
946k
}
379
380
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
381
                           int mi_col, int bottom_available, int left_available,
382
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
383
1.83M
                           int col_off, int ss_x, int ss_y) {
384
1.83M
  if (!bottom_available || !left_available) return 0;
385
386
  // Special case for 128x* blocks, when col_off is half the block width.
387
  // This is needed because 128x* superblocks are divided into 64x* blocks in
388
  // raster order
389
1.70M
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
390
159k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
391
159k
    const int col_off_64 = col_off % plane_bw_unit_64;
392
159k
    if (col_off_64 == 0) {
393
      // We are at the left edge of top-right or bottom-right 64x* block.
394
54.2k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
395
54.2k
      const int row_off_64 = row_off % plane_bh_unit_64;
396
54.2k
      const int plane_bh_unit =
397
54.2k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
398
      // Check if all bottom-left pixels are in the left 64x* block (which is
399
      // already coded).
400
54.2k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
401
54.2k
    }
402
159k
  }
403
404
1.64M
  if (col_off > 0) {
405
    // Bottom-left pixels are in the bottom-left block, which is not available.
406
273k
    return 0;
407
1.37M
  } else {
408
1.37M
    const int bh_unit = mi_size_high[bsize];
409
1.37M
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
410
1.37M
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
411
412
    // All bottom-left pixels are in the left block, which is already available.
413
1.37M
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
414
415
1.30M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
416
1.30M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
417
1.30M
    const int sb_mi_size = mi_size_high[sb_size];
418
1.30M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
419
1.30M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
420
421
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
422
    // and/or bottom-left superblocks. But only the left superblock is
423
    // available, so check if all required pixels fall in that superblock.
424
1.30M
    if (blk_col_in_sb == 0) {
425
208k
      const int blk_start_row_off =
426
208k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
427
208k
          ss_y;
428
208k
      const int row_off_in_sb = blk_start_row_off + row_off;
429
208k
      const int sb_height_unit = sb_mi_size >> ss_y;
430
208k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
431
208k
    }
432
433
    // Bottom row of superblock (and not the leftmost column): so bottom-left
434
    // pixels fall in the bottom superblock, which is not available yet.
435
1.09M
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
436
437
    // General case (neither leftmost column nor bottom row): check if the
438
    // bottom-left block is coded before the current block.
439
946k
    const int this_blk_index =
440
946k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
441
946k
        blk_col_in_sb + 0;
442
946k
    const int idx1 = this_blk_index / 8;
443
946k
    const int idx2 = this_blk_index % 8;
444
946k
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
445
946k
    return (has_bl_table[idx1] >> idx2) & 1;
446
1.09M
  }
447
1.64M
}
448
449
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
450
                              const uint8_t *above, const uint8_t *left);
451
452
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
453
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
454
455
#if CONFIG_AV1_HIGHBITDEPTH
456
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457
                                   const uint16_t *above, const uint16_t *left,
458
                                   int bd);
459
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461
#endif
462
463
1
static void init_intra_predictors_internal(void) {
464
1
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466
0
#define INIT_RECTANGULAR(p, type)             \
467
20
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
468
20
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
469
20
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
470
20
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
471
20
  p[TX_16X32] = aom_##type##_predictor_16x32; \
472
20
  p[TX_32X16] = aom_##type##_predictor_32x16; \
473
20
  p[TX_32X64] = aom_##type##_predictor_32x64; \
474
20
  p[TX_64X32] = aom_##type##_predictor_64x32; \
475
20
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
476
20
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
477
20
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
478
20
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
479
20
  p[TX_16X64] = aom_##type##_predictor_16x64; \
480
20
  p[TX_64X16] = aom_##type##_predictor_64x16;
481
482
0
#define INIT_NO_4X4(p, type)                  \
483
20
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
484
20
  p[TX_16X16] = aom_##type##_predictor_16x16; \
485
20
  p[TX_32X32] = aom_##type##_predictor_32x32; \
486
20
  p[TX_64X64] = aom_##type##_predictor_64x64; \
487
20
  INIT_RECTANGULAR(p, type)
488
489
0
#define INIT_ALL_SIZES(p, type)           \
490
20
  p[TX_4X4] = aom_##type##_predictor_4x4; \
491
20
  INIT_NO_4X4(p, type)
492
493
1
  INIT_ALL_SIZES(pred[V_PRED], v)
494
1
  INIT_ALL_SIZES(pred[H_PRED], h)
495
1
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
496
1
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
497
1
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
498
1
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
499
1
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
500
1
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
501
1
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
502
1
  INIT_ALL_SIZES(dc_pred[1][1], dc)
503
1
#if CONFIG_AV1_HIGHBITDEPTH
504
1
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
505
1
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
506
1
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
507
1
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
508
1
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
509
1
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
510
1
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
511
1
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
512
1
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
513
1
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
514
1
#endif
515
1
#undef intra_pred_allsizes
516
1
}
517
518
// Directional prediction, zone 1: 0 < angle < 90
519
// Zone 1 directional predictor (C reference): builds a bw x bh block at `dst`
// from the `above` edge only. `dx` is the per-row horizontal step in 1/64
// sample units; `upsample_above` is 1 when the edge has been upsampled 2x.
// `left` and `dy` are unused (dy is asserted to be 1).
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  // Index of the last usable edge sample; positions at or beyond it
  // replicate above[last_x].
  const int last_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int step = 1 << upsample_above;

  uint8_t *row = dst;
  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    // 5-bit interpolation weight of the right-hand neighbour.
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= last_x) {
      // This row and all remaining rows lie entirely past the edge end.
      for (int i = r; i < bh; ++i) {
        memset(row, above[last_x], bw * sizeof(row[0]));
        row += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < last_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        row[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        row[c] = above[last_x];
      }
      base += step;
    }

    row += stride;
    x += dx;
  }
}
555
556
// Directional prediction, zone 2: 90 < angle < 180
557
// Zone 2 directional predictor (C reference): each output sample is
// interpolated from the `above` edge when its projected position is at or
// right of min_base_x, and from the `left` edge otherwise. `dx` / `dy` are
// the steps in 1/64 sample units; upsample_above / upsample_left are 1 when
// the corresponding edge has been upsampled 2x. Both edges are indexed with
// negative offsets (down to -(1 << upsample)), so they must be readable
// there.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;
  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      int val;
      if (base_x >= min_base_x) {
        // Projects onto the above edge.
        const int w = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - w) + above[base_x + 1] * w;
      } else {
        // Projects onto the left edge.
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int w = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - w) + left[base_y + 1] * w;
      }
      row[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
594
595
// Directional prediction, zone 3: 180 < angle < 270
596
// Zone 3 directional predictor (C reference): builds a bw x bh block at `dst`
// from the `left` edge only, one column at a time. `dy` is the per-column
// vertical step in 1/64 sample units; `upsample_left` is 1 when the edge has
// been upsampled 2x. `above` and `dx` are unused (dx is asserted to be 1).
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  // Index of the last usable edge sample; positions at or beyond it
  // replicate left[last_y].
  const int last_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int step = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    // 5-bit interpolation weight of the lower neighbour.
    const int shift = ((y << upsample_left) & 0x3F) >> 1;

    // Interpolated part of the column.
    int r = 0;
    for (; r < bh && base < last_y; ++r, base += step) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // Remainder of the column lies past the edge end.
    for (; r < bh; ++r) dst[r * stride + c] = left[last_y];
  }
}
626
627
// Dispatches an 8-bit directional prediction of angle `angle` (degrees,
// asserted to be in (0, 270)) to the matching routine: exactly 90 and 180 use
// the V and H predictors from pred[], the open ranges (0, 90), (90, 180) and
// (180, 270) use the zone 1, 2 and 3 directional predictors respectively.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  // Axis-aligned angles are plain vertical / horizontal prediction.
  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
651
652
#if CONFIG_AV1_HIGHBITDEPTH
653
// Directional prediction, zone 1: 0 < angle < 90
654
// High bit-depth zone 1 directional predictor (C reference): builds a
// bw x bh block of 16-bit samples at `dst` from the `above` edge only. `dx`
// is the per-row horizontal step in 1/64 sample units; `upsample_above` is 1
// when the edge has been upsampled 2x. `left`, `dy` and `bd` are unused (dy
// is asserted to be 1).
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  // Index of the last usable edge sample; positions at or beyond it
  // replicate above[last_x].
  const int last_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int step = 1 << upsample_above;

  uint16_t *row = dst;
  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    // 5-bit interpolation weight of the right-hand neighbour.
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= last_x) {
      // This row and all remaining rows lie entirely past the edge end.
      for (int i = r; i < bh; ++i) {
        aom_memset16(row, above[last_x], bw);
        row += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < last_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        row[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        row[c] = above[last_x];
      }
      base += step;
    }

    row += stride;
    x += dx;
  }
}
692
693
// Directional prediction, zone 2: 90 < angle < 180
694
// High bit-depth zone 2 directional predictor (C reference): each 16-bit
// output sample is interpolated from the `above` edge when its projected
// position is at or right of min_base_x, and from the `left` edge otherwise.
// `dx` / `dy` are the steps in 1/64 sample units; upsample_above /
// upsample_left are 1 when the corresponding edge has been upsampled 2x.
// Both edges are indexed with negative offsets (down to -(1 << upsample)),
// so they must be readable there. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;
  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      int val;
      if (base_x >= min_base_x) {
        // Projects onto the above edge.
        const int w = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - w) + above[base_x + 1] * w;
      } else {
        // Projects onto the left edge.
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int w = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - w) + left[base_y + 1] * w;
      }
      row[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
732
733
// Directional prediction, zone 3: 180 < angle < 270
//
// Only the left edge is read; above, dx and bd are unused (dx is asserted to
// be 1). The block is produced column by column: column c starts at position
// (c + 1) * dy along the left edge and advances by one (upsampled) sample per
// row. Once the integer position reaches max_base_y, the rest of the column
// is filled with left[max_base_y].
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  for (int c = 0; c < bw; ++c) {
    const int y = (c + 1) * dy;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int base = y >> frac_bits;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int blend = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(blend, 5);
    }
    // Remainder (if any) replicates the last available edge sample.
    for (; r < bh; ++r) {
      dst[r * stride + c] = left[max_base_y];
    }
  }
}
765
766
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
767
                                TX_SIZE tx_size, const uint16_t *above,
768
                                const uint16_t *left, int upsample_above,
769
4.48M
                                int upsample_left, int angle, int bd) {
770
4.48M
  const int dx = av1_get_dx(angle);
771
4.48M
  const int dy = av1_get_dy(angle);
772
4.48M
  const int bw = tx_size_wide[tx_size];
773
4.48M
  const int bh = tx_size_high[tx_size];
774
4.48M
  assert(angle > 0 && angle < 270);
775
776
4.48M
  if (angle > 0 && angle < 90) {
777
628k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
778
628k
                                upsample_above, dx, dy, bd);
779
3.85M
  } else if (angle > 90 && angle < 180) {
780
1.35M
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
781
1.35M
                                upsample_above, upsample_left, dx, dy, bd);
782
2.49M
  } else if (angle > 180 && angle < 270) {
783
854k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
784
854k
                                dx, dy, bd);
785
1.64M
  } else if (angle == 90) {
786
422k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
787
1.22M
  } else if (angle == 180) {
788
1.22M
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
789
1.22M
  }
790
4.48M
}
791
#endif  // CONFIG_AV1_HIGHBITDEPTH
792
793
DECLARE_ALIGNED(16, const int8_t,
794
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
795
  {
796
      { -6, 10, 0, 0, 0, 12, 0, 0 },
797
      { -5, 2, 10, 0, 0, 9, 0, 0 },
798
      { -3, 1, 1, 10, 0, 7, 0, 0 },
799
      { -3, 1, 1, 2, 10, 5, 0, 0 },
800
      { -4, 6, 0, 0, 0, 2, 12, 0 },
801
      { -3, 2, 6, 0, 0, 2, 9, 0 },
802
      { -3, 2, 2, 6, 0, 2, 7, 0 },
803
      { -3, 1, 2, 2, 6, 3, 5, 0 },
804
  },
805
  {
806
      { -10, 16, 0, 0, 0, 10, 0, 0 },
807
      { -6, 0, 16, 0, 0, 6, 0, 0 },
808
      { -4, 0, 0, 16, 0, 4, 0, 0 },
809
      { -2, 0, 0, 0, 16, 2, 0, 0 },
810
      { -10, 16, 0, 0, 0, 0, 10, 0 },
811
      { -6, 0, 16, 0, 0, 0, 6, 0 },
812
      { -4, 0, 0, 16, 0, 0, 4, 0 },
813
      { -2, 0, 0, 0, 16, 0, 2, 0 },
814
  },
815
  {
816
      { -8, 8, 0, 0, 0, 16, 0, 0 },
817
      { -8, 0, 8, 0, 0, 16, 0, 0 },
818
      { -8, 0, 0, 8, 0, 16, 0, 0 },
819
      { -8, 0, 0, 0, 8, 16, 0, 0 },
820
      { -4, 4, 0, 0, 0, 0, 16, 0 },
821
      { -4, 0, 4, 0, 0, 0, 16, 0 },
822
      { -4, 0, 0, 4, 0, 0, 16, 0 },
823
      { -4, 0, 0, 0, 4, 0, 16, 0 },
824
  },
825
  {
826
      { -2, 8, 0, 0, 0, 10, 0, 0 },
827
      { -1, 3, 8, 0, 0, 6, 0, 0 },
828
      { -1, 2, 3, 8, 0, 4, 0, 0 },
829
      { 0, 1, 2, 3, 8, 2, 0, 0 },
830
      { -1, 4, 0, 0, 0, 3, 10, 0 },
831
      { -1, 3, 4, 0, 0, 4, 6, 0 },
832
      { -1, 2, 3, 4, 0, 4, 4, 0 },
833
      { -1, 2, 2, 3, 4, 3, 3, 0 },
834
  },
835
  {
836
      { -12, 14, 0, 0, 0, 14, 0, 0 },
837
      { -10, 0, 14, 0, 0, 12, 0, 0 },
838
      { -9, 0, 0, 14, 0, 11, 0, 0 },
839
      { -8, 0, 0, 0, 14, 10, 0, 0 },
840
      { -10, 12, 0, 0, 0, 0, 14, 0 },
841
      { -9, 1, 12, 0, 0, 0, 12, 0 },
842
      { -8, 0, 0, 12, 0, 1, 11, 0 },
843
      { -7, 0, 0, 1, 12, 1, 9, 0 },
844
  },
845
};
846
847
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
848
                                  TX_SIZE tx_size, const uint8_t *above,
849
0
                                  const uint8_t *left, int mode) {
850
0
  int r, c;
851
0
  uint8_t buffer[33][33];
852
0
  const int bw = tx_size_wide[tx_size];
853
0
  const int bh = tx_size_high[tx_size];
854
855
0
  assert(bw <= 32 && bh <= 32);
856
857
0
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
858
0
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
859
860
0
  for (r = 1; r < bh + 1; r += 2)
861
0
    for (c = 1; c < bw + 1; c += 4) {
862
0
      const uint8_t p0 = buffer[r - 1][c - 1];
863
0
      const uint8_t p1 = buffer[r - 1][c];
864
0
      const uint8_t p2 = buffer[r - 1][c + 1];
865
0
      const uint8_t p3 = buffer[r - 1][c + 2];
866
0
      const uint8_t p4 = buffer[r - 1][c + 3];
867
0
      const uint8_t p5 = buffer[r][c - 1];
868
0
      const uint8_t p6 = buffer[r + 1][c - 1];
869
0
      for (int k = 0; k < 8; ++k) {
870
0
        int r_offset = k >> 2;
871
0
        int c_offset = k & 0x03;
872
0
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
873
0
                 av1_filter_intra_taps[mode][k][1] * p1 +
874
0
                 av1_filter_intra_taps[mode][k][2] * p2 +
875
0
                 av1_filter_intra_taps[mode][k][3] * p3 +
876
0
                 av1_filter_intra_taps[mode][k][4] * p4 +
877
0
                 av1_filter_intra_taps[mode][k][5] * p5 +
878
0
                 av1_filter_intra_taps[mode][k][6] * p6;
879
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
880
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
881
        // Since Clip1() clips a negative value to 0, it is safe to replace
882
        // Round2Signed() with Round2().
883
0
        buffer[r + r_offset][c + c_offset] =
884
0
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
885
0
      }
886
0
    }
887
888
0
  for (r = 0; r < bh; ++r) {
889
0
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
890
0
    dst += stride;
891
0
  }
892
0
}
893
894
#if CONFIG_AV1_HIGHBITDEPTH
895
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
896
                                          TX_SIZE tx_size,
897
                                          const uint16_t *above,
898
                                          const uint16_t *left, int mode,
899
443k
                                          int bd) {
900
443k
  int r, c;
901
443k
  uint16_t buffer[33][33];
902
443k
  const int bw = tx_size_wide[tx_size];
903
443k
  const int bh = tx_size_high[tx_size];
904
905
443k
  assert(bw <= 32 && bh <= 32);
906
907
4.78M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
908
443k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
909
910
2.60M
  for (r = 1; r < bh + 1; r += 2)
911
9.79M
    for (c = 1; c < bw + 1; c += 4) {
912
7.63M
      const uint16_t p0 = buffer[r - 1][c - 1];
913
7.63M
      const uint16_t p1 = buffer[r - 1][c];
914
7.63M
      const uint16_t p2 = buffer[r - 1][c + 1];
915
7.63M
      const uint16_t p3 = buffer[r - 1][c + 2];
916
7.63M
      const uint16_t p4 = buffer[r - 1][c + 3];
917
7.63M
      const uint16_t p5 = buffer[r][c - 1];
918
7.63M
      const uint16_t p6 = buffer[r + 1][c - 1];
919
68.7M
      for (int k = 0; k < 8; ++k) {
920
61.0M
        int r_offset = k >> 2;
921
61.0M
        int c_offset = k & 0x03;
922
61.0M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
923
61.0M
                 av1_filter_intra_taps[mode][k][1] * p1 +
924
61.0M
                 av1_filter_intra_taps[mode][k][2] * p2 +
925
61.0M
                 av1_filter_intra_taps[mode][k][3] * p3 +
926
61.0M
                 av1_filter_intra_taps[mode][k][4] * p4 +
927
61.0M
                 av1_filter_intra_taps[mode][k][5] * p5 +
928
61.0M
                 av1_filter_intra_taps[mode][k][6] * p6;
929
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
930
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
931
        // Since Clip1() clips a negative value to 0, it is safe to replace
932
        // Round2Signed() with Round2().
933
61.0M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
934
61.0M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
935
61.0M
      }
936
7.63M
    }
937
938
4.78M
  for (r = 0; r < bh; ++r) {
939
4.34M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
940
4.34M
    dst += stride;
941
4.34M
  }
942
443k
}
943
#endif  // CONFIG_AV1_HIGHBITDEPTH
944
945
167M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
946
167M
  if (plane == 0) {
947
57.9M
    const PREDICTION_MODE mode = mbmi->mode;
948
57.9M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
949
57.9M
            mode == SMOOTH_H_PRED);
950
109M
  } else {
951
    // uv_mode is not set for inter blocks, so need to explicitly
952
    // detect that case.
953
109M
    if (is_inter_block(mbmi)) return 0;
954
955
106M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
956
106M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
957
106M
            uv_mode == UV_SMOOTH_H_PRED);
958
109M
  }
959
167M
}
960
961
108M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
962
108M
  int ab_sm, le_sm;
963
964
108M
  if (plane == 0) {
965
37.7M
    const MB_MODE_INFO *ab = xd->above_mbmi;
966
37.7M
    const MB_MODE_INFO *le = xd->left_mbmi;
967
37.7M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
968
37.7M
    le_sm = le ? is_smooth(le, plane) : 0;
969
71.0M
  } else {
970
71.0M
    const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
971
71.0M
    const MB_MODE_INFO *le = xd->chroma_left_mbmi;
972
71.0M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
973
71.0M
    le_sm = le ? is_smooth(le, plane) : 0;
974
71.0M
  }
975
976
108M
  return (ab_sm || le_sm) ? 1 : 0;
977
108M
}
978
979
5.82M
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
//   bs0, bs1 : block dimensions; only their sum is used
//   delta    : angle difference; only its magnitude is used
//   type     : 0 selects the first threshold set, any other value the second
//
// Larger blocks and larger angle differences yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type != 0) {
    if (size_sum <= 8) return (mag >= 64) ? 2 : (mag >= 40);
    if (size_sum <= 16) return (mag >= 48) ? 2 : (mag >= 20);
    if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) return mag >= 56;
  // Sums up to 12 and up to 16 share the same threshold.
  if (size_sum <= 16) return mag >= 40;
  if (size_sum <= 24) {
    if (mag >= 32) return 3;
    if (mag >= 16) return 2;
    return mag >= 8;
  }
  if (size_sum <= 32) {
    if (mag >= 32) return 3;
    if (mag >= 4) return 2;
    return mag >= 1;
  }
  return (mag >= 1) ? 3 : 0;
}
1017
1018
0
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1019
0
  if (!strength) return;
1020
1021
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1022
0
                                                         { 0, 5, 6, 5, 0 },
1023
0
                                                         { 2, 4, 4, 4, 2 } };
1024
0
  const int filt = strength - 1;
1025
0
  uint8_t edge[129];
1026
1027
0
  memcpy(edge, p, sz * sizeof(*p));
1028
0
  for (int i = 1; i < sz; i++) {
1029
0
    int s = 0;
1030
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1031
0
      int k = i - 2 + j;
1032
0
      k = (k < 0) ? 0 : k;
1033
0
      k = (k > sz - 1) ? sz - 1 : k;
1034
0
      s += edge[k] * kernel[filt][j];
1035
0
    }
1036
0
    s = (s + 8) >> 4;
1037
0
    p[i] = s;
1038
0
  }
1039
0
}
1040
1041
430k
// Filters the top-left corner sample with a 3-tap (5, 6, 5) / 16 kernel over
// the first left sample, the corner itself and the first above sample, then
// stores the result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int left_tap = 5 * p_left[0];
  const int corner_tap = 6 * p_above[-1];
  const int above_tap = 5 * p_above[0];
  const int filtered = (left_tap + corner_tap + above_tap + 8) >> 4;

  p_above[-1] = filtered;
  p_left[-1] = filtered;
}
1050
1051
0
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1052
0
  if (!strength) return;
1053
1054
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1055
0
                                                         { 0, 5, 6, 5, 0 },
1056
0
                                                         { 2, 4, 4, 4, 2 } };
1057
0
  const int filt = strength - 1;
1058
0
  uint16_t edge[129];
1059
1060
0
  memcpy(edge, p, sz * sizeof(*p));
1061
0
  for (int i = 1; i < sz; i++) {
1062
0
    int s = 0;
1063
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1064
0
      int k = i - 2 + j;
1065
0
      k = (k < 0) ? 0 : k;
1066
0
      k = (k > sz - 1) ? sz - 1 : k;
1067
0
      s += edge[k] * kernel[filt][j];
1068
0
    }
1069
0
    s = (s + 8) >> 4;
1070
0
    p[i] = s;
1071
0
  }
1072
0
}
1073
1074
#if CONFIG_AV1_HIGHBITDEPTH
1075
372k
// High bit-depth corner filter: applies a 3-tap (5, 6, 5) / 16 kernel over
// the first left sample, the corner itself and the first above sample, then
// stores the result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int left_tap = 5 * p_left[0];
  const int corner_tap = 6 * p_above[-1];
  const int above_tap = 5 * p_above[0];
  const int filtered = (left_tap + corner_tap + above_tap + 8) >> 4;

  p_above[-1] = filtered;
  p_left[-1] = filtered;
}
1084
#endif
1085
1086
0
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1087
  // interpolate half-sample positions
1088
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1089
1090
0
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1091
  // copy p[-1..(sz-1)] and extend first and last samples
1092
0
  in[0] = p[-1];
1093
0
  in[1] = p[-1];
1094
0
  for (int i = 0; i < sz; i++) {
1095
0
    in[i + 2] = p[i];
1096
0
  }
1097
0
  in[sz + 2] = p[sz - 1];
1098
1099
  // interpolate half-sample edge positions
1100
0
  p[-2] = in[0];
1101
0
  for (int i = 0; i < sz; i++) {
1102
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1103
0
    s = clip_pixel((s + 8) >> 4);
1104
0
    p[2 * i - 1] = s;
1105
0
    p[2 * i] = in[i + 2];
1106
0
  }
1107
0
}
1108
1109
0
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1110
  // interpolate half-sample positions
1111
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1112
1113
0
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1114
  // copy p[-1..(sz-1)] and extend first and last samples
1115
0
  in[0] = p[-1];
1116
0
  in[1] = p[-1];
1117
0
  for (int i = 0; i < sz; i++) {
1118
0
    in[i + 2] = p[i];
1119
0
  }
1120
0
  in[sz + 2] = p[sz - 1];
1121
1122
  // interpolate half-sample edge positions
1123
0
  p[-2] = in[0];
1124
0
  for (int i = 0; i < sz; i++) {
1125
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1126
0
    s = (s + 8) >> 4;
1127
0
    s = clip_pixel_highbd(s, bd);
1128
0
    p[2 * i - 1] = s;
1129
0
    p[2 * i] = in[i + 2];
1130
0
  }
1131
0
}
1132
#if CONFIG_AV1_HIGHBITDEPTH
1133
static void build_intra_predictors_high(
1134
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1135
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1136
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1137
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1138
88.6M
    int bit_depth) {
1139
88.6M
  int i;
1140
88.6M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1141
88.6M
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
1142
88.6M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1143
88.6M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1144
88.6M
  uint16_t *const above_row = above_data + 16;
1145
88.6M
  uint16_t *const left_col = left_data + 16;
1146
88.6M
  const int txwpx = tx_size_wide[tx_size];
1147
88.6M
  const int txhpx = tx_size_high[tx_size];
1148
88.6M
  int need_left = extend_modes[mode] & NEED_LEFT;
1149
88.6M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1150
88.6M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1151
88.6M
  const uint16_t *above_ref = ref - ref_stride;
1152
88.6M
  const uint16_t *left_ref = ref - 1;
1153
88.6M
  const int is_dr_mode = av1_is_directional_mode(mode);
1154
88.6M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1155
88.6M
  int base = 128 << (bit_depth - 8);
1156
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1157
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1158
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1159
  // seen to be the potential reason for this issue.
1160
88.6M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1161
88.6M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1162
1163
  // The default values if ref pixels are not available:
1164
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1165
  // base+1   A      B  ..     Y      Z
1166
  // base+1   C      D  ..     W      X
1167
  // base+1   E      F  ..     U      V
1168
  // base+1   G      H  ..     S      T      T      T      T      T
1169
1170
88.6M
  if (is_dr_mode) {
1171
4.64M
    if (p_angle <= 90)
1172
1.09M
      need_above = 1, need_left = 0, need_above_left = 1;
1173
3.54M
    else if (p_angle < 180)
1174
1.35M
      need_above = 1, need_left = 1, need_above_left = 1;
1175
2.18M
    else
1176
2.18M
      need_above = 0, need_left = 1, need_above_left = 1;
1177
4.64M
  }
1178
88.6M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1179
1180
88.6M
  assert(n_top_px >= 0);
1181
0
  assert(n_topright_px >= -1);
1182
0
  assert(n_left_px >= 0);
1183
0
  assert(n_bottomleft_px >= -1);
1184
1185
88.6M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1186
159k
    int val;
1187
159k
    if (need_left) {
1188
114k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1189
114k
    } else {
1190
45.2k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1191
45.2k
    }
1192
3.23M
    for (i = 0; i < txhpx; ++i) {
1193
3.07M
      aom_memset16(dst, val, txwpx);
1194
3.07M
      dst += dst_stride;
1195
3.07M
    }
1196
159k
    return;
1197
159k
  }
1198
1199
  // NEED_LEFT
1200
88.4M
  if (need_left) {
1201
87.4M
    const int num_left_pixels_needed =
1202
87.4M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1203
87.4M
    i = 0;
1204
87.4M
    if (n_left_px > 0) {
1205
622M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1206
85.5M
      if (n_bottomleft_px > 0) {
1207
301k
        assert(i == txhpx);
1208
3.48M
        for (; i < txhpx + n_bottomleft_px; i++)
1209
3.18M
          left_col[i] = left_ref[i * ref_stride];
1210
301k
      }
1211
85.5M
      if (i < num_left_pixels_needed)
1212
775k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1213
85.5M
    } else if (n_top_px > 0) {
1214
1.78M
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1215
1.78M
    }
1216
87.4M
  }
1217
1218
  // NEED_ABOVE
1219
88.4M
  if (need_above) {
1220
86.4M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1221
86.4M
    if (n_top_px > 0) {
1222
85.7M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1223
85.7M
      i = n_top_px;
1224
85.7M
      if (n_topright_px > 0) {
1225
325k
        assert(n_top_px == txwpx);
1226
0
        memcpy(above_row + txwpx, above_ref + txwpx,
1227
325k
               n_topright_px * sizeof(above_ref[0]));
1228
325k
        i += n_topright_px;
1229
325k
      }
1230
85.7M
      if (i < num_top_pixels_needed)
1231
474k
        aom_memset16(&above_row[i], above_row[i - 1],
1232
474k
                     num_top_pixels_needed - i);
1233
85.7M
    } else if (n_left_px > 0) {
1234
588k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1235
588k
    }
1236
86.4M
  }
1237
1238
88.4M
  if (need_above_left) {
1239
8.30M
    if (n_top_px > 0 && n_left_px > 0) {
1240
7.87M
      above_row[-1] = above_ref[-1];
1241
7.87M
    } else if (n_top_px > 0) {
1242
182k
      above_row[-1] = above_ref[0];
1243
246k
    } else if (n_left_px > 0) {
1244
240k
      above_row[-1] = left_ref[0];
1245
240k
    } else {
1246
6.88k
      above_row[-1] = base;
1247
6.88k
    }
1248
8.30M
    left_col[-1] = above_row[-1];
1249
8.30M
  }
1250
1251
88.4M
  if (use_filter_intra) {
1252
443k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1253
443k
                                  filter_intra_mode, bit_depth);
1254
443k
    return;
1255
443k
  }
1256
1257
88.0M
  if (is_dr_mode) {
1258
4.48M
    int upsample_above = 0;
1259
4.48M
    int upsample_left = 0;
1260
4.48M
    if (!disable_edge_filter) {
1261
3.22M
      const int need_right = p_angle < 90;
1262
3.22M
      const int need_bottom = p_angle > 180;
1263
3.22M
      if (p_angle != 90 && p_angle != 180) {
1264
1.86M
        const int ab_le = need_above_left ? 1 : 0;
1265
1.86M
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1266
372k
          filter_intra_edge_corner_high(above_row, left_col);
1267
372k
        }
1268
1.86M
        if (need_above && n_top_px > 0) {
1269
1.19M
          const int strength = intra_edge_filter_strength(
1270
1.19M
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1271
1.19M
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1272
1.19M
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
1273
1.19M
        }
1274
1.86M
        if (need_left && n_left_px > 0) {
1275
1.42M
          const int strength = intra_edge_filter_strength(
1276
1.42M
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1277
1.42M
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1278
1.42M
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
1279
1.42M
        }
1280
1.86M
      }
1281
3.22M
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1282
3.22M
                                                   intra_edge_filter_type);
1283
3.22M
      if (need_above && upsample_above) {
1284
199k
        const int n_px = txwpx + (need_right ? txhpx : 0);
1285
199k
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
1286
199k
      }
1287
3.22M
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1288
3.22M
                                                  intra_edge_filter_type);
1289
3.22M
      if (need_left && upsample_left) {
1290
391k
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1291
391k
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
1292
391k
      }
1293
3.22M
    }
1294
4.48M
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1295
4.48M
                        upsample_above, upsample_left, p_angle, bit_depth);
1296
4.48M
    return;
1297
4.48M
  }
1298
1299
  // predict
1300
83.5M
  if (mode == DC_PRED) {
1301
77.8M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1302
77.8M
        dst, dst_stride, above_row, left_col, bit_depth);
1303
77.8M
  } else {
1304
5.71M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1305
5.71M
  }
1306
83.5M
}
1307
#endif  // CONFIG_AV1_HIGHBITDEPTH
1308
1309
static void build_intra_predictors(
1310
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1311
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1312
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1313
20.0M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1314
20.0M
  int i;
1315
20.0M
  const uint8_t *above_ref = ref - ref_stride;
1316
20.0M
  const uint8_t *left_ref = ref - 1;
1317
20.0M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1318
20.0M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1319
20.0M
  uint8_t *const above_row = above_data + 16;
1320
20.0M
  uint8_t *const left_col = left_data + 16;
1321
20.0M
  const int txwpx = tx_size_wide[tx_size];
1322
20.0M
  const int txhpx = tx_size_high[tx_size];
1323
20.0M
  int need_left = extend_modes[mode] & NEED_LEFT;
1324
20.0M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1325
20.0M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1326
20.0M
  const int is_dr_mode = av1_is_directional_mode(mode);
1327
20.0M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1328
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1329
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1330
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1331
  // be the potential reason for this issue.
1332
20.0M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1333
20.0M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1334
1335
  // The default values if ref pixels are not available:
1336
  // 128 127 127 .. 127 127 127 127 127 127
1337
  // 129  A   B  ..  Y   Z
1338
  // 129  C   D  ..  W   X
1339
  // 129  E   F  ..  U   V
1340
  // 129  G   H  ..  S   T   T   T   T   T
1341
  // ..
1342
1343
20.0M
  if (is_dr_mode) {
1344
5.17M
    if (p_angle <= 90)
1345
1.06M
      need_above = 1, need_left = 0, need_above_left = 1;
1346
4.10M
    else if (p_angle < 180)
1347
1.00M
      need_above = 1, need_left = 1, need_above_left = 1;
1348
3.10M
    else
1349
3.10M
      need_above = 0, need_left = 1, need_above_left = 1;
1350
5.17M
  }
1351
20.0M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1352
1353
20.0M
  assert(n_top_px >= 0);
1354
0
  assert(n_topright_px >= -1);
1355
0
  assert(n_left_px >= 0);
1356
0
  assert(n_bottomleft_px >= -1);
1357
1358
20.0M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1359
64.5k
    int val;
1360
64.5k
    if (need_left) {
1361
32.9k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1362
32.9k
    } else {
1363
31.6k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1364
31.6k
    }
1365
1.76M
    for (i = 0; i < txhpx; ++i) {
1366
1.69M
      memset(dst, val, txwpx);
1367
1.69M
      dst += dst_stride;
1368
1.69M
    }
1369
64.5k
    return;
1370
64.5k
  }
1371
1372
  // NEED_LEFT
1373
19.9M
  if (need_left) {
1374
18.9M
    const int num_left_pixels_needed =
1375
18.9M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1376
18.9M
    i = 0;
1377
18.9M
    if (n_left_px > 0) {
1378
279M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1379
18.4M
      if (n_bottomleft_px > 0) {
1380
371k
        assert(i == txhpx);
1381
4.36M
        for (; i < txhpx + n_bottomleft_px; i++)
1382
3.99M
          left_col[i] = left_ref[i * ref_stride];
1383
371k
      }
1384
18.4M
      if (i < num_left_pixels_needed)
1385
807k
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1386
18.4M
    } else if (n_top_px > 0) {
1387
350k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1388
350k
    }
1389
18.9M
  }
1390
1391
  // NEED_ABOVE
1392
19.9M
  if (need_above) {
1393
16.9M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1394
16.9M
    if (n_top_px > 0) {
1395
16.3M
      memcpy(above_row, above_ref, n_top_px);
1396
16.3M
      i = n_top_px;
1397
16.3M
      if (n_topright_px > 0) {
1398
279k
        assert(n_top_px == txwpx);
1399
0
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1400
279k
        i += n_topright_px;
1401
279k
      }
1402
16.3M
      if (i < num_top_pixels_needed)
1403
318k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1404
16.3M
    } else if (n_left_px > 0) {
1405
424k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1406
424k
    }
1407
16.9M
  }
1408
1409
19.9M
  if (need_above_left) {
1410
8.69M
    if (n_top_px > 0 && n_left_px > 0) {
1411
8.34M
      above_row[-1] = above_ref[-1];
1412
8.34M
    } else if (n_top_px > 0) {
1413
130k
      above_row[-1] = above_ref[0];
1414
217k
    } else if (n_left_px > 0) {
1415
188k
      above_row[-1] = left_ref[0];
1416
188k
    } else {
1417
29.0k
      above_row[-1] = 128;
1418
29.0k
    }
1419
8.69M
    left_col[-1] = above_row[-1];
1420
8.69M
  }
1421
1422
19.9M
  if (use_filter_intra) {
1423
948k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1424
948k
                               filter_intra_mode);
1425
948k
    return;
1426
948k
  }
1427
1428
19.0M
  if (is_dr_mode) {
1429
5.10M
    int upsample_above = 0;
1430
5.10M
    int upsample_left = 0;
1431
5.10M
    if (!disable_edge_filter) {
1432
4.97M
      const int need_right = p_angle < 90;
1433
4.97M
      const int need_bottom = p_angle > 180;
1434
4.97M
      if (p_angle != 90 && p_angle != 180) {
1435
2.25M
        const int ab_le = need_above_left ? 1 : 0;
1436
2.25M
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1437
430k
          filter_intra_edge_corner(above_row, left_col);
1438
430k
        }
1439
2.25M
        if (need_above && n_top_px > 0) {
1440
1.39M
          const int strength = intra_edge_filter_strength(
1441
1.39M
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1442
1.39M
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1443
1.39M
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1444
1.39M
        }
1445
2.25M
        if (need_left && n_left_px > 0) {
1446
1.80M
          const int strength = intra_edge_filter_strength(
1447
1.80M
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1448
1.80M
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1449
1.80M
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1450
1.80M
        }
1451
2.25M
      }
1452
4.97M
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1453
4.97M
                                                   intra_edge_filter_type);
1454
4.97M
      if (need_above && upsample_above) {
1455
274k
        const int n_px = txwpx + (need_right ? txhpx : 0);
1456
274k
        av1_upsample_intra_edge(above_row, n_px);
1457
274k
      }
1458
4.97M
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1459
4.97M
                                                  intra_edge_filter_type);
1460
4.97M
      if (need_left && upsample_left) {
1461
485k
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1462
485k
        av1_upsample_intra_edge(left_col, n_px);
1463
485k
      }
1464
4.97M
    }
1465
5.10M
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1466
5.10M
                 upsample_left, p_angle);
1467
5.10M
    return;
1468
5.10M
  }
1469
1470
  // predict
1471
13.9M
  if (mode == DC_PRED) {
1472
9.35M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1473
9.35M
                                                  left_col);
1474
9.35M
  } else {
1475
4.57M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1476
4.57M
  }
1477
13.9M
}
1478
1479
// Maps a luma block size to the block size used for chroma neighbour
// availability checks when at least one chroma dimension is subsampled.
//
// Only block sizes with a 4-pixel dimension are altered: a 4-pixel width is
// widened when subsampling_x == 1 and a 4-pixel height is heightened when
// subsampling_y == 1. Every other block size, and the case where neither
// dimension is subsampled, is returned unchanged.
static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);

  const int sub_x = (subsampling_x == 1);
  const int sub_y = (subsampling_y == 1);

  // With no subsampled dimension there is nothing to scale.
  if (!sub_x && !sub_y) return bsize;

  // From here on at least one of sub_x / sub_y is set.
  switch (bsize) {
    case BLOCK_4X4:
      if (sub_x && sub_y) return BLOCK_8X8;
      return sub_x ? BLOCK_8X4 : BLOCK_4X8;
    case BLOCK_4X8:
      // Only the width is 4, so only horizontal subsampling changes it.
      return sub_x ? BLOCK_8X8 : BLOCK_4X8;
    case BLOCK_8X4:
      // Only the height is 4, so only vertical subsampling changes it.
      return sub_y ? BLOCK_8X8 : BLOCK_8X4;
    case BLOCK_4X16:
      return sub_x ? BLOCK_8X16 : BLOCK_4X16;
    case BLOCK_16X4:
      return sub_y ? BLOCK_16X8 : BLOCK_16X4;
    default: return bsize;
  }
}
1529
1530
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1531
                             int enable_intra_edge_filter, int wpx, int hpx,
1532
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1533
                             int angle_delta, int use_palette,
1534
                             FILTER_INTRA_MODE filter_intra_mode,
1535
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1536
                             int dst_stride, int col_off, int row_off,
1537
108M
                             int plane) {
1538
108M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1539
108M
  const int txwpx = tx_size_wide[tx_size];
1540
108M
  const int txhpx = tx_size_high[tx_size];
1541
108M
  const int x = col_off << MI_SIZE_LOG2;
1542
108M
  const int y = row_off << MI_SIZE_LOG2;
1543
1544
108M
  if (use_palette) {
1545
216k
    int r, c;
1546
216k
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1547
216k
                               xd->color_index_map_offset[plane != 0];
1548
216k
    const uint16_t *const palette =
1549
216k
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1550
216k
    if (is_cur_buf_hbd(xd)) {
1551
152k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1552
1.52M
      for (r = 0; r < txhpx; ++r) {
1553
17.8M
        for (c = 0; c < txwpx; ++c) {
1554
16.4M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1555
16.4M
        }
1556
1.37M
      }
1557
152k
    } else {
1558
567k
      for (r = 0; r < txhpx; ++r) {
1559
5.10M
        for (c = 0; c < txwpx; ++c) {
1560
4.59M
          dst[r * dst_stride + c] =
1561
4.59M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1562
4.59M
        }
1563
503k
      }
1564
63.5k
    }
1565
216k
    return;
1566
216k
  }
1567
1568
108M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1569
108M
  const int txw = tx_size_wide_unit[tx_size];
1570
108M
  const int txh = tx_size_high_unit[tx_size];
1571
108M
  const int ss_x = pd->subsampling_x;
1572
108M
  const int ss_y = pd->subsampling_y;
1573
108M
  const int have_top =
1574
108M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1575
108M
  const int have_left =
1576
108M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1577
108M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1578
108M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1579
1580
  // Distance between the right edge of this prediction block to
1581
  // the frame right edge
1582
108M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1583
  // Distance between the bottom edge of this prediction block to
1584
  // the frame bottom edge
1585
108M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1586
108M
  const int right_available =
1587
108M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1588
108M
  const int bottom_available =
1589
108M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1590
1591
108M
  const PARTITION_TYPE partition = mbmi->partition;
1592
1593
108M
  BLOCK_SIZE bsize = mbmi->bsize;
1594
  // force 4x4 chroma component block size.
1595
108M
  if (ss_x || ss_y) {
1596
9.93M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1597
9.93M
  }
1598
1599
108M
  const int is_dr_mode = av1_is_directional_mode(mode);
1600
108M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1601
108M
  int p_angle = 0;
1602
108M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1603
108M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1604
1605
108M
  if (use_filter_intra) {
1606
1.39M
    need_top_right = 0;
1607
1.39M
    need_bottom_left = 0;
1608
1.39M
  }
1609
108M
  if (is_dr_mode) {
1610
9.82M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1611
9.82M
    need_top_right = p_angle < 90;
1612
9.82M
    need_bottom_left = p_angle > 180;
1613
9.82M
  }
1614
1615
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1616
  // -1 : TR and BL are not needed
1617
  //  0 : TR and BL are needed but not available
1618
  // > 0 : TR and BL are needed and pixels are available
1619
108M
  const int have_top_right =
1620
108M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1621
1.13M
                                     right_available, partition, tx_size,
1622
1.13M
                                     row_off, col_off, ss_x, ss_y)
1623
108M
                     : -1;
1624
108M
  const int have_bottom_left =
1625
108M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1626
1.83M
                                         bottom_available, have_left, partition,
1627
1.83M
                                         tx_size, row_off, col_off, ss_x, ss_y)
1628
108M
                       : -1;
1629
1630
108M
  const int disable_edge_filter = !enable_intra_edge_filter;
1631
108M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1632
108M
#if CONFIG_AV1_HIGHBITDEPTH
1633
108M
  if (is_cur_buf_hbd(xd)) {
1634
88.6M
    build_intra_predictors_high(
1635
88.6M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1636
88.6M
        tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1637
88.6M
        have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right,
1638
88.6M
        have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1639
88.6M
        have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left,
1640
88.6M
        intra_edge_filter_type, xd->bd);
1641
88.6M
    return;
1642
88.6M
  }
1643
19.9M
#endif
1644
19.9M
  build_intra_predictors(
1645
19.9M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1646
19.9M
      tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1647
19.9M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right,
1648
19.9M
      have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1649
19.9M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left,
1650
19.9M
      intra_edge_filter_type);
1651
19.9M
}
1652
1653
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1654
                                    int plane, int blk_col, int blk_row,
1655
107M
                                    TX_SIZE tx_size) {
1656
107M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1657
107M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1658
107M
  const int dst_stride = pd->dst.stride;
1659
107M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1660
107M
  const PREDICTION_MODE mode =
1661
107M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1662
107M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1663
107M
  const FILTER_INTRA_MODE filter_intra_mode =
1664
107M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1665
107M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1666
107M
          : FILTER_INTRA_MODES;
1667
107M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1668
107M
  const SequenceHeader *seq_params = cm->seq_params;
1669
1670
107M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1671
#if CONFIG_DEBUG
1672
    assert(is_cfl_allowed(xd));
1673
    const BLOCK_SIZE plane_bsize =
1674
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1675
    (void)plane_bsize;
1676
    assert(plane_bsize < BLOCK_SIZES_ALL);
1677
    if (!xd->lossless[mbmi->segment_id]) {
1678
      assert(blk_col == 0);
1679
      assert(blk_row == 0);
1680
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1681
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1682
    }
1683
#endif
1684
2.60M
    CFL_CTX *const cfl = &xd->cfl;
1685
2.60M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1686
2.60M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1687
2.60M
      av1_predict_intra_block(xd, seq_params->sb_size,
1688
2.60M
                              seq_params->enable_intra_edge_filter, pd->width,
1689
2.60M
                              pd->height, tx_size, mode, angle_delta,
1690
2.60M
                              use_palette, filter_intra_mode, dst, dst_stride,
1691
2.60M
                              dst, dst_stride, blk_col, blk_row, plane);
1692
2.60M
      if (cfl->use_dc_pred_cache) {
1693
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1694
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1695
0
      }
1696
2.60M
    } else {
1697
1
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1698
1
    }
1699
2.60M
    cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1700
2.60M
    return;
1701
2.60M
  }
1702
104M
  av1_predict_intra_block(
1703
104M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1704
104M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1705
104M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1706
104M
}
1707
1708
14.2k
void av1_init_intra_predictors(void) {
1709
14.2k
  aom_once(init_intra_predictors_internal);
1710
14.2k
}