Coverage Report

Created: 2022-08-24 06:15

/src/aom/av1/common/reconintra.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
14
#include "config/aom_config.h"
15
#include "config/aom_dsp_rtcd.h"
16
#include "config/av1_rtcd.h"
17
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_mem/aom_mem.h"
20
#include "aom_ports/aom_once.h"
21
#include "aom_ports/mem.h"
22
#include "av1/common/av1_common_int.h"
23
#include "av1/common/cfl.h"
24
#include "av1/common/reconintra.h"
25
26
// Bit flags naming the neighbouring edges that an intra prediction mode reads.
// The flags start at bit 1; bit 0 is not assigned.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};
33
34
// Sizing constants for intra edge handling.
// NOTE(review): the names suggest filter-kernel count, taps per kernel and
// the largest edge that gets upsampled -- confirm against their users.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Derived from the largest transform size: two MAX_TX_SIZE edges plus 32.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38
39
static const uint8_t extend_modes[INTRA_MODES] = {
40
  NEED_ABOVE | NEED_LEFT,                   // DC
41
  NEED_ABOVE,                               // V
42
  NEED_LEFT,                                // H
43
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53
};
54
55
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data, hence `const`.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129
  // 4X4
130
  has_tr_4x4,
131
  // 4X8,       8X4,            8X8
132
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
133
  // 8X16,      16X8,           16X16
134
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
135
  // 16X32,     32X16,          32X32
136
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
137
  // 32X64,     64X32,          64X64
138
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
139
  // 64x128,    128x64,         128x128
140
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
141
  // 4x16,      16x4,            8x32
142
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
143
  // 32x8,      16x64,           64x16
144
  has_tr_32x8, has_tr_16x64, has_tr_64x16
145
};
146
147
// Top-right availability bit tables for square blocks coded under a mixed
// vertical partition (PARTITION_VERT_A / PARTITION_VERT_B). Read-only data.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157
// The _vert_* tables are like the ordinary tables above, but describe the
158
// order we visit square blocks when doing a PARTITION_VERT_A or
159
// PARTITION_VERT_B. This is the same order as normal except for on the last
160
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161
// as a pair of squares, which means that these tables work correctly for both
162
// mixed vertical partition types.
163
//
164
// There are tables for each of the square sizes. Vertical rectangles (like
165
// BLOCK_16X32) use their respective "non-vert" table
166
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167
  // 4X4
168
  NULL,
169
  // 4X8,      8X4,         8X8
170
  has_tr_4x8, NULL, has_tr_vert_8x8,
171
  // 8X16,     16X8,        16X16
172
  has_tr_8x16, NULL, has_tr_vert_16x16,
173
  // 16X32,    32X16,       32X32
174
  has_tr_16x32, NULL, has_tr_vert_32x32,
175
  // 32X64,    64X32,       64X64
176
  has_tr_32x64, NULL, has_tr_vert_64x64,
177
  // 64x128,   128x64,      128x128
178
  has_tr_64x128, NULL, has_tr_128x128
179
};
180
181
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
182
1.17M
                                       BLOCK_SIZE bsize) {
183
1.17M
  const uint8_t *ret = NULL;
184
  // If this is a mixed vertical partition, look up bsize in orders_vert.
185
1.17M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
186
0
    assert(bsize < BLOCK_SIZES);
187
0
    ret = has_tr_vert_tables[bsize];
188
1.17M
  } else {
189
1.17M
    ret = has_tr_tables[bsize];
190
1.17M
  }
191
1.17M
  assert(ret);
192
1.17M
  return ret;
193
1.17M
}
194
195
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
196
                         int mi_col, int top_available, int right_available,
197
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
198
51.9M
                         int col_off, int ss_x, int ss_y) {
199
51.9M
  if (!top_available || !right_available) return 0;
200
201
44.6M
  const int bw_unit = mi_size_wide[bsize];
202
44.6M
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
203
44.6M
  const int top_right_count_unit = tx_size_wide_unit[txsz];
204
205
44.6M
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
206
36.0M
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
207
      // Special case: For 128x128 blocks, the transform unit whose
208
      // top-right corner is at the center of the block does in fact have
209
      // pixels available at its top-right corner.
210
0
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
211
0
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
212
0
        return 1;
213
0
      }
214
0
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
215
0
      const int col_off_64 = col_off % plane_bw_unit_64;
216
0
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
217
0
    }
218
36.0M
    return col_off + top_right_count_unit < plane_bw_unit;
219
36.0M
  } else {
220
    // All top-right pixels are in the block above, which is already available.
221
8.58M
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
222
223
4.12M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
224
4.12M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
225
4.12M
    const int sb_mi_size = mi_size_high[sb_size];
226
4.12M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
227
4.12M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
228
229
    // Top row of superblock: so top-right pixels are in the top and/or
230
    // top-right superblocks, both of which are already available.
231
4.12M
    if (blk_row_in_sb == 0) return 1;
232
233
    // Rightmost column of superblock (and not the top row): so top-right pixels
234
    // fall in the right superblock, which is not available yet.
235
971k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
236
88.9k
      return 0;
237
88.9k
    }
238
239
    // General case (neither top row nor rightmost column): check if the
240
    // top-right block is coded before the current block.
241
882k
    const int this_blk_index =
242
882k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
243
882k
        blk_col_in_sb + 0;
244
882k
    const int idx1 = this_blk_index / 8;
245
882k
    const int idx2 = this_blk_index % 8;
246
882k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
247
882k
    return (has_tr_table[idx1] >> idx2) & 1;
248
971k
  }
249
44.6M
}
250
251
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. The tables are read-only lookup data, hence `const`.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314
  // 4X4
315
  has_bl_4x4,
316
  // 4X8,         8X4,         8X8
317
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
318
  // 8X16,        16X8,        16X16
319
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
320
  // 16X32,       32X16,       32X32
321
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
322
  // 32X64,       64X32,       64X64
323
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
324
  // 64x128,      128x64,      128x128
325
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
326
  // 4x16,        16x4,        8x32
327
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
328
  // 32x8,        16x64,       64x16
329
  has_bl_32x8, has_bl_16x64, has_bl_64x16
330
};
331
332
// Bottom-left availability bit tables for square blocks coded under a mixed
// vertical partition (PARTITION_VERT_A / PARTITION_VERT_B). Read-only data.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342
// The _vert_* tables are like the ordinary tables above, but describe the
343
// order we visit square blocks when doing a PARTITION_VERT_A or
344
// PARTITION_VERT_B. This is the same order as normal except for on the last
345
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346
// as a pair of squares, which means that these tables work correctly for both
347
// mixed vertical partition types.
348
//
349
// There are tables for each of the square sizes. Vertical rectangles (like
350
// BLOCK_16X32) use their respective "non-vert" table
351
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352
  // 4X4
353
  NULL,
354
  // 4X8,     8X4,         8X8
355
  has_bl_4x8, NULL, has_bl_vert_8x8,
356
  // 8X16,    16X8,        16X16
357
  has_bl_8x16, NULL, has_bl_vert_16x16,
358
  // 16X32,   32X16,       32X32
359
  has_bl_16x32, NULL, has_bl_vert_32x32,
360
  // 32X64,   64X32,       64X64
361
  has_bl_32x64, NULL, has_bl_vert_64x64,
362
  // 64x128,  128x64,      128x128
363
  has_bl_64x128, NULL, has_bl_128x128
364
};
365
366
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
367
1.43M
                                       BLOCK_SIZE bsize) {
368
1.43M
  const uint8_t *ret = NULL;
369
  // If this is a mixed vertical partition, look up bsize in orders_vert.
370
1.43M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
371
0
    assert(bsize < BLOCK_SIZES);
372
0
    ret = has_bl_vert_tables[bsize];
373
1.43M
  } else {
374
1.43M
    ret = has_bl_tables[bsize];
375
1.43M
  }
376
1.43M
  assert(ret);
377
1.43M
  return ret;
378
1.43M
}
379
380
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
381
                           int mi_col, int bottom_available, int left_available,
382
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
383
51.9M
                           int col_off, int ss_x, int ss_y) {
384
51.9M
  if (!bottom_available || !left_available) return 0;
385
386
  // Special case for 128x* blocks, when col_off is half the block width.
387
  // This is needed because 128x* superblocks are divided into 64x* blocks in
388
  // raster order
389
45.0M
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
390
0
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
391
0
    const int col_off_64 = col_off % plane_bw_unit_64;
392
0
    if (col_off_64 == 0) {
393
      // We are at the left edge of top-right or bottom-right 64x* block.
394
0
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
395
0
      const int row_off_64 = row_off % plane_bh_unit_64;
396
0
      const int plane_bh_unit =
397
0
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
398
      // Check if all bottom-left pixels are in the left 64x* block (which is
399
      // already coded).
400
0
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
401
0
    }
402
0
  }
403
404
45.0M
  if (col_off > 0) {
405
    // Bottom-left pixels are in the bottom-left block, which is not available.
406
36.0M
    return 0;
407
36.0M
  } else {
408
9.01M
    const int bh_unit = mi_size_high[bsize];
409
9.01M
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
410
9.01M
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
411
412
    // All bottom-left pixels are in the left block, which is already available.
413
9.01M
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
414
415
4.45M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
416
4.45M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
417
4.45M
    const int sb_mi_size = mi_size_high[sb_size];
418
4.45M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
419
4.45M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
420
421
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
422
    // and/or bottom-left superblocks. But only the left superblock is
423
    // available, so check if all required pixels fall in that superblock.
424
4.45M
    if (blk_col_in_sb == 0) {
425
3.20M
      const int blk_start_row_off =
426
3.20M
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
427
3.20M
          ss_y;
428
3.20M
      const int row_off_in_sb = blk_start_row_off + row_off;
429
3.20M
      const int sb_height_unit = sb_mi_size >> ss_y;
430
3.20M
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
431
3.20M
    }
432
433
    // Bottom row of superblock (and not the leftmost column): so bottom-left
434
    // pixels fall in the bottom superblock, which is not available yet.
435
1.24M
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
436
437
    // General case (neither leftmost column nor bottom row): check if the
438
    // bottom-left block is coded before the current block.
439
1.21M
    const int this_blk_index =
440
1.21M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
441
1.21M
        blk_col_in_sb + 0;
442
1.21M
    const int idx1 = this_blk_index / 8;
443
1.21M
    const int idx2 = this_blk_index % 8;
444
1.21M
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
445
1.21M
    return (has_bl_table[idx1] >> idx2) & 1;
446
1.24M
  }
447
45.0M
}
448
449
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
450
                              const uint8_t *above, const uint8_t *left);
451
452
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
453
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
454
455
#if CONFIG_AV1_HIGHBITDEPTH
456
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457
                                   const uint16_t *above, const uint16_t *left,
458
                                   int bd);
459
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461
#endif
462
463
1
static void init_intra_predictors_internal(void) {
464
1
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466
#if CONFIG_REALTIME_ONLY
467
#define INIT_RECTANGULAR(p, type)             \
468
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
469
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
470
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
471
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
472
  p[TX_16X32] = aom_##type##_predictor_16x32; \
473
  p[TX_32X16] = aom_##type##_predictor_32x16; \
474
  p[TX_32X64] = aom_##type##_predictor_32x64; \
475
  p[TX_64X32] = aom_##type##_predictor_64x32;
476
#else
477
1
#define INIT_RECTANGULAR(p, type)             \
478
20
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
479
20
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
480
20
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
481
20
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
482
20
  p[TX_16X32] = aom_##type##_predictor_16x32; \
483
20
  p[TX_32X16] = aom_##type##_predictor_32x16; \
484
20
  p[TX_32X64] = aom_##type##_predictor_32x64; \
485
20
  p[TX_64X32] = aom_##type##_predictor_64x32; \
486
20
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
487
20
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
488
20
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
489
20
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
490
20
  p[TX_16X64] = aom_##type##_predictor_16x64; \
491
20
  p[TX_64X16] = aom_##type##_predictor_64x16;
492
1
#endif
493
494
1
#define INIT_NO_4X4(p, type)                  \
495
20
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
496
20
  p[TX_16X16] = aom_##type##_predictor_16x16; \
497
20
  p[TX_32X32] = aom_##type##_predictor_32x32; \
498
20
  p[TX_64X64] = aom_##type##_predictor_64x64; \
499
20
  INIT_RECTANGULAR(p, type)
500
501
1
#define INIT_ALL_SIZES(p, type)           \
502
20
  p[TX_4X4] = aom_##type##_predictor_4x4; \
503
20
  INIT_NO_4X4(p, type)
504
505
1
  INIT_ALL_SIZES(pred[V_PRED], v);
506
1
  INIT_ALL_SIZES(pred[H_PRED], h);
507
1
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
508
1
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
509
1
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
510
1
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
511
1
  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
512
1
  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
513
1
  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
514
1
  INIT_ALL_SIZES(dc_pred[1][1], dc);
515
1
#if CONFIG_AV1_HIGHBITDEPTH
516
1
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
517
1
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
518
1
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
519
1
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
520
1
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
521
1
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
522
1
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
523
1
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
524
1
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
525
1
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
526
1
#endif
527
1
#undef intra_pred_allsizes
528
1
}
529
530
// Directional prediction, zone 1: 0 < angle < 90
//
// Predicts a bw x bh block into dst from the `above` edge only (`left` and
// `dy` are unused; dy is asserted to be 1).
//
// dst, stride:    output block and its row pitch in samples.
// above:          edge samples; entries up to index
//                 ((bw + bh) - 1) << upsample_above are read.
// upsample_above: used as a shift amount: it scales the edge index range and
//                 step, and reduces the fractional bits from 6.
// dx:             per-row step along the edge, with 6 fractional bits;
//                 asserted to be > 0.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  int r, c, x, base, shift, val;

  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;
  x = dx;
  for (r = 0; r < bh; ++r, dst += stride, x += dx) {
    base = x >> frac_bits;
    // 5-bit interpolation weight taken from the fractional part of x.
    shift = ((x << upsample_above) & 0x3F) >> 1;

    // Once a row starts at or past the end of the edge, this row and every
    // remaining one is a flat fill with the last edge sample.
    if (base >= max_base_x) {
      for (int i = r; i < bh; ++i) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        // Blend two neighbouring edge samples with weights summing to 32.
        val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
567
568
// Directional prediction, zone 2: 90 < angle < 180
//
// Predicts a bw x bh block into dst. Each sample is interpolated from the
// `above` edge when its projected position is at or beyond
// -(1 << upsample_above), and from the `left` edge otherwise.
//
// dst, stride:     output block and its row pitch in samples.
// above, left:     edge samples; both are read at negative indices down to
//                  -(1 << upsample_*), so callers must pass pointers with
//                  valid storage before element 0.
// upsample_above,
// upsample_left:   used as shift amounts; each reduces the fractional bits
//                  of its edge from 6.
// dx, dy:          projection steps with 6 fractional bits; asserted > 0.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      int val;
      int y = r + 1;
      int x = (c << 6) - y * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        // Projection lands on the above edge.
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Projection falls off the above edge: use the left edge instead.
        x = c + 1;
        y = (r << 6) - x * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      }
      dst[c] = val;
    }
    dst += stride;
  }
}
606
607
// Directional prediction, zone 3: 180 < angle < 270
//
// Predicts a bw x bh block into dst from the `left` edge only (`above` and
// `dx` are unused; dx is asserted to be 1). The block is filled column by
// column.
//
// dst, stride:   output block and its row pitch in samples.
// left:          edge samples; entries up to index
//                (bw + bh - 1) << upsample_left are read.
// upsample_left: used as a shift amount: it scales the edge index range and
//                step, and reduces the fractional bits from 6.
// dy:            per-column step along the edge, with 6 fractional bits;
//                asserted to be > 0.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  int r, c, y, base, shift, val;

  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;
  y = dy;
  for (c = 0; c < bw; ++c, y += dy) {
    base = y >> frac_bits;
    // 5-bit interpolation weight taken from the fractional part of y.
    shift = ((y << upsample_left) & 0x3F) >> 1;

    for (r = 0; r < bh; ++r, base += base_inc) {
      if (base < max_base_y) {
        // Blend two neighbouring edge samples with weights summing to 32.
        val = left[base] * (32 - shift) + left[base + 1] * shift;
        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Past the end of the edge: the rest of this column is a flat fill
        // with the last edge sample.
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
        break;
      }
    }
  }
}
638
639
// Dispatches a directional prediction to the routine for its angle range.
//
// dst, stride:     output block and its row pitch in samples.
// tx_size:         transform size; supplies the block width and height.
// above, left:     edge sample pointers, forwarded unchanged.
// upsample_above,
// upsample_left:   forwarded to the zone predictors.
// angle:           prediction angle in degrees; asserted to lie strictly
//                  between 0 and 270. 90 and 180 are served by the V_PRED
//                  and H_PRED entries of the pred table.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  } else if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
  } else if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
  }
}
663
664
#if CONFIG_AV1_HIGHBITDEPTH
665
// Directional prediction, zone 1: 0 < angle < 90
//
// 16-bit sample variant. Predicts a bw x bh block into dst from the `above`
// edge only (`left`, `dy` and `bd` are unused; dy is asserted to be 1).
//
// dst, stride:    output block and its row pitch in samples.
// above:          edge samples; entries up to index
//                 ((bw + bh) - 1) << upsample_above are read.
// upsample_above: used as a shift amount: it scales the edge index range and
//                 step, and reduces the fractional bits from 6.
// dx:             per-row step along the edge, with 6 fractional bits;
//                 asserted to be > 0.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  int r, c, x, base, shift, val;

  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;
  x = dx;
  for (r = 0; r < bh; ++r, dst += stride, x += dx) {
    base = x >> frac_bits;
    // 5-bit interpolation weight taken from the fractional part of x.
    shift = ((x << upsample_above) & 0x3F) >> 1;

    // Once a row starts at or past the end of the edge, this row and every
    // remaining one is a flat fill with the last edge sample.
    if (base >= max_base_x) {
      for (int i = r; i < bh; ++i) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    for (c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        // Blend two neighbouring edge samples with weights summing to 32.
        val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
704
705
// Directional prediction, zone 2: 90 < angle < 180
//
// 16-bit sample variant (`bd` is unused). Predicts a bw x bh block into dst.
// Each sample is interpolated from the `above` edge when its projected
// position is at or beyond -(1 << upsample_above), and from the `left` edge
// otherwise.
//
// dst, stride:     output block and its row pitch in samples.
// above, left:     edge samples; both are read at negative indices down to
//                  -(1 << upsample_*), so callers must pass pointers with
//                  valid storage before element 0.
// upsample_above,
// upsample_left:   used as shift amounts; each reduces the fractional bits
//                  of its edge from 6.
// dx, dy:          projection steps with 6 fractional bits; asserted > 0.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      int val;
      int y = r + 1;
      int x = (c << 6) - y * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        // Projection lands on the above edge.
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Projection falls off the above edge: use the left edge instead.
        x = c + 1;
        y = (r << 6) - x * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      }
      dst[c] = val;
    }
    dst += stride;
  }
}
744
745
// Directional prediction, zone 3: 180 < angle < 270
//
// High bit-depth C implementation. Works column by column using the `left`
// edge only: the column start position advances by `dy` per column, and each
// pixel blends two neighbouring left samples with a 5-bit weight. Once the
// edge index reaches max_base_y the rest of the column is filled with
// left[max_base_y]. `above`, `dx` and `bd` are unused (dx is only asserted).
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> frac_bits;
    int row = 0;

    // Interpolated part of the column.
    while (row < bh && idx < max_base_y) {
      const int sum = left[idx] * (32 - weight) + left[idx + 1] * weight;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
      idx += base_inc;
      ++row;
    }

    // Anything left over lies past the last usable edge sample.
    for (; row < bh; ++row) dst[row * stride + col] = left[max_base_y];
  }
}
777
778
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
779
                                TX_SIZE tx_size, const uint16_t *above,
780
                                const uint16_t *left, int upsample_above,
781
0
                                int upsample_left, int angle, int bd) {
782
0
  const int dx = av1_get_dx(angle);
783
0
  const int dy = av1_get_dy(angle);
784
0
  const int bw = tx_size_wide[tx_size];
785
0
  const int bh = tx_size_high[tx_size];
786
0
  assert(angle > 0 && angle < 270);
787
788
0
  if (angle > 0 && angle < 90) {
789
0
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
790
0
                                upsample_above, dx, dy, bd);
791
0
  } else if (angle > 90 && angle < 180) {
792
0
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
793
0
                                upsample_above, upsample_left, dx, dy, bd);
794
0
  } else if (angle > 180 && angle < 270) {
795
0
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
796
0
                                dx, dy, bd);
797
0
  } else if (angle == 90) {
798
0
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
799
0
  } else if (angle == 180) {
800
0
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
801
0
  }
802
0
}
803
#endif  // CONFIG_AV1_HIGHBITDEPTH
804
805
// Filter-intra tap table, indexed as [mode][k][tap].
//  - mode: one of the FILTER_INTRA_MODES filter-intra modes.
//  - k:    output pixel inside a 4x2 patch; the predictors in this file use
//          row offset (k >> 2) and column offset (k & 3).
//  - tap:  taps 0..6 multiply the seven neighbours p0..p6 gathered by the
//          predictors; the eighth entry of every row is 0 and is not read by
//          the C predictors in this file.
DECLARE_ALIGNED(16, const int8_t,
806
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
807
  {
808
      { -6, 10, 0, 0, 0, 12, 0, 0 },
809
      { -5, 2, 10, 0, 0, 9, 0, 0 },
810
      { -3, 1, 1, 10, 0, 7, 0, 0 },
811
      { -3, 1, 1, 2, 10, 5, 0, 0 },
812
      { -4, 6, 0, 0, 0, 2, 12, 0 },
813
      { -3, 2, 6, 0, 0, 2, 9, 0 },
814
      { -3, 2, 2, 6, 0, 2, 7, 0 },
815
      { -3, 1, 2, 2, 6, 3, 5, 0 },
816
  },
817
  {
818
      { -10, 16, 0, 0, 0, 10, 0, 0 },
819
      { -6, 0, 16, 0, 0, 6, 0, 0 },
820
      { -4, 0, 0, 16, 0, 4, 0, 0 },
821
      { -2, 0, 0, 0, 16, 2, 0, 0 },
822
      { -10, 16, 0, 0, 0, 0, 10, 0 },
823
      { -6, 0, 16, 0, 0, 0, 6, 0 },
824
      { -4, 0, 0, 16, 0, 0, 4, 0 },
825
      { -2, 0, 0, 0, 16, 0, 2, 0 },
826
  },
827
  {
828
      { -8, 8, 0, 0, 0, 16, 0, 0 },
829
      { -8, 0, 8, 0, 0, 16, 0, 0 },
830
      { -8, 0, 0, 8, 0, 16, 0, 0 },
831
      { -8, 0, 0, 0, 8, 16, 0, 0 },
832
      { -4, 4, 0, 0, 0, 0, 16, 0 },
833
      { -4, 0, 4, 0, 0, 0, 16, 0 },
834
      { -4, 0, 0, 4, 0, 0, 16, 0 },
835
      { -4, 0, 0, 0, 4, 0, 16, 0 },
836
  },
837
  {
838
      { -2, 8, 0, 0, 0, 10, 0, 0 },
839
      { -1, 3, 8, 0, 0, 6, 0, 0 },
840
      { -1, 2, 3, 8, 0, 4, 0, 0 },
841
      { 0, 1, 2, 3, 8, 2, 0, 0 },
842
      { -1, 4, 0, 0, 0, 3, 10, 0 },
843
      { -1, 3, 4, 0, 0, 4, 6, 0 },
844
      { -1, 2, 3, 4, 0, 4, 4, 0 },
845
      { -1, 2, 2, 3, 4, 3, 3, 0 },
846
  },
847
  {
848
      { -12, 14, 0, 0, 0, 14, 0, 0 },
849
      { -10, 0, 14, 0, 0, 12, 0, 0 },
850
      { -9, 0, 0, 14, 0, 11, 0, 0 },
851
      { -8, 0, 0, 0, 14, 10, 0, 0 },
852
      { -10, 12, 0, 0, 0, 0, 14, 0 },
853
      { -9, 1, 12, 0, 0, 0, 12, 0 },
854
      { -8, 0, 0, 12, 0, 1, 11, 0 },
855
      { -7, 0, 0, 1, 12, 1, 9, 0 },
856
  },
857
};
858
859
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
860
                                  TX_SIZE tx_size, const uint8_t *above,
861
498k
                                  const uint8_t *left, int mode) {
862
498k
  int r, c;
863
498k
  uint8_t buffer[33][33];
864
498k
  const int bw = tx_size_wide[tx_size];
865
498k
  const int bh = tx_size_high[tx_size];
866
867
498k
  assert(bw <= 32 && bh <= 32);
868
869
3.69M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
870
498k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
871
872
2.09M
  for (r = 1; r < bh + 1; r += 2)
873
5.69M
    for (c = 1; c < bw + 1; c += 4) {
874
4.09M
      const uint8_t p0 = buffer[r - 1][c - 1];
875
4.09M
      const uint8_t p1 = buffer[r - 1][c];
876
4.09M
      const uint8_t p2 = buffer[r - 1][c + 1];
877
4.09M
      const uint8_t p3 = buffer[r - 1][c + 2];
878
4.09M
      const uint8_t p4 = buffer[r - 1][c + 3];
879
4.09M
      const uint8_t p5 = buffer[r][c - 1];
880
4.09M
      const uint8_t p6 = buffer[r + 1][c - 1];
881
36.8M
      for (int k = 0; k < 8; ++k) {
882
32.7M
        int r_offset = k >> 2;
883
32.7M
        int c_offset = k & 0x03;
884
32.7M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
885
32.7M
                 av1_filter_intra_taps[mode][k][1] * p1 +
886
32.7M
                 av1_filter_intra_taps[mode][k][2] * p2 +
887
32.7M
                 av1_filter_intra_taps[mode][k][3] * p3 +
888
32.7M
                 av1_filter_intra_taps[mode][k][4] * p4 +
889
32.7M
                 av1_filter_intra_taps[mode][k][5] * p5 +
890
32.7M
                 av1_filter_intra_taps[mode][k][6] * p6;
891
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
892
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
893
        // Since Clip1() clips a negative value to 0, it is safe to replace
894
        // Round2Signed() with Round2().
895
32.7M
        buffer[r + r_offset][c + c_offset] =
896
32.7M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
897
32.7M
      }
898
4.09M
    }
899
900
3.69M
  for (r = 0; r < bh; ++r) {
901
3.19M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
902
3.19M
    dst += stride;
903
3.19M
  }
904
498k
}
905
906
#if CONFIG_AV1_HIGHBITDEPTH
907
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
908
                                          TX_SIZE tx_size,
909
                                          const uint16_t *above,
910
                                          const uint16_t *left, int mode,
911
0
                                          int bd) {
912
0
  int r, c;
913
0
  uint16_t buffer[33][33];
914
0
  const int bw = tx_size_wide[tx_size];
915
0
  const int bh = tx_size_high[tx_size];
916
917
0
  assert(bw <= 32 && bh <= 32);
918
919
0
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
920
0
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
921
922
0
  for (r = 1; r < bh + 1; r += 2)
923
0
    for (c = 1; c < bw + 1; c += 4) {
924
0
      const uint16_t p0 = buffer[r - 1][c - 1];
925
0
      const uint16_t p1 = buffer[r - 1][c];
926
0
      const uint16_t p2 = buffer[r - 1][c + 1];
927
0
      const uint16_t p3 = buffer[r - 1][c + 2];
928
0
      const uint16_t p4 = buffer[r - 1][c + 3];
929
0
      const uint16_t p5 = buffer[r][c - 1];
930
0
      const uint16_t p6 = buffer[r + 1][c - 1];
931
0
      for (int k = 0; k < 8; ++k) {
932
0
        int r_offset = k >> 2;
933
0
        int c_offset = k & 0x03;
934
0
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
935
0
                 av1_filter_intra_taps[mode][k][1] * p1 +
936
0
                 av1_filter_intra_taps[mode][k][2] * p2 +
937
0
                 av1_filter_intra_taps[mode][k][3] * p3 +
938
0
                 av1_filter_intra_taps[mode][k][4] * p4 +
939
0
                 av1_filter_intra_taps[mode][k][5] * p5 +
940
0
                 av1_filter_intra_taps[mode][k][6] * p6;
941
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
942
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
943
        // Since Clip1() clips a negative value to 0, it is safe to replace
944
        // Round2Signed() with Round2().
945
0
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
946
0
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
947
0
      }
948
0
    }
949
950
0
  for (r = 0; r < bh; ++r) {
951
0
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
952
0
    dst += stride;
953
0
  }
954
0
}
955
#endif  // CONFIG_AV1_HIGHBITDEPTH
956
957
75.9M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
958
75.9M
  if (plane == 0) {
959
73.9M
    const PREDICTION_MODE mode = mbmi->mode;
960
73.9M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
961
73.9M
            mode == SMOOTH_H_PRED);
962
73.9M
  } else {
963
    // uv_mode is not set for inter blocks, so need to explicitly
964
    // detect that case.
965
2.01M
    if (is_inter_block(mbmi)) return 0;
966
967
2.01M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
968
2.06M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
969
2.06M
            uv_mode == UV_SMOOTH_H_PRED);
970
2.01M
  }
971
75.9M
}
972
973
51.9M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
974
51.9M
  int ab_sm, le_sm;
975
976
51.9M
  if (plane == 0) {
977
50.5M
    const MB_MODE_INFO *ab = xd->above_mbmi;
978
50.5M
    const MB_MODE_INFO *le = xd->left_mbmi;
979
50.5M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
980
50.5M
    le_sm = le ? is_smooth(le, plane) : 0;
981
50.5M
  } else {
982
1.45M
    const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
983
1.45M
    const MB_MODE_INFO *le = xd->chroma_left_mbmi;
984
1.45M
    ab_sm = ab ? is_smooth(ab, plane) : 0;
985
1.45M
    le_sm = le ? is_smooth(le, plane) : 0;
986
1.45M
  }
987
988
51.9M
  return (ab_sm || le_sm) ? 1 : 0;
989
51.9M
}
990
991
57.5M
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//   bs0, bs1: the two block dimensions; only their sum is used.
//   delta:    angle offset; only its magnitude is used.
//   type:     0 selects the first threshold table, any other value the second.
// Larger blocks and larger angle offsets yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type != 0) {
    if (size_sum <= 8) return (mag >= 64) ? 2 : (mag >= 40) ? 1 : 0;
    if (size_sum <= 16) return (mag >= 48) ? 2 : (mag >= 20) ? 1 : 0;
    if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
  // The <= 12 and <= 16 size classes share the same threshold.
  if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
  if (size_sum <= 24) {
    return (mag >= 32) ? 3 : (mag >= 16) ? 2 : (mag >= 8) ? 1 : 0;
  }
  if (size_sum <= 32) {
    return (mag >= 32) ? 3 : (mag >= 4) ? 2 : (mag >= 1) ? 1 : 0;
  }
  return (mag >= 1) ? 3 : 0;
}
1029
1030
57.5M
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1031
57.5M
  if (!strength) return;
1032
1033
22.1M
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1034
22.1M
                                                         { 0, 5, 6, 5, 0 },
1035
22.1M
                                                         { 2, 4, 4, 4, 2 } };
1036
22.1M
  const int filt = strength - 1;
1037
22.1M
  uint8_t edge[129];
1038
1039
22.1M
  memcpy(edge, p, sz * sizeof(*p));
1040
266M
  for (int i = 1; i < sz; i++) {
1041
244M
    int s = 0;
1042
1.46G
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1043
1.22G
      int k = i - 2 + j;
1044
1.22G
      k = (k < 0) ? 0 : k;
1045
1.22G
      k = (k > sz - 1) ? sz - 1 : k;
1046
1.22G
      s += edge[k] * kernel[filt][j];
1047
1.22G
    }
1048
244M
    s = (s + 8) >> 4;
1049
244M
    p[i] = s;
1050
244M
  }
1051
22.1M
}
1052
1053
2.23M
// Smooths the above-left corner sample with a (5, 6, 5) / 16 filter across
// left[0], the corner itself (p_above[-1]) and above[0], then stores the
// result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint8_t corner = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1062
1063
0
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1064
0
  if (!strength) return;
1065
1066
0
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1067
0
                                                         { 0, 5, 6, 5, 0 },
1068
0
                                                         { 2, 4, 4, 4, 2 } };
1069
0
  const int filt = strength - 1;
1070
0
  uint16_t edge[129];
1071
1072
0
  memcpy(edge, p, sz * sizeof(*p));
1073
0
  for (int i = 1; i < sz; i++) {
1074
0
    int s = 0;
1075
0
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1076
0
      int k = i - 2 + j;
1077
0
      k = (k < 0) ? 0 : k;
1078
0
      k = (k > sz - 1) ? sz - 1 : k;
1079
0
      s += edge[k] * kernel[filt][j];
1080
0
    }
1081
0
    s = (s + 8) >> 4;
1082
0
    p[i] = s;
1083
0
  }
1084
0
}
1085
1086
#if CONFIG_AV1_HIGHBITDEPTH
1087
0
// High bit-depth corner smoothing: applies a (5, 6, 5) / 16 filter across
// left[0], the corner itself (p_above[-1]) and above[0], then stores the
// result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int weighted = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint16_t corner = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1096
#endif
1097
1098
36.0M
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1099
  // interpolate half-sample positions
1100
36.0M
  assert(sz <= MAX_UPSAMPLE_SZ);
1101
1102
36.0M
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1103
  // copy p[-1..(sz-1)] and extend first and last samples
1104
36.0M
  in[0] = p[-1];
1105
36.0M
  in[1] = p[-1];
1106
268M
  for (int i = 0; i < sz; i++) {
1107
232M
    in[i + 2] = p[i];
1108
232M
  }
1109
36.0M
  in[sz + 2] = p[sz - 1];
1110
1111
  // interpolate half-sample edge positions
1112
36.0M
  p[-2] = in[0];
1113
267M
  for (int i = 0; i < sz; i++) {
1114
231M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1115
231M
    s = clip_pixel((s + 8) >> 4);
1116
231M
    p[2 * i - 1] = s;
1117
231M
    p[2 * i] = in[i + 2];
1118
231M
  }
1119
36.0M
}
1120
1121
0
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1122
  // interpolate half-sample positions
1123
0
  assert(sz <= MAX_UPSAMPLE_SZ);
1124
1125
0
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1126
  // copy p[-1..(sz-1)] and extend first and last samples
1127
0
  in[0] = p[-1];
1128
0
  in[1] = p[-1];
1129
0
  for (int i = 0; i < sz; i++) {
1130
0
    in[i + 2] = p[i];
1131
0
  }
1132
0
  in[sz + 2] = p[sz - 1];
1133
1134
  // interpolate half-sample edge positions
1135
0
  p[-2] = in[0];
1136
0
  for (int i = 0; i < sz; i++) {
1137
0
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1138
0
    s = (s + 8) >> 4;
1139
0
    s = clip_pixel_highbd(s, bd);
1140
0
    p[2 * i - 1] = s;
1141
0
    p[2 * i] = in[i + 2];
1142
0
  }
1143
0
}
1144
#if CONFIG_AV1_HIGHBITDEPTH
1145
static void build_intra_predictors_high(
1146
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1147
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
1148
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1149
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1150
0
    int bit_depth) {
1151
0
  int i;
1152
0
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1153
0
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
1154
0
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1155
0
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1156
0
  uint16_t *const above_row = above_data + 16;
1157
0
  uint16_t *const left_col = left_data + 16;
1158
0
  const int txwpx = tx_size_wide[tx_size];
1159
0
  const int txhpx = tx_size_high[tx_size];
1160
0
  int need_left = extend_modes[mode] & NEED_LEFT;
1161
0
  int need_above = extend_modes[mode] & NEED_ABOVE;
1162
0
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1163
0
  const uint16_t *above_ref = ref - ref_stride;
1164
0
  const uint16_t *left_ref = ref - 1;
1165
0
  int p_angle = 0;
1166
0
  const int is_dr_mode = av1_is_directional_mode(mode);
1167
0
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1168
0
  int base = 128 << (bit_depth - 8);
1169
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1170
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1171
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1172
  // seen to be the potential reason for this issue.
1173
0
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1174
0
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1175
1176
  // The default values if ref pixels are not available:
1177
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1178
  // base+1   A      B  ..     Y      Z
1179
  // base+1   C      D  ..     W      X
1180
  // base+1   E      F  ..     U      V
1181
  // base+1   G      H  ..     S      T      T      T      T      T
1182
1183
0
  if (is_dr_mode) {
1184
0
    p_angle = mode_to_angle_map[mode] + angle_delta;
1185
0
    if (p_angle <= 90)
1186
0
      need_above = 1, need_left = 0, need_above_left = 1;
1187
0
    else if (p_angle < 180)
1188
0
      need_above = 1, need_left = 1, need_above_left = 1;
1189
0
    else
1190
0
      need_above = 0, need_left = 1, need_above_left = 1;
1191
0
  }
1192
0
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1193
1194
0
  assert(n_top_px >= 0);
1195
0
  assert(n_topright_px >= 0);
1196
0
  assert(n_left_px >= 0);
1197
0
  assert(n_bottomleft_px >= 0);
1198
1199
0
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1200
0
    int val;
1201
0
    if (need_left) {
1202
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1203
0
    } else {
1204
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1205
0
    }
1206
0
    for (i = 0; i < txhpx; ++i) {
1207
0
      aom_memset16(dst, val, txwpx);
1208
0
      dst += dst_stride;
1209
0
    }
1210
0
    return;
1211
0
  }
1212
1213
  // NEED_LEFT
1214
0
  if (need_left) {
1215
0
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
1216
0
    if (use_filter_intra) need_bottom = 0;
1217
0
    if (is_dr_mode) need_bottom = p_angle > 180;
1218
0
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
1219
0
    i = 0;
1220
0
    if (n_left_px > 0) {
1221
0
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1222
0
      if (need_bottom && n_bottomleft_px > 0) {
1223
0
        assert(i == txhpx);
1224
0
        for (; i < txhpx + n_bottomleft_px; i++)
1225
0
          left_col[i] = left_ref[i * ref_stride];
1226
0
      }
1227
0
      if (i < num_left_pixels_needed)
1228
0
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1229
0
    } else if (n_top_px > 0) {
1230
0
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1231
0
    }
1232
0
  }
1233
1234
  // NEED_ABOVE
1235
0
  if (need_above) {
1236
0
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
1237
0
    if (use_filter_intra) need_right = 0;
1238
0
    if (is_dr_mode) need_right = p_angle < 90;
1239
0
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
1240
0
    if (n_top_px > 0) {
1241
0
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1242
0
      i = n_top_px;
1243
0
      if (need_right && n_topright_px > 0) {
1244
0
        assert(n_top_px == txwpx);
1245
0
        memcpy(above_row + txwpx, above_ref + txwpx,
1246
0
               n_topright_px * sizeof(above_ref[0]));
1247
0
        i += n_topright_px;
1248
0
      }
1249
0
      if (i < num_top_pixels_needed)
1250
0
        aom_memset16(&above_row[i], above_row[i - 1],
1251
0
                     num_top_pixels_needed - i);
1252
0
    } else if (n_left_px > 0) {
1253
0
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1254
0
    }
1255
0
  }
1256
1257
0
  if (need_above_left) {
1258
0
    if (n_top_px > 0 && n_left_px > 0) {
1259
0
      above_row[-1] = above_ref[-1];
1260
0
    } else if (n_top_px > 0) {
1261
0
      above_row[-1] = above_ref[0];
1262
0
    } else if (n_left_px > 0) {
1263
0
      above_row[-1] = left_ref[0];
1264
0
    } else {
1265
0
      above_row[-1] = base;
1266
0
    }
1267
0
    left_col[-1] = above_row[-1];
1268
0
  }
1269
1270
0
  if (use_filter_intra) {
1271
0
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1272
0
                                  filter_intra_mode, bit_depth);
1273
0
    return;
1274
0
  }
1275
1276
0
  if (is_dr_mode) {
1277
0
    int upsample_above = 0;
1278
0
    int upsample_left = 0;
1279
0
    if (!disable_edge_filter) {
1280
0
      const int need_right = p_angle < 90;
1281
0
      const int need_bottom = p_angle > 180;
1282
0
      if (p_angle != 90 && p_angle != 180) {
1283
0
        const int ab_le = need_above_left ? 1 : 0;
1284
0
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1285
0
          filter_intra_edge_corner_high(above_row, left_col);
1286
0
        }
1287
0
        if (need_above && n_top_px > 0) {
1288
0
          const int strength = intra_edge_filter_strength(
1289
0
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1290
0
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1291
0
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
1292
0
        }
1293
0
        if (need_left && n_left_px > 0) {
1294
0
          const int strength = intra_edge_filter_strength(
1295
0
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1296
0
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1297
0
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
1298
0
        }
1299
0
      }
1300
0
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1301
0
                                                   intra_edge_filter_type);
1302
0
      if (need_above && upsample_above) {
1303
0
        const int n_px = txwpx + (need_right ? txhpx : 0);
1304
0
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
1305
0
      }
1306
0
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1307
0
                                                  intra_edge_filter_type);
1308
0
      if (need_left && upsample_left) {
1309
0
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1310
0
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
1311
0
      }
1312
0
    }
1313
0
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1314
0
                        upsample_above, upsample_left, p_angle, bit_depth);
1315
0
    return;
1316
0
  }
1317
1318
  // predict
1319
0
  if (mode == DC_PRED) {
1320
0
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1321
0
        dst, dst_stride, above_row, left_col, bit_depth);
1322
0
  } else {
1323
0
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1324
0
  }
1325
0
}
1326
#endif  // CONFIG_AV1_HIGHBITDEPTH
1327
1328
static void build_intra_predictors(
1329
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1330
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
1331
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1332
51.8M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1333
51.8M
  int i;
1334
51.8M
  const uint8_t *above_ref = ref - ref_stride;
1335
51.8M
  const uint8_t *left_ref = ref - 1;
1336
51.8M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1337
51.8M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1338
51.8M
  uint8_t *const above_row = above_data + 16;
1339
51.8M
  uint8_t *const left_col = left_data + 16;
1340
51.8M
  const int txwpx = tx_size_wide[tx_size];
1341
51.8M
  const int txhpx = tx_size_high[tx_size];
1342
51.8M
  int need_left = extend_modes[mode] & NEED_LEFT;
1343
51.8M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1344
51.8M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1345
51.8M
  int p_angle = 0;
1346
51.8M
  const int is_dr_mode = av1_is_directional_mode(mode);
1347
51.8M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1348
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1349
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1350
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1351
  // be the potential reason for this issue.
1352
51.8M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1353
51.8M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1354
1355
  // The default values if ref pixels are not available:
1356
  // 128 127 127 .. 127 127 127 127 127 127
1357
  // 129  A   B  ..  Y   Z
1358
  // 129  C   D  ..  W   X
1359
  // 129  E   F  ..  U   V
1360
  // 129  G   H  ..  S   T   T   T   T   T
1361
  // ..
1362
1363
51.8M
  if (is_dr_mode) {
1364
43.7M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1365
43.7M
    if (p_angle <= 90)
1366
12.3M
      need_above = 1, need_left = 0, need_above_left = 1;
1367
31.4M
    else if (p_angle < 180)
1368
20.3M
      need_above = 1, need_left = 1, need_above_left = 1;
1369
11.0M
    else
1370
11.0M
      need_above = 0, need_left = 1, need_above_left = 1;
1371
43.7M
  }
1372
51.8M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1373
1374
51.8M
  assert(n_top_px >= 0);
1375
51.8M
  assert(n_topright_px >= 0);
1376
51.8M
  assert(n_left_px >= 0);
1377
51.8M
  assert(n_bottomleft_px >= 0);
1378
1379
51.8M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1380
1.59M
    int val;
1381
1.59M
    if (need_left) {
1382
661k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1383
932k
    } else {
1384
932k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1385
932k
    }
1386
22.6M
    for (i = 0; i < txhpx; ++i) {
1387
21.0M
      memset(dst, val, txwpx);
1388
21.0M
      dst += dst_stride;
1389
21.0M
    }
1390
1.59M
    return;
1391
1.59M
  }
1392
1393
  // NEED_LEFT
1394
50.2M
  if (need_left) {
1395
38.9M
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
1396
38.9M
    if (use_filter_intra) need_bottom = 0;
1397
38.9M
    if (is_dr_mode) need_bottom = p_angle > 180;
1398
38.9M
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
1399
38.9M
    i = 0;
1400
38.9M
    if (n_left_px > 0) {
1401
261M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1402
37.2M
      if (need_bottom && n_bottomleft_px > 0) {
1403
1.49M
        assert(i == txhpx);
1404
14.3M
        for (; i < txhpx + n_bottomleft_px; i++)
1405
12.8M
          left_col[i] = left_ref[i * ref_stride];
1406
1.49M
      }
1407
37.2M
      if (i < num_left_pixels_needed)
1408
8.49M
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1409
37.2M
    } else if (n_top_px > 0) {
1410
1.46M
      memset(left_col, above_ref[0], num_left_pixels_needed);
1411
1.46M
    }
1412
38.9M
  }
1413
1414
  // NEED_ABOVE
1415
50.2M
  if (need_above) {
1416
39.6M
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
1417
39.6M
    if (use_filter_intra) need_right = 0;
1418
39.6M
    if (is_dr_mode) need_right = p_angle < 90;
1419
39.6M
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
1420
39.6M
    if (n_top_px > 0) {
1421
37.6M
      memcpy(above_row, above_ref, n_top_px);
1422
37.6M
      i = n_top_px;
1423
37.6M
      if (need_right && n_topright_px > 0) {
1424
8.61M
        assert(n_top_px == txwpx);
1425
8.61M
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1426
8.61M
        i += n_topright_px;
1427
8.61M
      }
1428
37.6M
      if (i < num_top_pixels_needed)
1429
2.04M
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1430
37.6M
    } else if (n_left_px > 0) {
1431
1.75M
      memset(above_row, left_ref[0], num_top_pixels_needed);
1432
1.75M
    }
1433
39.6M
  }
1434
1435
50.2M
  if (need_above_left) {
1436
43.8M
    if (n_top_px > 0 && n_left_px > 0) {
1437
39.9M
      above_row[-1] = above_ref[-1];
1438
39.9M
    } else if (n_top_px > 0) {
1439
1.78M
      above_row[-1] = above_ref[0];
1440
2.16M
    } else if (n_left_px > 0) {
1441
2.06M
      above_row[-1] = left_ref[0];
1442
2.06M
    } else {
1443
106k
      above_row[-1] = 128;
1444
106k
    }
1445
43.8M
    left_col[-1] = above_row[-1];
1446
43.8M
  }
1447
1448
50.2M
  if (use_filter_intra) {
1449
498k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1450
498k
                               filter_intra_mode);
1451
498k
    return;
1452
498k
  }
1453
1454
49.7M
  if (is_dr_mode) {
1455
42.1M
    int upsample_above = 0;
1456
42.1M
    int upsample_left = 0;
1457
42.1M
    if (!disable_edge_filter) {
1458
42.1M
      const int need_right = p_angle < 90;
1459
42.1M
      const int need_bottom = p_angle > 180;
1460
42.1M
      if (p_angle != 90 && p_angle != 180) {
1461
40.0M
        const int ab_le = need_above_left ? 1 : 0;
1462
40.0M
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
1463
2.23M
          filter_intra_edge_corner(above_row, left_col);
1464
2.23M
        }
1465
40.0M
        if (need_above && n_top_px > 0) {
1466
29.1M
          const int strength = intra_edge_filter_strength(
1467
29.1M
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1468
29.1M
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1469
29.1M
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1470
29.1M
        }
1471
40.0M
        if (need_left && n_left_px > 0) {
1472
28.7M
          const int strength = intra_edge_filter_strength(
1473
28.7M
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1474
28.7M
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1475
28.7M
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1476
28.7M
        }
1477
40.0M
      }
1478
42.1M
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1479
42.1M
                                                   intra_edge_filter_type);
1480
42.1M
      if (need_above && upsample_above) {
1481
18.4M
        const int n_px = txwpx + (need_right ? txhpx : 0);
1482
18.4M
        av1_upsample_intra_edge(above_row, n_px);
1483
18.4M
      }
1484
42.1M
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1485
42.1M
                                                  intra_edge_filter_type);
1486
42.1M
      if (need_left && upsample_left) {
1487
17.7M
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
1488
17.7M
        av1_upsample_intra_edge(left_col, n_px);
1489
17.7M
      }
1490
42.1M
    }
1491
42.1M
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1492
42.1M
                 upsample_left, p_angle);
1493
42.1M
    return;
1494
42.1M
  }
1495
1496
  // predict
1497
7.55M
  if (mode == DC_PRED) {
1498
4.07M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1499
4.07M
                                                  left_col);
1500
4.07M
  } else {
1501
3.48M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1502
3.48M
  }
1503
7.55M
}
1504
1505
/*
 * Maps a block size to the size used when predicting a subsampled chroma
 * plane.  Only block sizes with a 4-pixel dimension are affected: a 4-wide
 * block becomes 8 wide when subsampling_x is 1, and a 4-high block becomes
 * 8 high when subsampling_y is 1.  Every other size is returned unchanged.
 *
 * Both subsampling arguments are expected to be 0 or 1 (checked by assert).
 */
static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);

  // Compare against 1 explicitly so that out-of-range values (possible only
  // when asserts are compiled out) leave the block size untouched.
  const int widen = (subsampling_x == 1);
  const int heighten = (subsampling_y == 1);

  switch (bsize) {
    case BLOCK_4X4:
      if (widen) return heighten ? BLOCK_8X8 : BLOCK_8X4;
      return heighten ? BLOCK_4X8 : BLOCK_4X4;
    case BLOCK_4X8: return widen ? BLOCK_8X8 : BLOCK_4X8;
    case BLOCK_8X4: return heighten ? BLOCK_8X8 : BLOCK_8X4;
    case BLOCK_4X16: return widen ? BLOCK_8X16 : BLOCK_4X16;
    case BLOCK_16X4: return heighten ? BLOCK_16X8 : BLOCK_16X4;
    default: return bsize;
  }
}
1555
1556
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1557
                             int enable_intra_edge_filter, int wpx, int hpx,
1558
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1559
                             int angle_delta, int use_palette,
1560
                             FILTER_INTRA_MODE filter_intra_mode,
1561
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1562
                             int dst_stride, int col_off, int row_off,
1563
51.7M
                             int plane) {
1564
51.7M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1565
51.7M
  const int txwpx = tx_size_wide[tx_size];
1566
51.7M
  const int txhpx = tx_size_high[tx_size];
1567
51.7M
  const int x = col_off << MI_SIZE_LOG2;
1568
51.7M
  const int y = row_off << MI_SIZE_LOG2;
1569
1570
51.7M
  if (use_palette) {
1571
0
    int r, c;
1572
0
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1573
0
                               xd->color_index_map_offset[plane != 0];
1574
0
    const uint16_t *const palette =
1575
0
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1576
0
    if (is_cur_buf_hbd(xd)) {
1577
0
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1578
0
      for (r = 0; r < txhpx; ++r) {
1579
0
        for (c = 0; c < txwpx; ++c) {
1580
0
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1581
0
        }
1582
0
      }
1583
0
    } else {
1584
0
      for (r = 0; r < txhpx; ++r) {
1585
0
        for (c = 0; c < txwpx; ++c) {
1586
0
          dst[r * dst_stride + c] =
1587
0
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1588
0
        }
1589
0
      }
1590
0
    }
1591
0
    return;
1592
0
  }
1593
1594
51.7M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1595
51.7M
  const int txw = tx_size_wide_unit[tx_size];
1596
51.7M
  const int txh = tx_size_high_unit[tx_size];
1597
51.7M
  const int ss_x = pd->subsampling_x;
1598
51.7M
  const int ss_y = pd->subsampling_y;
1599
51.7M
  const int have_top =
1600
51.7M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1601
51.7M
  const int have_left =
1602
51.7M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1603
51.7M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1604
51.7M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1605
1606
  // Distance between the right edge of this prediction block to
1607
  // the frame right edge
1608
51.7M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1609
  // Distance between the bottom edge of this prediction block to
1610
  // the frame bottom edge
1611
51.7M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1612
51.7M
  const int right_available =
1613
51.7M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1614
51.7M
  const int bottom_available =
1615
51.7M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1616
1617
51.7M
  const PARTITION_TYPE partition = mbmi->partition;
1618
1619
51.7M
  BLOCK_SIZE bsize = mbmi->bsize;
1620
  // force 4x4 chroma component block size.
1621
51.7M
  if (ss_x || ss_y) {
1622
1.48M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1623
1.48M
  }
1624
1625
51.7M
  const int have_top_right =
1626
51.7M
      has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
1627
51.7M
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
1628
51.7M
  const int have_bottom_left = has_bottom_left(
1629
51.7M
      sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
1630
51.7M
      tx_size, row_off, col_off, ss_x, ss_y);
1631
1632
51.7M
  const int disable_edge_filter = !enable_intra_edge_filter;
1633
51.7M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1634
51.7M
#if CONFIG_AV1_HIGHBITDEPTH
1635
51.7M
  if (is_cur_buf_hbd(xd)) {
1636
0
    build_intra_predictors_high(
1637
0
        ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
1638
0
        tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1639
0
        have_top_right ? AOMMIN(txwpx, xr) : 0,
1640
0
        have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1641
0
        have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type,
1642
0
        xd->bd);
1643
0
    return;
1644
0
  }
1645
51.7M
#endif
1646
51.7M
  build_intra_predictors(
1647
51.7M
      ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
1648
51.7M
      tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
1649
51.7M
      have_top_right ? AOMMIN(txwpx, xr) : 0,
1650
51.7M
      have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
1651
51.7M
      have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type);
1652
51.7M
}
1653
1654
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1655
                                    int plane, int blk_col, int blk_row,
1656
52.2M
                                    TX_SIZE tx_size) {
1657
52.2M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1658
52.2M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1659
52.2M
  const int dst_stride = pd->dst.stride;
1660
52.2M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1661
52.2M
  const PREDICTION_MODE mode =
1662
52.2M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1663
52.2M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1664
52.2M
  const FILTER_INTRA_MODE filter_intra_mode =
1665
52.2M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1666
52.2M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1667
52.2M
          : FILTER_INTRA_MODES;
1668
52.2M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1669
52.2M
  const SequenceHeader *seq_params = cm->seq_params;
1670
1671
52.2M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1672
#if CONFIG_DEBUG
1673
    assert(is_cfl_allowed(xd));
1674
    const BLOCK_SIZE plane_bsize =
1675
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1676
    (void)plane_bsize;
1677
    assert(plane_bsize < BLOCK_SIZES_ALL);
1678
    if (!xd->lossless[mbmi->segment_id]) {
1679
      assert(blk_col == 0);
1680
      assert(blk_row == 0);
1681
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1682
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1683
    }
1684
#endif
1685
616k
    CFL_CTX *const cfl = &xd->cfl;
1686
616k
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1687
616k
    if (cfl->dc_pred_is_cached[pred_plane] == 0) {
1688
77.0k
      av1_predict_intra_block(xd, seq_params->sb_size,
1689
77.0k
                              seq_params->enable_intra_edge_filter, pd->width,
1690
77.0k
                              pd->height, tx_size, mode, angle_delta,
1691
77.0k
                              use_palette, filter_intra_mode, dst, dst_stride,
1692
77.0k
                              dst, dst_stride, blk_col, blk_row, plane);
1693
77.0k
      if (cfl->use_dc_pred_cache) {
1694
77.0k
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1695
77.0k
        cfl->dc_pred_is_cached[pred_plane] = 1;
1696
77.0k
      }
1697
539k
    } else {
1698
539k
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1699
539k
    }
1700
616k
    cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1701
616k
    return;
1702
616k
  }
1703
51.6M
  av1_predict_intra_block(
1704
51.6M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1705
51.6M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1706
51.6M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1707
51.6M
}
1708
1709
1
void av1_init_intra_predictors(void) {
1710
1
  aom_once(init_intra_predictors_internal);
1711
1
}