Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconintra.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
15
#include "config/aom_config.h"
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/av1_rtcd.h"
18
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/aom_once.h"
22
#include "aom_ports/mem.h"
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/cfl.h"
25
#include "av1/common/reconintra.h"
26
27
// Bit flags naming the neighbouring edges a prediction mode reads from.
// They are OR-ed together per mode in extend_modes[]. Bit 0 is not used.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};

// Intra edge filter dimensions.
// NOTE(review): presumably the number of filter kernels and the taps per
// kernel -- confirm against the edge-filter code that consumes them.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
// NOTE(review): presumably the largest edge length eligible for upsampling --
// confirm against the upsampling code.
#define MAX_UPSAMPLE_SZ 16
// Size of a neighbour-pixel buffer: twice the largest transform dimension
// plus 32 extra entries.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
40
static const uint8_t extend_modes[INTRA_MODES] = {
41
  NEED_ABOVE | NEED_LEFT,                   // DC
42
  NEED_ABOVE,                               // V
43
  NEED_LEFT,                                // H
44
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54
};
55
56
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data (they are only ever reached through
// pointers to const), so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130
  // 4X4
131
  has_tr_4x4,
132
  // 4X8,       8X4,            8X8
133
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
134
  // 8X16,      16X8,           16X16
135
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
136
  // 16X32,     32X16,          32X32
137
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
138
  // 32X64,     64X32,          64X64
139
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
140
  // 64x128,    128x64,         128x128
141
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
142
  // 4x16,      16x4,            8x32
143
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
144
  // 32x8,      16x64,           64x16
145
  has_tr_32x8, has_tr_16x64, has_tr_64x16
146
};
147
148
// Top-right availability bit tables for square blocks visited in the
// vertical-first order of PARTITION_VERT_A / PARTITION_VERT_B. Same bit
// packing as the ordinary has_tr_* tables. Read-only, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158
// The _vert_* tables are like the ordinary tables above, but describe the
159
// order we visit square blocks when doing a PARTITION_VERT_A or
160
// PARTITION_VERT_B. This is the same order as normal except for on the last
161
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162
// as a pair of squares, which means that these tables work correctly for both
163
// mixed vertical partition types.
164
//
165
// There are tables for each of the square sizes. Vertical rectangles (like
166
// BLOCK_16X32) use their respective "non-vert" table
167
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168
  // 4X4
169
  NULL,
170
  // 4X8,      8X4,         8X8
171
  has_tr_4x8, NULL, has_tr_vert_8x8,
172
  // 8X16,     16X8,        16X16
173
  has_tr_8x16, NULL, has_tr_vert_16x16,
174
  // 16X32,    32X16,       32X32
175
  has_tr_16x32, NULL, has_tr_vert_32x32,
176
  // 32X64,    64X32,       64X64
177
  has_tr_32x64, NULL, has_tr_vert_64x64,
178
  // 64x128,   128x64,      128x128
179
  has_tr_64x128, NULL, has_tr_128x128
180
};
181
182
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183
1.18M
                                       BLOCK_SIZE bsize) {
184
1.18M
  const uint8_t *ret = NULL;
185
  // If this is a mixed vertical partition, look up bsize in orders_vert.
186
1.18M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187
105k
    assert(bsize < BLOCK_SIZES);
188
105k
    ret = has_tr_vert_tables[bsize];
189
1.07M
  } else {
190
1.07M
    ret = has_tr_tables[bsize];
191
1.07M
  }
192
1.18M
  assert(ret);
193
1.18M
  return ret;
194
1.18M
}
195
196
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197
                         int mi_col, int top_available, int right_available,
198
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199
3.10M
                         int col_off, int ss_x, int ss_y) {
200
3.10M
  if (!top_available || !right_available) return 0;
201
202
2.83M
  const int bw_unit = mi_size_wide[bsize];
203
2.83M
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204
2.83M
  const int top_right_count_unit = tx_size_wide_unit[txsz];
205
206
2.83M
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
207
937k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208
      // Special case: For 128x128 blocks, the transform unit whose
209
      // top-right corner is at the center of the block does in fact have
210
      // pixels available at its top-right corner.
211
277k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212
65.7k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213
20.4k
        return 1;
214
20.4k
      }
215
256k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216
256k
      const int col_off_64 = col_off % plane_bw_unit_64;
217
256k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218
277k
    }
219
660k
    return col_off + top_right_count_unit < plane_bw_unit;
220
1.89M
  } else {
221
    // All top-right pixels are in the block above, which is already available.
222
1.89M
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223
224
1.60M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225
1.60M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226
1.60M
    const int sb_mi_size = mi_size_high[sb_size];
227
1.60M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228
1.60M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229
230
    // Top row of superblock: so top-right pixels are in the top and/or
231
    // top-right superblocks, both of which are already available.
232
1.60M
    if (blk_row_in_sb == 0) return 1;
233
234
    // Rightmost column of superblock (and not the top row): so top-right pixels
235
    // fall in the right superblock, which is not available yet.
236
1.36M
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237
187k
      return 0;
238
187k
    }
239
240
    // General case (neither top row nor rightmost column): check if the
241
    // top-right block is coded before the current block.
242
1.18M
    const int this_blk_index =
243
1.18M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244
1.18M
        blk_col_in_sb + 0;
245
1.18M
    const int idx1 = this_blk_index / 8;
246
1.18M
    const int idx2 = this_blk_index % 8;
247
1.18M
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248
1.18M
    return (has_tr_table[idx1] >> idx2) & 1;
249
1.36M
  }
250
2.83M
}
251
252
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. Bit packing is identical: the flag for block index i
// is (table[i / 8] >> (i % 8)) & 1.
//
// The tables are read-only lookup data (they are only ever reached through
// pointers to const), so they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315
  // 4X4
316
  has_bl_4x4,
317
  // 4X8,         8X4,         8X8
318
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
319
  // 8X16,        16X8,        16X16
320
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
321
  // 16X32,       32X16,       32X32
322
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
323
  // 32X64,       64X32,       64X64
324
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
325
  // 64x128,      128x64,      128x128
326
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
327
  // 4x16,        16x4,        8x32
328
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
329
  // 32x8,        16x64,       64x16
330
  has_bl_32x8, has_bl_16x64, has_bl_64x16
331
};
332
333
// Bottom-left availability bit tables for square blocks visited in the
// vertical-first order of PARTITION_VERT_A / PARTITION_VERT_B. Same bit
// packing as the ordinary has_bl_* tables. Read-only, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343
// The _vert_* tables are like the ordinary tables above, but describe the
344
// order we visit square blocks when doing a PARTITION_VERT_A or
345
// PARTITION_VERT_B. This is the same order as normal except for on the last
346
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347
// as a pair of squares, which means that these tables work correctly for both
348
// mixed vertical partition types.
349
//
350
// There are tables for each of the square sizes. Vertical rectangles (like
351
// BLOCK_16X32) use their respective "non-vert" table
352
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353
  // 4X4
354
  NULL,
355
  // 4X8,     8X4,         8X8
356
  has_bl_4x8, NULL, has_bl_vert_8x8,
357
  // 8X16,    16X8,        16X16
358
  has_bl_8x16, NULL, has_bl_vert_16x16,
359
  // 16X32,   32X16,       32X32
360
  has_bl_16x32, NULL, has_bl_vert_32x32,
361
  // 32X64,   64X32,       64X64
362
  has_bl_32x64, NULL, has_bl_vert_64x64,
363
  // 64x128,  128x64,      128x128
364
  has_bl_64x128, NULL, has_bl_128x128
365
};
366
367
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368
1.31M
                                       BLOCK_SIZE bsize) {
369
1.31M
  const uint8_t *ret = NULL;
370
  // If this is a mixed vertical partition, look up bsize in orders_vert.
371
1.31M
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372
119k
    assert(bsize < BLOCK_SIZES);
373
119k
    ret = has_bl_vert_tables[bsize];
374
1.19M
  } else {
375
1.19M
    ret = has_bl_tables[bsize];
376
1.19M
  }
377
1.31M
  assert(ret);
378
1.31M
  return ret;
379
1.31M
}
380
381
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382
                           int mi_col, int bottom_available, int left_available,
383
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384
3.54M
                           int col_off, int ss_x, int ss_y) {
385
3.54M
  if (!bottom_available || !left_available) return 0;
386
387
  // Special case for 128x* blocks, when col_off is half the block width.
388
  // This is needed because 128x* superblocks are divided into 64x* blocks in
389
  // raster order
390
3.29M
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391
500k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392
500k
    const int col_off_64 = col_off % plane_bw_unit_64;
393
500k
    if (col_off_64 == 0) {
394
      // We are at the left edge of top-right or bottom-right 64x* block.
395
78.5k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396
78.5k
      const int row_off_64 = row_off % plane_bh_unit_64;
397
78.5k
      const int plane_bh_unit =
398
78.5k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399
      // Check if all bottom-left pixels are in the left 64x* block (which is
400
      // already coded).
401
78.5k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402
78.5k
    }
403
500k
  }
404
405
3.22M
  if (col_off > 0) {
406
    // Bottom-left pixels are in the bottom-left block, which is not available.
407
1.14M
    return 0;
408
2.07M
  } else {
409
2.07M
    const int bh_unit = mi_size_high[bsize];
410
2.07M
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411
2.07M
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
412
413
    // All bottom-left pixels are in the left block, which is already available.
414
2.07M
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415
416
1.80M
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417
1.80M
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418
1.80M
    const int sb_mi_size = mi_size_high[sb_size];
419
1.80M
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420
1.80M
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421
422
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
423
    // and/or bottom-left superblocks. But only the left superblock is
424
    // available, so check if all required pixels fall in that superblock.
425
1.80M
    if (blk_col_in_sb == 0) {
426
261k
      const int blk_start_row_off =
427
261k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428
261k
          ss_y;
429
261k
      const int row_off_in_sb = blk_start_row_off + row_off;
430
261k
      const int sb_height_unit = sb_mi_size >> ss_y;
431
261k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432
261k
    }
433
434
    // Bottom row of superblock (and not the leftmost column): so bottom-left
435
    // pixels fall in the bottom superblock, which is not available yet.
436
1.54M
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437
438
    // General case (neither leftmost column nor bottom row): check if the
439
    // bottom-left block is coded before the current block.
440
1.31M
    const int this_blk_index =
441
1.31M
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442
1.31M
        blk_col_in_sb + 0;
443
1.31M
    const int idx1 = this_blk_index / 8;
444
1.31M
    const int idx2 = this_blk_index % 8;
445
1.31M
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446
1.31M
    return (has_bl_table[idx1] >> idx2) & 1;
447
1.54M
  }
448
3.22M
}
449
450
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451
                              const uint8_t *above, const uint8_t *left);
452
453
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456
#if CONFIG_AV1_HIGHBITDEPTH
457
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458
                                   const uint16_t *above, const uint16_t *left,
459
                                   int bd);
460
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462
#endif
463
464
7
static void init_intra_predictors_internal(void) {
465
7
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468
#define INIT_RECTANGULAR(p, type)             \
469
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
470
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
471
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
472
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
473
  p[TX_16X32] = aom_##type##_predictor_16x32; \
474
  p[TX_32X16] = aom_##type##_predictor_32x16; \
475
  p[TX_32X64] = aom_##type##_predictor_32x64; \
476
  p[TX_64X32] = aom_##type##_predictor_64x32;
477
#else
478
7
#define INIT_RECTANGULAR(p, type)             \
479
140
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
480
140
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
481
140
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
482
140
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
483
140
  p[TX_16X32] = aom_##type##_predictor_16x32; \
484
140
  p[TX_32X16] = aom_##type##_predictor_32x16; \
485
140
  p[TX_32X64] = aom_##type##_predictor_32x64; \
486
140
  p[TX_64X32] = aom_##type##_predictor_64x32; \
487
140
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
488
140
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
489
140
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
490
140
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
491
140
  p[TX_16X64] = aom_##type##_predictor_16x64; \
492
140
  p[TX_64X16] = aom_##type##_predictor_64x16;
493
7
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494
495
7
#define INIT_NO_4X4(p, type)                  \
496
140
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
497
140
  p[TX_16X16] = aom_##type##_predictor_16x16; \
498
140
  p[TX_32X32] = aom_##type##_predictor_32x32; \
499
140
  p[TX_64X64] = aom_##type##_predictor_64x64; \
500
140
  INIT_RECTANGULAR(p, type)
501
502
7
#define INIT_ALL_SIZES(p, type)           \
503
140
  p[TX_4X4] = aom_##type##_predictor_4x4; \
504
140
  INIT_NO_4X4(p, type)
505
506
7
  INIT_ALL_SIZES(pred[V_PRED], v)
507
7
  INIT_ALL_SIZES(pred[H_PRED], h)
508
7
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509
7
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510
7
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511
7
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512
7
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513
7
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514
7
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515
7
  INIT_ALL_SIZES(dc_pred[1][1], dc)
516
7
#if CONFIG_AV1_HIGHBITDEPTH
517
7
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518
7
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519
7
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520
7
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521
7
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522
7
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523
7
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524
7
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525
7
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526
7
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527
7
#endif
528
7
#undef intra_pred_allsizes
529
7
}
530
531
// Directional prediction, zone 1: 0 < angle < 90.
// Predicts a bw x bh block from the `above` row only. `dx` is the horizontal
// step per output row in 1/64 sample units (1/32 when the edge is
// upsampled). `left` is unused and `dy` must be 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  // Index of the last usable sample in `above`; reads past it are clamped.
  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  for (int r = 0; r < bh; ++r) {
    uint8_t *row = dst + r * stride;
    const int x = (r + 1) * dx;
    int base = x >> frac_bits;

    if (base >= max_base_x) {
      // This row and every later one start beyond the edge: flat fill.
      for (int i = r; i < bh; ++i, row += stride) {
        memset(row, above[max_base_x], bw * sizeof(row[0]));
      }
      return;
    }

    // 5-bit interpolation weight taken from the fractional position.
    const int shift = ((x << upsample_above) & 0x3F) >> 1;
    int c = 0;
    // Interpolate while both taps are inside the edge...
    for (; c < bw && base < max_base_x; ++c, base += base_inc) {
      const int val = above[base] * (32 - shift) + above[base + 1] * shift;
      row[c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // ...then replicate the last edge sample for the remaining columns.
    for (; c < bw; ++c) row[c] = above[max_base_x];
  }
}
568
569
// Directional prediction, zone 2: 90 < angle < 180.
// Each output pixel is projected onto the `above` row; when that projection
// falls left of index -(1 << upsample_above) it is taken from the `left`
// column instead. Both arrays are read at negative indices.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    // Horizontal displacement accumulated over r + 1 rows.
    const int row_dx = (r + 1) * dx;

    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - row_dx;
      const int base_x = x >> frac_bits_x;
      const uint8_t *ref;
      int base, shift;

      if (base_x >= min_base_x) {
        ref = above;
        base = base_x;
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        ref = left;
        base = base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      // Two-tap interpolation with 5-bit weights.
      const int val = ref[base] * (32 - shift) + ref[base + 1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
607
608
// Directional prediction, zone 3: 180 < angle < 270.
// Predicts a bw x bh block from the `left` column only, one output column at
// a time. `dy` is the vertical step per output column in 1/64 sample units
// (1/32 when the edge is upsampled). `above` is unused and `dx` must be 1.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  // Index of the last usable sample in `left`; reads past it are clamped.
  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  for (int c = 0; c < bw; ++c) {
    const int y = (c + 1) * dy;
    // 5-bit interpolation weight taken from the fractional position.
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int base = y >> frac_bits;
    int r = 0;

    // Interpolate while both taps are inside the edge...
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // ...then replicate the last edge sample down the rest of the column.
    for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
  }
}
639
640
// Dispatches an 8-bit directional prediction by angle: exactly 90 and 180
// degrees use the V_PRED and H_PRED predictors from pred[][], every other
// angle in (0, 270) goes to the zone 1 / 2 / 3 routine for its range.
// Angles outside (0, 270) trip the assert and otherwise write nothing.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
664
665
#if CONFIG_AV1_HIGHBITDEPTH
666
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
// Predicts a bw x bh block from the `above` row only. `dx` is the horizontal
// step per output row in 1/64 sample units (1/32 when the edge is
// upsampled). `left` and `bd` are unused and `dy` must be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  // Index of the last usable sample in `above`; reads past it are clamped.
  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  for (int r = 0; r < bh; ++r) {
    uint16_t *row = dst + r * stride;
    const int x = (r + 1) * dx;
    int base = x >> frac_bits;

    if (base >= max_base_x) {
      // This row and every later one start beyond the edge: flat fill.
      for (int i = r; i < bh; ++i, row += stride) {
        aom_memset16(row, above[max_base_x], bw);
      }
      return;
    }

    // 5-bit interpolation weight taken from the fractional position.
    const int shift = ((x << upsample_above) & 0x3F) >> 1;
    int c = 0;
    // Interpolate while both taps are inside the edge...
    for (; c < bw && base < max_base_x; ++c, base += base_inc) {
      const int val = above[base] * (32 - shift) + above[base + 1] * shift;
      row[c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // ...then replicate the last edge sample for the remaining columns.
    for (; c < bw; ++c) row[c] = above[max_base_x];
  }
}
705
706
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
// Each output pixel is projected onto the `above` row; when that projection
// falls left of index -(1 << upsample_above) it is taken from the `left`
// column instead. Both arrays are read at negative indices. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    // Horizontal displacement accumulated over r + 1 rows.
    const int row_dx = (r + 1) * dx;

    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - row_dx;
      const int base_x = x >> frac_bits_x;
      const uint16_t *ref;
      int base, shift;

      if (base_x >= min_base_x) {
        ref = above;
        base = base_x;
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        ref = left;
        base = base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      // Two-tap interpolation with 5-bit weights.
      const int val = ref[base] * (32 - shift) + ref[base + 1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
745
746
// Directional prediction, zone 3: 180 < angle < 270
//
// Mirror image of zone 1: only the `left` edge is read, and the block is
// produced column by column. The read position starts at `dy` and moves `dy`
// further along the edge for every output column. Once the edge index reaches
// (bw + bh - 1) << upsample_left, the remaining rows of that column replicate
// that edge sample. `above`, `dx` and `bd` are not used by this
// implementation.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int int_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> int_bits;
    int row = 0;

    // Interpolated part of the column.
    while (row < bh && idx < last_idx) {
      const int sum = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
      idx += idx_step;
      ++row;
    }

    // Whatever is left of the column lies past the usable edge.
    while (row < bh) {
      dst[row * stride + col] = left[last_idx];
      ++row;
    }
  }
}
778
779
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780
                                TX_SIZE tx_size, const uint16_t *above,
781
                                const uint16_t *left, int upsample_above,
782
8.18M
                                int upsample_left, int angle, int bd) {
783
8.18M
  const int dx = av1_get_dx(angle);
784
8.18M
  const int dy = av1_get_dy(angle);
785
8.18M
  const int bw = tx_size_wide[tx_size];
786
8.18M
  const int bh = tx_size_high[tx_size];
787
8.18M
  assert(angle > 0 && angle < 270);
788
789
8.18M
  if (angle > 0 && angle < 90) {
790
1.43M
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791
1.43M
                                upsample_above, dx, dy, bd);
792
6.75M
  } else if (angle > 90 && angle < 180) {
793
2.98M
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794
2.98M
                                upsample_above, upsample_left, dx, dy, bd);
795
3.76M
  } else if (angle > 180 && angle < 270) {
796
1.74M
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797
1.74M
                                dx, dy, bd);
798
2.02M
  } else if (angle == 90) {
799
786k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800
1.23M
  } else if (angle == 180) {
801
1.23M
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802
1.23M
  }
803
8.18M
}
804
#endif  // CONFIG_AV1_HIGHBITDEPTH
805
806
DECLARE_ALIGNED(16, const int8_t,
807
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808
  {
809
      { -6, 10, 0, 0, 0, 12, 0, 0 },
810
      { -5, 2, 10, 0, 0, 9, 0, 0 },
811
      { -3, 1, 1, 10, 0, 7, 0, 0 },
812
      { -3, 1, 1, 2, 10, 5, 0, 0 },
813
      { -4, 6, 0, 0, 0, 2, 12, 0 },
814
      { -3, 2, 6, 0, 0, 2, 9, 0 },
815
      { -3, 2, 2, 6, 0, 2, 7, 0 },
816
      { -3, 1, 2, 2, 6, 3, 5, 0 },
817
  },
818
  {
819
      { -10, 16, 0, 0, 0, 10, 0, 0 },
820
      { -6, 0, 16, 0, 0, 6, 0, 0 },
821
      { -4, 0, 0, 16, 0, 4, 0, 0 },
822
      { -2, 0, 0, 0, 16, 2, 0, 0 },
823
      { -10, 16, 0, 0, 0, 0, 10, 0 },
824
      { -6, 0, 16, 0, 0, 0, 6, 0 },
825
      { -4, 0, 0, 16, 0, 0, 4, 0 },
826
      { -2, 0, 0, 0, 16, 0, 2, 0 },
827
  },
828
  {
829
      { -8, 8, 0, 0, 0, 16, 0, 0 },
830
      { -8, 0, 8, 0, 0, 16, 0, 0 },
831
      { -8, 0, 0, 8, 0, 16, 0, 0 },
832
      { -8, 0, 0, 0, 8, 16, 0, 0 },
833
      { -4, 4, 0, 0, 0, 0, 16, 0 },
834
      { -4, 0, 4, 0, 0, 0, 16, 0 },
835
      { -4, 0, 0, 4, 0, 0, 16, 0 },
836
      { -4, 0, 0, 0, 4, 0, 16, 0 },
837
  },
838
  {
839
      { -2, 8, 0, 0, 0, 10, 0, 0 },
840
      { -1, 3, 8, 0, 0, 6, 0, 0 },
841
      { -1, 2, 3, 8, 0, 4, 0, 0 },
842
      { 0, 1, 2, 3, 8, 2, 0, 0 },
843
      { -1, 4, 0, 0, 0, 3, 10, 0 },
844
      { -1, 3, 4, 0, 0, 4, 6, 0 },
845
      { -1, 2, 3, 4, 0, 4, 4, 0 },
846
      { -1, 2, 2, 3, 4, 3, 3, 0 },
847
  },
848
  {
849
      { -12, 14, 0, 0, 0, 14, 0, 0 },
850
      { -10, 0, 14, 0, 0, 12, 0, 0 },
851
      { -9, 0, 0, 14, 0, 11, 0, 0 },
852
      { -8, 0, 0, 0, 14, 10, 0, 0 },
853
      { -10, 12, 0, 0, 0, 0, 14, 0 },
854
      { -9, 1, 12, 0, 0, 0, 12, 0 },
855
      { -8, 0, 0, 12, 0, 1, 11, 0 },
856
      { -7, 0, 0, 1, 12, 1, 9, 0 },
857
  },
858
};
859
860
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861
                                  TX_SIZE tx_size, const uint8_t *above,
862
1.58M
                                  const uint8_t *left, int mode) {
863
1.58M
  int r, c;
864
1.58M
  uint8_t buffer[33][33];
865
1.58M
  const int bw = tx_size_wide[tx_size];
866
1.58M
  const int bh = tx_size_high[tx_size];
867
868
1.58M
  assert(bw <= 32 && bh <= 32);
869
870
14.7M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871
1.58M
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872
873
8.15M
  for (r = 1; r < bh + 1; r += 2)
874
24.6M
    for (c = 1; c < bw + 1; c += 4) {
875
18.1M
      const uint8_t p0 = buffer[r - 1][c - 1];
876
18.1M
      const uint8_t p1 = buffer[r - 1][c];
877
18.1M
      const uint8_t p2 = buffer[r - 1][c + 1];
878
18.1M
      const uint8_t p3 = buffer[r - 1][c + 2];
879
18.1M
      const uint8_t p4 = buffer[r - 1][c + 3];
880
18.1M
      const uint8_t p5 = buffer[r][c - 1];
881
18.1M
      const uint8_t p6 = buffer[r + 1][c - 1];
882
163M
      for (int k = 0; k < 8; ++k) {
883
144M
        int r_offset = k >> 2;
884
144M
        int c_offset = k & 0x03;
885
144M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886
144M
                 av1_filter_intra_taps[mode][k][1] * p1 +
887
144M
                 av1_filter_intra_taps[mode][k][2] * p2 +
888
144M
                 av1_filter_intra_taps[mode][k][3] * p3 +
889
144M
                 av1_filter_intra_taps[mode][k][4] * p4 +
890
144M
                 av1_filter_intra_taps[mode][k][5] * p5 +
891
144M
                 av1_filter_intra_taps[mode][k][6] * p6;
892
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
893
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894
        // Since Clip1() clips a negative value to 0, it is safe to replace
895
        // Round2Signed() with Round2().
896
144M
        buffer[r + r_offset][c + c_offset] =
897
144M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898
144M
      }
899
18.1M
    }
900
901
14.7M
  for (r = 0; r < bh; ++r) {
902
13.1M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903
13.1M
    dst += stride;
904
13.1M
  }
905
1.58M
}
906
907
#if CONFIG_AV1_HIGHBITDEPTH
908
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909
                                          TX_SIZE tx_size,
910
                                          const uint16_t *above,
911
                                          const uint16_t *left, int mode,
912
1.93M
                                          int bd) {
913
1.93M
  int r, c;
914
1.93M
  uint16_t buffer[33][33];
915
1.93M
  const int bw = tx_size_wide[tx_size];
916
1.93M
  const int bh = tx_size_high[tx_size];
917
918
1.93M
  assert(bw <= 32 && bh <= 32);
919
920
17.4M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921
1.93M
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922
923
9.71M
  for (r = 1; r < bh + 1; r += 2)
924
28.0M
    for (c = 1; c < bw + 1; c += 4) {
925
20.3M
      const uint16_t p0 = buffer[r - 1][c - 1];
926
20.3M
      const uint16_t p1 = buffer[r - 1][c];
927
20.3M
      const uint16_t p2 = buffer[r - 1][c + 1];
928
20.3M
      const uint16_t p3 = buffer[r - 1][c + 2];
929
20.3M
      const uint16_t p4 = buffer[r - 1][c + 3];
930
20.3M
      const uint16_t p5 = buffer[r][c - 1];
931
20.3M
      const uint16_t p6 = buffer[r + 1][c - 1];
932
182M
      for (int k = 0; k < 8; ++k) {
933
162M
        int r_offset = k >> 2;
934
162M
        int c_offset = k & 0x03;
935
162M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936
162M
                 av1_filter_intra_taps[mode][k][1] * p1 +
937
162M
                 av1_filter_intra_taps[mode][k][2] * p2 +
938
162M
                 av1_filter_intra_taps[mode][k][3] * p3 +
939
162M
                 av1_filter_intra_taps[mode][k][4] * p4 +
940
162M
                 av1_filter_intra_taps[mode][k][5] * p5 +
941
162M
                 av1_filter_intra_taps[mode][k][6] * p6;
942
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
943
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944
        // Since Clip1() clips a negative value to 0, it is safe to replace
945
        // Round2Signed() with Round2().
946
162M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947
162M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948
162M
      }
949
20.3M
    }
950
951
17.4M
  for (r = 0; r < bh; ++r) {
952
15.5M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953
15.5M
    dst += stride;
954
15.5M
  }
955
1.93M
}
956
#endif  // CONFIG_AV1_HIGHBITDEPTH
957
958
34.4M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959
34.4M
  if (plane == 0) {
960
21.0M
    const PREDICTION_MODE mode = mbmi->mode;
961
21.0M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962
18.6M
            mode == SMOOTH_H_PRED);
963
21.0M
  } else {
964
    // uv_mode is not set for inter blocks, so need to explicitly
965
    // detect that case.
966
13.4M
    if (is_inter_block(mbmi)) return 0;
967
968
13.3M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969
13.3M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970
11.9M
            uv_mode == UV_SMOOTH_H_PRED);
971
13.4M
  }
972
34.4M
}
973
974
20.4M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975
20.4M
  const MB_MODE_INFO *above;
976
20.4M
  const MB_MODE_INFO *left;
977
978
20.4M
  if (plane == 0) {
979
12.3M
    above = xd->above_mbmi;
980
12.3M
    left = xd->left_mbmi;
981
12.3M
  } else {
982
8.10M
    above = xd->chroma_above_mbmi;
983
8.10M
    left = xd->chroma_left_mbmi;
984
8.10M
  }
985
986
20.4M
  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987
20.4M
}
988
989
13.1M
// Picks the intra edge filter strength (0 = no filtering, up to 3) from the
// sum of the two block dimensions, the magnitude of `delta`, and the filter
// `type` (0 selects the first threshold set, any other value the second).
// Larger blocks and larger |delta| give stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type == 0) {
    if (size_sum <= 8) return mag >= 56;
    // The (8, 12] and (12, 16] size classes share the same threshold.
    if (size_sum <= 16) return mag >= 40;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return mag >= 8;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return mag >= 1;
    }
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return mag >= 40;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return mag >= 20;
  }
  if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
  return (mag >= 1) ? 3 : 0;
}
1027
1028
5.82M
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029
5.82M
  if (!strength) return;
1030
1031
3.68M
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032
3.68M
                                                         { 0, 5, 6, 5, 0 },
1033
3.68M
                                                         { 2, 4, 4, 4, 2 } };
1034
3.68M
  const int filt = strength - 1;
1035
3.68M
  uint8_t edge[129];
1036
1037
3.68M
  memcpy(edge, p, sz * sizeof(*p));
1038
75.6M
  for (int i = 1; i < sz; i++) {
1039
71.9M
    int s = 0;
1040
431M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041
359M
      int k = i - 2 + j;
1042
359M
      k = (k < 0) ? 0 : k;
1043
359M
      k = (k > sz - 1) ? sz - 1 : k;
1044
359M
      s += edge[k] * kernel[filt][j];
1045
359M
    }
1046
71.9M
    s = (s + 8) >> 4;
1047
71.9M
    p[i] = s;
1048
71.9M
  }
1049
3.68M
}
1050
1051
689k
// Smooths the above-left corner sample with a {5, 6, 5} / 16 filter across
// left[0], the corner (p_above[-1]) and above[0], then stores the result in
// both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int side_weight = 5;
  const int centre_weight = 6;

  const int weighted = side_weight * p_left[0] + centre_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1060
1061
1.65M
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062
  // interpolate half-sample positions
1063
1.65M
  assert(sz <= MAX_UPSAMPLE_SZ);
1064
1065
1.65M
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066
  // copy p[-1..(sz-1)] and extend first and last samples
1067
1.65M
  in[0] = p[-1];
1068
1.65M
  in[1] = p[-1];
1069
15.2M
  for (int i = 0; i < sz; i++) {
1070
13.5M
    in[i + 2] = p[i];
1071
13.5M
  }
1072
1.65M
  in[sz + 2] = p[sz - 1];
1073
1074
  // interpolate half-sample edge positions
1075
1.65M
  p[-2] = in[0];
1076
15.2M
  for (int i = 0; i < sz; i++) {
1077
13.5M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078
13.5M
    s = clip_pixel((s + 8) >> 4);
1079
13.5M
    p[2 * i - 1] = s;
1080
13.5M
    p[2 * i] = in[i + 2];
1081
13.5M
  }
1082
1.65M
}
1083
1084
static void build_directional_and_filter_intra_predictors(
1085
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088
10.1M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089
10.1M
  int i;
1090
10.1M
  const uint8_t *above_ref = ref - ref_stride;
1091
10.1M
  const uint8_t *left_ref = ref - 1;
1092
10.1M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093
10.1M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094
10.1M
  uint8_t *const above_row = above_data + 16;
1095
10.1M
  uint8_t *const left_col = left_data + 16;
1096
10.1M
  const int txwpx = tx_size_wide[tx_size];
1097
10.1M
  const int txhpx = tx_size_high[tx_size];
1098
10.1M
  int need_left = extend_modes[mode] & NEED_LEFT;
1099
10.1M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1100
10.1M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101
10.1M
  const int is_dr_mode = av1_is_directional_mode(mode);
1102
10.1M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103
10.1M
  assert(use_filter_intra || is_dr_mode);
1104
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1105
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1106
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1107
  // be the potential reason for this issue.
1108
10.1M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109
10.1M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110
1111
  // The default values if ref pixels are not available:
1112
  // 128 127 127 .. 127 127 127 127 127 127
1113
  // 129  A   B  ..  Y   Z
1114
  // 129  C   D  ..  W   X
1115
  // 129  E   F  ..  U   V
1116
  // 129  G   H  ..  S   T   T   T   T   T
1117
  // ..
1118
1119
10.1M
  if (is_dr_mode) {
1120
8.55M
    if (p_angle <= 90)
1121
2.58M
      need_above = 1, need_left = 0, need_above_left = 1;
1122
5.97M
    else if (p_angle < 180)
1123
2.88M
      need_above = 1, need_left = 1, need_above_left = 1;
1124
3.08M
    else
1125
3.08M
      need_above = 0, need_left = 1, need_above_left = 1;
1126
8.55M
  }
1127
10.1M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128
1129
10.1M
  assert(n_top_px >= 0);
1130
10.1M
  assert(n_topright_px >= -1);
1131
10.1M
  assert(n_left_px >= 0);
1132
10.1M
  assert(n_bottomleft_px >= -1);
1133
1134
10.1M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135
177k
    int val;
1136
177k
    if (need_left) {
1137
124k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1138
124k
    } else {
1139
52.8k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1140
52.8k
    }
1141
3.82M
    for (i = 0; i < txhpx; ++i) {
1142
3.65M
      memset(dst, val, txwpx);
1143
3.65M
      dst += dst_stride;
1144
3.65M
    }
1145
177k
    return;
1146
177k
  }
1147
1148
  // NEED_LEFT
1149
9.96M
  if (need_left) {
1150
7.43M
    const int num_left_pixels_needed =
1151
7.43M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152
7.43M
    i = 0;
1153
7.43M
    if (n_left_px > 0) {
1154
84.1M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155
7.28M
      if (n_bottomleft_px > 0) {
1156
557k
        assert(i == txhpx);
1157
6.07M
        for (; i < txhpx + n_bottomleft_px; i++)
1158
5.52M
          left_col[i] = left_ref[i * ref_stride];
1159
557k
      }
1160
7.28M
      if (i < num_left_pixels_needed)
1161
1.30M
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162
7.28M
    } else if (n_top_px > 0) {
1163
149k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1164
149k
    }
1165
7.43M
  }
1166
1167
  // NEED_ABOVE
1168
9.96M
  if (need_above) {
1169
7.00M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170
7.00M
    if (n_top_px > 0) {
1171
6.89M
      memcpy(above_row, above_ref, n_top_px);
1172
6.89M
      i = n_top_px;
1173
6.89M
      if (n_topright_px > 0) {
1174
979k
        assert(n_top_px == txwpx);
1175
979k
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176
979k
        i += n_topright_px;
1177
979k
      }
1178
6.89M
      if (i < num_top_pixels_needed)
1179
890k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180
6.89M
    } else if (n_left_px > 0) {
1181
108k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1182
108k
    }
1183
7.00M
  }
1184
1185
9.96M
  if (need_above_left) {
1186
9.96M
    if (n_top_px > 0 && n_left_px > 0) {
1187
9.51M
      above_row[-1] = above_ref[-1];
1188
9.51M
    } else if (n_top_px > 0) {
1189
273k
      above_row[-1] = above_ref[0];
1190
273k
    } else if (n_left_px > 0) {
1191
181k
      above_row[-1] = left_ref[0];
1192
181k
    } else {
1193
2.28k
      above_row[-1] = 128;
1194
2.28k
    }
1195
9.96M
    left_col[-1] = above_row[-1];
1196
9.96M
  }
1197
1198
9.96M
  if (use_filter_intra) {
1199
1.58M
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200
1.58M
                               filter_intra_mode);
1201
1.58M
    return;
1202
1.58M
  }
1203
1204
9.96M
  assert(is_dr_mode);
1205
8.38M
  int upsample_above = 0;
1206
8.38M
  int upsample_left = 0;
1207
8.38M
  if (!disable_edge_filter) {
1208
5.51M
    const int need_right = p_angle < 90;
1209
5.51M
    const int need_bottom = p_angle > 180;
1210
5.51M
    if (p_angle != 90 && p_angle != 180) {
1211
4.01M
      assert(need_above_left);
1212
4.01M
      const int ab_le = 1;
1213
4.01M
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214
689k
        filter_intra_edge_corner(above_row, left_col);
1215
689k
      }
1216
4.01M
      if (need_above && n_top_px > 0) {
1217
2.87M
        const int strength = intra_edge_filter_strength(
1218
2.87M
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219
2.87M
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220
2.87M
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221
2.87M
      }
1222
4.01M
      if (need_left && n_left_px > 0) {
1223
2.94M
        const int strength = intra_edge_filter_strength(
1224
2.94M
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225
2.94M
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226
2.94M
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227
2.94M
      }
1228
4.01M
    }
1229
5.51M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230
5.51M
                                                 intra_edge_filter_type);
1231
5.51M
    if (need_above && upsample_above) {
1232
675k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1233
675k
      av1_upsample_intra_edge(above_row, n_px);
1234
675k
    }
1235
5.51M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236
5.51M
                                                intra_edge_filter_type);
1237
5.51M
    if (need_left && upsample_left) {
1238
984k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239
984k
      av1_upsample_intra_edge(left_col, n_px);
1240
984k
    }
1241
5.51M
  }
1242
8.38M
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243
8.38M
               upsample_left, p_angle);
1244
8.38M
}
1245
1246
// This function generates the pred data of a given block for non-directional
1247
// intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
1248
static void build_non_directional_intra_predictors(
1249
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250
23.4M
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251
23.4M
  const uint8_t *above_ref = ref - ref_stride;
1252
23.4M
  const uint8_t *left_ref = ref - 1;
1253
23.4M
  const int txwpx = tx_size_wide[tx_size];
1254
23.4M
  const int txhpx = tx_size_high[tx_size];
1255
23.4M
  const int need_left = extend_modes[mode] & NEED_LEFT;
1256
23.4M
  const int need_above = extend_modes[mode] & NEED_ABOVE;
1257
23.4M
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258
23.4M
  int i = 0;
1259
23.4M
  assert(n_top_px >= 0);
1260
23.4M
  assert(n_left_px >= 0);
1261
23.4M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262
23.4M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263
1264
23.4M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265
0
    int val = 0;
1266
0
    if (need_left) {
1267
0
      val = (n_top_px > 0) ? above_ref[0] : 129;
1268
0
    } else {
1269
0
      val = (n_left_px > 0) ? left_ref[0] : 127;
1270
0
    }
1271
0
    for (i = 0; i < txhpx; ++i) {
1272
0
      memset(dst, val, txwpx);
1273
0
      dst += dst_stride;
1274
0
    }
1275
0
    return;
1276
0
  }
1277
1278
23.4M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279
23.4M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280
23.4M
  uint8_t *const above_row = above_data + 16;
1281
23.4M
  uint8_t *const left_col = left_data + 16;
1282
1283
23.4M
  if (need_left) {
1284
23.4M
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285
23.4M
    if (n_left_px > 0) {
1286
280M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287
21.7M
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288
21.7M
    } else if (n_top_px > 0) {
1289
1.70M
      memset(left_col, above_ref[0], txhpx);
1290
1.70M
    }
1291
23.4M
  }
1292
1293
23.4M
  if (need_above) {
1294
23.4M
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295
23.4M
    if (n_top_px > 0) {
1296
22.8M
      memcpy(above_row, above_ref, n_top_px);
1297
22.8M
      i = n_top_px;
1298
22.8M
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299
22.8M
    } else if (n_left_px > 0) {
1300
598k
      memset(above_row, left_ref[0], txwpx);
1301
598k
    }
1302
23.4M
  }
1303
1304
23.4M
  if (need_above_left) {
1305
4.98M
    if (n_top_px > 0 && n_left_px > 0) {
1306
4.49M
      above_row[-1] = above_ref[-1];
1307
4.49M
    } else if (n_top_px > 0) {
1308
381k
      above_row[-1] = above_ref[0];
1309
381k
    } else if (n_left_px > 0) {
1310
101k
      above_row[-1] = left_ref[0];
1311
101k
    } else {
1312
2.02k
      above_row[-1] = 128;
1313
2.02k
    }
1314
4.98M
    left_col[-1] = above_row[-1];
1315
4.98M
  }
1316
1317
23.4M
  if (mode == DC_PRED) {
1318
13.5M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319
13.5M
                                                  left_col);
1320
13.5M
  } else {
1321
9.97M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322
9.97M
  }
1323
23.4M
}
1324
1325
#if CONFIG_AV1_HIGHBITDEPTH
1326
7.33M
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327
7.33M
  if (!strength) return;
1328
1329
4.07M
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330
4.07M
                                                         { 0, 5, 6, 5, 0 },
1331
4.07M
                                                         { 2, 4, 4, 4, 2 } };
1332
4.07M
  const int filt = strength - 1;
1333
4.07M
  uint16_t edge[129];
1334
1335
4.07M
  memcpy(edge, p, sz * sizeof(*p));
1336
70.1M
  for (int i = 1; i < sz; i++) {
1337
66.1M
    int s = 0;
1338
396M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339
330M
      int k = i - 2 + j;
1340
330M
      k = (k < 0) ? 0 : k;
1341
330M
      k = (k > sz - 1) ? sz - 1 : k;
1342
330M
      s += edge[k] * kernel[filt][j];
1343
330M
    }
1344
66.1M
    s = (s + 8) >> 4;
1345
66.1M
    p[i] = s;
1346
66.1M
  }
1347
4.07M
}
1348
1349
// High-bitdepth corner smoothing: applies a {5, 6, 5} / 16 filter across
// left[0], the corner (p_above[-1]) and above[0], then stores the result in
// both p_above[-1] and p_left[-1].
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
                                            uint16_t *p_left) {
  const int side_weight = 5;
  const int centre_weight = 6;

  const int weighted = side_weight * p_left[0] + centre_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1359
1360
2.73M
void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361
  // interpolate half-sample positions
1362
2.73M
  assert(sz <= MAX_UPSAMPLE_SZ);
1363
1364
2.73M
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365
  // copy p[-1..(sz-1)] and extend first and last samples
1366
2.73M
  in[0] = p[-1];
1367
2.73M
  in[1] = p[-1];
1368
23.9M
  for (int i = 0; i < sz; i++) {
1369
21.2M
    in[i + 2] = p[i];
1370
21.2M
  }
1371
2.73M
  in[sz + 2] = p[sz - 1];
1372
1373
  // interpolate half-sample edge positions
1374
2.73M
  p[-2] = in[0];
1375
23.9M
  for (int i = 0; i < sz; i++) {
1376
21.2M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377
21.2M
    s = (s + 8) >> 4;
1378
21.2M
    s = clip_pixel_highbd(s, bd);
1379
21.2M
    p[2 * i - 1] = s;
1380
21.2M
    p[2 * i] = in[i + 2];
1381
21.2M
  }
1382
2.73M
}
1383
1384
static void highbd_build_directional_and_filter_intra_predictors(
1385
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389
10.3M
    int bit_depth) {
1390
10.3M
  int i;
1391
10.3M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392
10.3M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393
10.3M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394
10.3M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395
10.3M
  uint16_t *const above_row = above_data + 16;
1396
10.3M
  uint16_t *const left_col = left_data + 16;
1397
10.3M
  const int txwpx = tx_size_wide[tx_size];
1398
10.3M
  const int txhpx = tx_size_high[tx_size];
1399
10.3M
  int need_left = extend_modes[mode] & NEED_LEFT;
1400
10.3M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1401
10.3M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402
10.3M
  const uint16_t *above_ref = ref - ref_stride;
1403
10.3M
  const uint16_t *left_ref = ref - 1;
1404
10.3M
  const int is_dr_mode = av1_is_directional_mode(mode);
1405
10.3M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406
10.3M
  assert(use_filter_intra || is_dr_mode);
1407
10.3M
  const int base = 128 << (bit_depth - 8);
1408
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1409
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1410
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1411
  // seen to be the potential reason for this issue.
1412
10.3M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413
10.3M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414
1415
  // The default values if ref pixels are not available:
1416
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417
  // base+1   A      B  ..     Y      Z
1418
  // base+1   C      D  ..     W      X
1419
  // base+1   E      F  ..     U      V
1420
  // base+1   G      H  ..     S      T      T      T      T      T
1421
1422
10.3M
  if (is_dr_mode) {
1423
8.41M
    if (p_angle <= 90)
1424
2.29M
      need_above = 1, need_left = 0, need_above_left = 1;
1425
6.11M
    else if (p_angle < 180)
1426
2.98M
      need_above = 1, need_left = 1, need_above_left = 1;
1427
3.12M
    else
1428
3.12M
      need_above = 0, need_left = 1, need_above_left = 1;
1429
8.41M
  }
1430
10.3M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431
1432
10.3M
  assert(n_top_px >= 0);
1433
10.3M
  assert(n_topright_px >= -1);
1434
10.3M
  assert(n_left_px >= 0);
1435
10.3M
  assert(n_bottomleft_px >= -1);
1436
1437
10.3M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438
222k
    int val;
1439
222k
    if (need_left) {
1440
150k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441
150k
    } else {
1442
72.2k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443
72.2k
    }
1444
4.89M
    for (i = 0; i < txhpx; ++i) {
1445
4.66M
      aom_memset16(dst, val, txwpx);
1446
4.66M
      dst += dst_stride;
1447
4.66M
    }
1448
222k
    return;
1449
222k
  }
1450
1451
  // NEED_LEFT
1452
10.1M
  if (need_left) {
1453
7.90M
    const int num_left_pixels_needed =
1454
7.90M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1455
7.90M
    i = 0;
1456
7.90M
    if (n_left_px > 0) {
1457
79.5M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458
7.72M
      if (n_bottomleft_px > 0) {
1459
501k
        assert(i == txhpx);
1460
5.11M
        for (; i < txhpx + n_bottomleft_px; i++)
1461
4.61M
          left_col[i] = left_ref[i * ref_stride];
1462
501k
      }
1463
7.72M
      if (i < num_left_pixels_needed)
1464
1.43M
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465
7.72M
    } else if (n_top_px > 0) {
1466
177k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467
177k
    }
1468
7.90M
  }
1469
1470
  // NEED_ABOVE
1471
10.1M
  if (need_above) {
1472
7.14M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473
7.14M
    if (n_top_px > 0) {
1474
6.99M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475
6.99M
      i = n_top_px;
1476
6.99M
      if (n_topright_px > 0) {
1477
912k
        assert(n_top_px == txwpx);
1478
912k
        memcpy(above_row + txwpx, above_ref + txwpx,
1479
912k
               n_topright_px * sizeof(above_ref[0]));
1480
912k
        i += n_topright_px;
1481
912k
      }
1482
6.99M
      if (i < num_top_pixels_needed)
1483
732k
        aom_memset16(&above_row[i], above_row[i - 1],
1484
732k
                     num_top_pixels_needed - i);
1485
6.99M
    } else if (n_left_px > 0) {
1486
140k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487
140k
    }
1488
7.14M
  }
1489
1490
10.1M
  if (need_above_left) {
1491
10.1M
    if (n_top_px > 0 && n_left_px > 0) {
1492
9.58M
      above_row[-1] = above_ref[-1];
1493
9.58M
    } else if (n_top_px > 0) {
1494
295k
      above_row[-1] = above_ref[0];
1495
295k
    } else if (n_left_px > 0) {
1496
242k
      above_row[-1] = left_ref[0];
1497
242k
    } else {
1498
7.64k
      above_row[-1] = base;
1499
7.64k
    }
1500
10.1M
    left_col[-1] = above_row[-1];
1501
10.1M
  }
1502
1503
10.1M
  if (use_filter_intra) {
1504
1.93M
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505
1.93M
                                  filter_intra_mode, bit_depth);
1506
1.93M
    return;
1507
1.93M
  }
1508
1509
10.1M
  assert(is_dr_mode);
1510
8.18M
  int upsample_above = 0;
1511
8.18M
  int upsample_left = 0;
1512
8.18M
  if (!disable_edge_filter) {
1513
6.60M
    const int need_right = p_angle < 90;
1514
6.60M
    const int need_bottom = p_angle > 180;
1515
6.60M
    if (p_angle != 90 && p_angle != 180) {
1516
5.00M
      assert(need_above_left);
1517
5.00M
      const int ab_le = 1;
1518
5.00M
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519
590k
        highbd_filter_intra_edge_corner(above_row, left_col);
1520
590k
      }
1521
5.00M
      if (need_above && n_top_px > 0) {
1522
3.49M
        const int strength = intra_edge_filter_strength(
1523
3.49M
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524
3.49M
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525
3.49M
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526
3.49M
      }
1527
5.00M
      if (need_left && n_left_px > 0) {
1528
3.83M
        const int strength = intra_edge_filter_strength(
1529
3.83M
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530
3.83M
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531
3.83M
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532
3.83M
      }
1533
5.00M
    }
1534
6.60M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535
6.60M
                                                 intra_edge_filter_type);
1536
6.60M
    if (need_above && upsample_above) {
1537
1.05M
      const int n_px = txwpx + (need_right ? txhpx : 0);
1538
1.05M
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539
1.05M
    }
1540
6.60M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541
6.60M
                                                intra_edge_filter_type);
1542
6.60M
    if (need_left && upsample_left) {
1543
1.67M
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544
1.67M
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545
1.67M
    }
1546
6.60M
  }
1547
8.18M
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548
8.18M
                      upsample_above, upsample_left, p_angle, bit_depth);
1549
8.18M
}
1550
1551
// For HBD encode/decode, this function generates the pred data of a given
1552
// block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553
// SMOOTH_V and PAETH).
1554
static void highbd_build_non_directional_intra_predictors(
1555
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557
23.8M
    int bit_depth) {
1558
23.8M
  int i = 0;
1559
23.8M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560
23.8M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561
23.8M
  const int txwpx = tx_size_wide[tx_size];
1562
23.8M
  const int txhpx = tx_size_high[tx_size];
1563
23.8M
  int need_left = extend_modes[mode] & NEED_LEFT;
1564
23.8M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1565
23.8M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566
23.8M
  const uint16_t *above_ref = ref - ref_stride;
1567
23.8M
  const uint16_t *left_ref = ref - 1;
1568
23.8M
  const int base = 128 << (bit_depth - 8);
1569
1570
23.8M
  assert(n_top_px >= 0);
1571
23.8M
  assert(n_left_px >= 0);
1572
23.8M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573
23.8M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574
1575
23.8M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576
0
    int val = 0;
1577
0
    if (need_left) {
1578
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579
0
    } else {
1580
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581
0
    }
1582
0
    for (i = 0; i < txhpx; ++i) {
1583
0
      aom_memset16(dst, val, txwpx);
1584
0
      dst += dst_stride;
1585
0
    }
1586
0
    return;
1587
0
  }
1588
1589
23.8M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590
23.8M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591
23.8M
  uint16_t *const above_row = above_data + 16;
1592
23.8M
  uint16_t *const left_col = left_data + 16;
1593
1594
23.8M
  if (need_left) {
1595
23.8M
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596
23.8M
    if (n_left_px > 0) {
1597
271M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598
21.9M
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599
21.9M
    } else if (n_top_px > 0) {
1600
1.79M
      aom_memset16(left_col, above_ref[0], txhpx);
1601
1.79M
    }
1602
23.8M
  }
1603
1604
23.8M
  if (need_above) {
1605
23.8M
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606
23.8M
    if (n_top_px > 0) {
1607
23.0M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608
23.0M
      i = n_top_px;
1609
23.0M
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610
23.0M
    } else if (n_left_px > 0) {
1611
737k
      aom_memset16(above_row, left_ref[0], txwpx);
1612
737k
    }
1613
23.8M
  }
1614
1615
23.8M
  if (need_above_left) {
1616
6.23M
    if (n_top_px > 0 && n_left_px > 0) {
1617
5.68M
      above_row[-1] = above_ref[-1];
1618
5.68M
    } else if (n_top_px > 0) {
1619
436k
      above_row[-1] = above_ref[0];
1620
436k
    } else if (n_left_px > 0) {
1621
106k
      above_row[-1] = left_ref[0];
1622
106k
    } else {
1623
3.36k
      above_row[-1] = base;
1624
3.36k
    }
1625
6.23M
    left_col[-1] = above_row[-1];
1626
6.23M
  }
1627
1628
23.8M
  if (mode == DC_PRED) {
1629
13.2M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630
13.2M
        dst, dst_stride, above_row, left_col, bit_depth);
1631
13.2M
  } else {
1632
10.5M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633
10.5M
  }
1634
23.8M
}
1635
#endif  // CONFIG_AV1_HIGHBITDEPTH
1636
1637
// Maps a block size with a 4-sample dimension to the block size used for the
// chroma component, depending on which directions are subsampled.
//
// Only BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_4X16 and BLOCK_16X4 are
// remapped; every other size, and any of those sizes when neither direction
// is subsampled, is returned unchanged.
//
// subsampling_x / subsampling_y must each be 0 or 1.
static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);
  BLOCK_SIZE bs = bsize;
  switch (bsize) {
    case BLOCK_4X4:
      if (subsampling_x == 1 && subsampling_y == 1) {
        bs = BLOCK_8X8;
      } else if (subsampling_x == 1) {
        bs = BLOCK_8X4;
      } else if (subsampling_y == 1) {
        bs = BLOCK_4X8;
      }
      break;
    case BLOCK_4X8:
      if (subsampling_x == 1 && subsampling_y == 1) {
        bs = BLOCK_8X8;
      } else if (subsampling_x == 1) {
        bs = BLOCK_8X8;
      } else if (subsampling_y == 1) {
        bs = BLOCK_4X8;
      }
      break;
    case BLOCK_8X4:
      if (subsampling_x == 1 && subsampling_y == 1) {
        bs = BLOCK_8X8;
      } else if (subsampling_x == 1) {
        bs = BLOCK_8X4;
      } else if (subsampling_y == 1) {
        bs = BLOCK_8X8;
      }
      break;
    case BLOCK_4X16:
      if (subsampling_x == 1 && subsampling_y == 1) {
        bs = BLOCK_8X16;
      } else if (subsampling_x == 1) {
        bs = BLOCK_8X16;
      } else if (subsampling_y == 1) {
        bs = BLOCK_4X16;
      }
      break;
    case BLOCK_16X4:
      if (subsampling_x == 1 && subsampling_y == 1) {
        bs = BLOCK_16X8;
      } else if (subsampling_x == 1) {
        bs = BLOCK_16X4;
      } else if (subsampling_y == 1) {
        bs = BLOCK_16X8;
      }
      break;
    default: break;
  }
  return bs;
}
1687
1688
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689
                             int enable_intra_edge_filter, int wpx, int hpx,
1690
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1691
                             int angle_delta, int use_palette,
1692
                             FILTER_INTRA_MODE filter_intra_mode,
1693
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1694
                             int dst_stride, int col_off, int row_off,
1695
68.7M
                             int plane) {
1696
68.7M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1697
68.7M
  const int txwpx = tx_size_wide[tx_size];
1698
68.7M
  const int txhpx = tx_size_high[tx_size];
1699
68.7M
  const int x = col_off << MI_SIZE_LOG2;
1700
68.7M
  const int y = row_off << MI_SIZE_LOG2;
1701
68.7M
  const int is_hbd = is_cur_buf_hbd(xd);
1702
1703
68.7M
  assert(mode < INTRA_MODES);
1704
1705
68.7M
  if (use_palette) {
1706
928k
    int r, c;
1707
928k
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708
928k
                               xd->color_index_map_offset[plane != 0];
1709
928k
    const uint16_t *const palette =
1710
928k
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1711
928k
    if (is_hbd) {
1712
96.1k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713
771k
      for (r = 0; r < txhpx; ++r) {
1714
8.50M
        for (c = 0; c < txwpx; ++c) {
1715
7.83M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716
7.83M
        }
1717
675k
      }
1718
831k
    } else {
1719
8.22M
      for (r = 0; r < txhpx; ++r) {
1720
109M
        for (c = 0; c < txwpx; ++c) {
1721
102M
          dst[r * dst_stride + c] =
1722
102M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723
102M
        }
1724
7.39M
      }
1725
831k
    }
1726
928k
    return;
1727
928k
  }
1728
1729
67.8M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1730
67.8M
  const int ss_x = pd->subsampling_x;
1731
67.8M
  const int ss_y = pd->subsampling_y;
1732
67.8M
  const int have_top =
1733
67.8M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734
67.8M
  const int have_left =
1735
67.8M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736
1737
  // Distance between the right edge of this prediction block to
1738
  // the frame right edge
1739
67.8M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740
  // Distance between the bottom edge of this prediction block to
1741
  // the frame bottom edge
1742
67.8M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743
67.8M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744
67.8M
  const int is_dr_mode = av1_is_directional_mode(mode);
1745
1746
  // The computations in this function, as well as in build_intra_predictors(),
1747
  // are generalized for all intra modes. Some of these operations are not
1748
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
1750
  // separate function build_non_directional_intra_predictors() is introduced
1751
  // for these modes to avoid redundant computations while generating pred data.
1752
1753
67.8M
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754
67.8M
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
1755
67.8M
  if (!use_filter_intra && !is_dr_mode) {
1756
47.3M
#if CONFIG_AV1_HIGHBITDEPTH
1757
47.3M
    if (is_hbd) {
1758
23.8M
      highbd_build_non_directional_intra_predictors(
1759
23.8M
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760
23.8M
          xd->bd);
1761
23.8M
      return;
1762
23.8M
    }
1763
23.4M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1764
23.4M
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765
23.4M
                                           mode, tx_size, n_top_px, n_left_px);
1766
23.4M
    return;
1767
47.3M
  }
1768
1769
20.4M
  const int txw = tx_size_wide_unit[tx_size];
1770
20.4M
  const int txh = tx_size_high_unit[tx_size];
1771
20.4M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772
20.4M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773
20.4M
  const int right_available =
1774
20.4M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775
20.4M
  const int bottom_available =
1776
20.4M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777
1778
20.4M
  const PARTITION_TYPE partition = mbmi->partition;
1779
1780
20.4M
  BLOCK_SIZE bsize = mbmi->bsize;
1781
  // force 4x4 chroma component block size.
1782
20.4M
  if (ss_x || ss_y) {
1783
3.27M
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784
3.27M
  }
1785
1786
20.4M
  int p_angle = 0;
1787
20.4M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788
20.4M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789
1790
20.4M
  if (use_filter_intra) {
1791
3.52M
    need_top_right = 0;
1792
3.52M
    need_bottom_left = 0;
1793
3.52M
  }
1794
20.4M
  if (is_dr_mode) {
1795
16.9M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1796
16.9M
    need_top_right = p_angle < 90;
1797
16.9M
    need_bottom_left = p_angle > 180;
1798
16.9M
  }
1799
1800
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1801
  // -1 : TR and BL are not needed
1802
  //  0 : TR and BL are needed but not available
1803
  // > 0 : TR and BL are needed and pixels are available
1804
20.4M
  const int have_top_right =
1805
20.4M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806
3.10M
                                     right_available, partition, tx_size,
1807
3.10M
                                     row_off, col_off, ss_x, ss_y)
1808
20.4M
                     : -1;
1809
20.4M
  const int have_bottom_left =
1810
20.4M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811
3.54M
                                         bottom_available, have_left, partition,
1812
3.54M
                                         tx_size, row_off, col_off, ss_x, ss_y)
1813
20.4M
                       : -1;
1814
1815
20.4M
  const int disable_edge_filter = !enable_intra_edge_filter;
1816
20.4M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817
20.4M
  const int n_topright_px =
1818
20.4M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819
20.4M
  const int n_bottomleft_px =
1820
20.4M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
1821
20.4M
#if CONFIG_AV1_HIGHBITDEPTH
1822
20.4M
  if (is_hbd) {
1823
10.3M
    highbd_build_directional_and_filter_intra_predictors(
1824
10.3M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825
10.3M
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826
10.3M
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827
10.3M
    return;
1828
10.3M
  }
1829
10.1M
#endif
1830
10.1M
  build_directional_and_filter_intra_predictors(
1831
10.1M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832
10.1M
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833
10.1M
      n_bottomleft_px, intra_edge_filter_type);
1834
10.1M
}
1835
1836
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837
                                    int plane, int blk_col, int blk_row,
1838
68.7M
                                    TX_SIZE tx_size) {
1839
68.7M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1840
68.7M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1841
68.7M
  const int dst_stride = pd->dst.stride;
1842
68.7M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843
68.7M
  const PREDICTION_MODE mode =
1844
68.7M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845
68.7M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846
68.7M
  const FILTER_INTRA_MODE filter_intra_mode =
1847
68.7M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848
68.7M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1849
68.7M
          : FILTER_INTRA_MODES;
1850
68.7M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851
68.7M
  const SequenceHeader *seq_params = cm->seq_params;
1852
1853
68.7M
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854
68.7M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855
6.24M
#if CONFIG_DEBUG
1856
6.24M
    assert(is_cfl_allowed(xd));
1857
6.24M
    const BLOCK_SIZE plane_bsize =
1858
6.24M
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859
6.24M
    (void)plane_bsize;
1860
6.24M
    assert(plane_bsize < BLOCK_SIZES_ALL);
1861
6.24M
    if (!xd->lossless[mbmi->segment_id]) {
1862
6.22M
      assert(blk_col == 0);
1863
6.22M
      assert(blk_row == 0);
1864
6.22M
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865
6.22M
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866
6.22M
    }
1867
6.24M
#endif
1868
6.24M
    CFL_CTX *const cfl = &xd->cfl;
1869
6.24M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870
6.24M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1871
6.24M
      av1_predict_intra_block(xd, seq_params->sb_size,
1872
6.24M
                              seq_params->enable_intra_edge_filter, pd->width,
1873
6.24M
                              pd->height, tx_size, mode, angle_delta,
1874
6.24M
                              use_palette, filter_intra_mode, dst, dst_stride,
1875
6.24M
                              dst, dst_stride, blk_col, blk_row, plane);
1876
6.24M
      if (cfl->use_dc_pred_cache) {
1877
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1879
0
      }
1880
6.24M
    } else {
1881
0
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882
0
    }
1883
6.24M
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884
6.24M
    return;
1885
6.24M
  }
1886
62.4M
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887
62.4M
  av1_predict_intra_block(
1888
62.4M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889
62.4M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890
62.4M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891
62.4M
}
1892
1893
39.3k
void av1_init_intra_predictors(void) {
1894
39.3k
  aom_once(init_intra_predictors_internal);
1895
39.3k
}