Coverage Report

Created: 2026-02-14 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconintra.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
15
#include "config/aom_config.h"
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/av1_rtcd.h"
18
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/aom_once.h"
22
#include "aom_ports/mem.h"
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/cfl.h"
25
#include "av1/common/reconintra.h"
26
27
// Bit flags, one per neighbouring edge region. They are OR-ed together in
// extend_modes[]. Bit 0 is intentionally left unused.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};
34
35
// NOTE(review): the users of the constants below are outside this part of the
// file; the descriptions are inferred from the names only and should be
// confirmed against the edge-filter / upsampling code.
// Presumably the number of selectable intra edge filters.
#define INTRA_EDGE_FILT 3
36
107M
// Presumably the number of taps of each intra edge filter.
#define INTRA_EDGE_TAPS 5
37
// Presumably the largest edge length (in pixels) that may be upsampled.
#define MAX_UPSAMPLE_SZ 16
38
35.5M
// Twice MAX_TX_SIZE plus 32 extra entries.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
40
static const uint8_t extend_modes[INTRA_MODES] = {
41
  NEED_ABOVE | NEED_LEFT,                   // DC
42
  NEED_ABOVE,                               // V
43
  NEED_LEFT,                                // H
44
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54
};
55
56
// Tables to store if the top-right reference pixels are available. The flags
57
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
58
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
59
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
60
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
61
//       . . . .
62
//       . . . .
63
//       . . o .
64
//       . . . .
65
// Top-right availability bitmaps, one table per block size. Bit
// (index % 8) of byte (index / 8) holds the flag of the block with that index.
// The tables are only ever read (they are reached through pointers to const),
// so they are declared const to keep them in read-only storage.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130
  // 4X4
131
  has_tr_4x4,
132
  // 4X8,       8X4,            8X8
133
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
134
  // 8X16,      16X8,           16X16
135
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
136
  // 16X32,     32X16,          32X32
137
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
138
  // 32X64,     64X32,          64X64
139
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
140
  // 64x128,    128x64,         128x128
141
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
142
  // 4x16,      16x4,            8x32
143
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
144
  // 32x8,      16x64,           64x16
145
  has_tr_32x8, has_tr_16x64, has_tr_64x16
146
};
147
148
// Top-right availability bitmaps for square blocks visited in the vertical
// order used by PARTITION_VERT_A / PARTITION_VERT_B. They are read-only
// lookup data, so they are declared const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158
// The _vert_* tables are like the ordinary tables above, but describe the
159
// order we visit square blocks when doing a PARTITION_VERT_A or
160
// PARTITION_VERT_B. This is the same order as normal except for on the last
161
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162
// as a pair of squares, which means that these tables work correctly for both
163
// mixed vertical partition types.
164
//
165
// There are tables for each of the square sizes. Vertical rectangles (like
166
// BLOCK_16X32) use their respective "non-vert" table
167
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168
  // 4X4
169
  NULL,
170
  // 4X8,      8X4,         8X8
171
  has_tr_4x8, NULL, has_tr_vert_8x8,
172
  // 8X16,     16X8,        16X16
173
  has_tr_8x16, NULL, has_tr_vert_16x16,
174
  // 16X32,    32X16,       32X32
175
  has_tr_16x32, NULL, has_tr_vert_32x32,
176
  // 32X64,    64X32,       64X64
177
  has_tr_32x64, NULL, has_tr_vert_64x64,
178
  // 64x128,   128x64,      128x128
179
  has_tr_64x128, NULL, has_tr_128x128
180
};
181
182
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183
217k
                                       BLOCK_SIZE bsize) {
184
217k
  const uint8_t *ret = NULL;
185
  // If this is a mixed vertical partition, look up bsize in orders_vert.
186
217k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187
17.7k
    assert(bsize < BLOCK_SIZES);
188
17.7k
    ret = has_tr_vert_tables[bsize];
189
199k
  } else {
190
199k
    ret = has_tr_tables[bsize];
191
199k
  }
192
217k
  assert(ret);
193
217k
  return ret;
194
217k
}
195
196
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197
                         int mi_col, int top_available, int right_available,
198
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199
521k
                         int col_off, int ss_x, int ss_y) {
200
521k
  if (!top_available || !right_available) return 0;
201
202
479k
  const int bw_unit = mi_size_wide[bsize];
203
479k
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204
479k
  const int top_right_count_unit = tx_size_wide_unit[txsz];
205
206
479k
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
207
148k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208
      // Special case: For 128x128 blocks, the transform unit whose
209
      // top-right corner is at the center of the block does in fact have
210
      // pixels available at its top-right corner.
211
53.8k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212
16.7k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213
6.32k
        return 1;
214
6.32k
      }
215
47.5k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216
47.5k
      const int col_off_64 = col_off % plane_bw_unit_64;
217
47.5k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218
53.8k
    }
219
94.8k
    return col_off + top_right_count_unit < plane_bw_unit;
220
330k
  } else {
221
    // All top-right pixels are in the block above, which is already available.
222
330k
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223
224
294k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225
294k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226
294k
    const int sb_mi_size = mi_size_high[sb_size];
227
294k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228
294k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229
230
    // Top row of superblock: so top-right pixels are in the top and/or
231
    // top-right superblocks, both of which are already available.
232
294k
    if (blk_row_in_sb == 0) return 1;
233
234
    // Rightmost column of superblock (and not the top row): so top-right pixels
235
    // fall in the right superblock, which is not available yet.
236
253k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237
36.2k
      return 0;
238
36.2k
    }
239
240
    // General case (neither top row nor rightmost column): check if the
241
    // top-right block is coded before the current block.
242
217k
    const int this_blk_index =
243
217k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244
217k
        blk_col_in_sb + 0;
245
217k
    const int idx1 = this_blk_index / 8;
246
217k
    const int idx2 = this_blk_index % 8;
247
217k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248
217k
    return (has_tr_table[idx1] >> idx2) & 1;
249
253k
  }
250
479k
}
251
252
// Similar to the has_tr_* tables, but store if the bottom-left reference
253
// pixels are available.
254
// Bottom-left availability bitmaps, one table per block size. Bit
// (index % 8) of byte (index / 8) holds the flag of the block with that index.
// The tables are only ever read (they are reached through pointers to const),
// so they are declared const to keep them in read-only storage.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315
  // 4X4
316
  has_bl_4x4,
317
  // 4X8,         8X4,         8X8
318
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
319
  // 8X16,        16X8,        16X16
320
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
321
  // 16X32,       32X16,       32X32
322
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
323
  // 32X64,       64X32,       64X64
324
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
325
  // 64x128,      128x64,      128x128
326
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
327
  // 4x16,        16x4,        8x32
328
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
329
  // 32x8,        16x64,       64x16
330
  has_bl_32x8, has_bl_16x64, has_bl_64x16
331
};
332
333
// Bottom-left availability bitmaps for square blocks visited in the vertical
// order used by PARTITION_VERT_A / PARTITION_VERT_B. They are read-only
// lookup data, so they are declared const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343
// The _vert_* tables are like the ordinary tables above, but describe the
344
// order we visit square blocks when doing a PARTITION_VERT_A or
345
// PARTITION_VERT_B. This is the same order as normal except for on the last
346
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347
// as a pair of squares, which means that these tables work correctly for both
348
// mixed vertical partition types.
349
//
350
// There are tables for each of the square sizes. Vertical rectangles (like
351
// BLOCK_16X32) use their respective "non-vert" table
352
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353
  // 4X4
354
  NULL,
355
  // 4X8,     8X4,         8X8
356
  has_bl_4x8, NULL, has_bl_vert_8x8,
357
  // 8X16,    16X8,        16X16
358
  has_bl_8x16, NULL, has_bl_vert_16x16,
359
  // 16X32,   32X16,       32X32
360
  has_bl_16x32, NULL, has_bl_vert_32x32,
361
  // 32X64,   64X32,       64X64
362
  has_bl_32x64, NULL, has_bl_vert_64x64,
363
  // 64x128,  128x64,      128x128
364
  has_bl_64x128, NULL, has_bl_128x128
365
};
366
367
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368
209k
                                       BLOCK_SIZE bsize) {
369
209k
  const uint8_t *ret = NULL;
370
  // If this is a mixed vertical partition, look up bsize in orders_vert.
371
209k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372
17.0k
    assert(bsize < BLOCK_SIZES);
373
17.0k
    ret = has_bl_vert_tables[bsize];
374
192k
  } else {
375
192k
    ret = has_bl_tables[bsize];
376
192k
  }
377
209k
  assert(ret);
378
209k
  return ret;
379
209k
}
380
381
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382
                           int mi_col, int bottom_available, int left_available,
383
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384
533k
                           int col_off, int ss_x, int ss_y) {
385
533k
  if (!bottom_available || !left_available) return 0;
386
387
  // Special case for 128x* blocks, when col_off is half the block width.
388
  // This is needed because 128x* superblocks are divided into 64x* blocks in
389
  // raster order
390
501k
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391
78.7k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392
78.7k
    const int col_off_64 = col_off % plane_bw_unit_64;
393
78.7k
    if (col_off_64 == 0) {
394
      // We are at the left edge of top-right or bottom-right 64x* block.
395
17.1k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396
17.1k
      const int row_off_64 = row_off % plane_bh_unit_64;
397
17.1k
      const int plane_bh_unit =
398
17.1k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399
      // Check if all bottom-left pixels are in the left 64x* block (which is
400
      // already coded).
401
17.1k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402
17.1k
    }
403
78.7k
  }
404
405
484k
  if (col_off > 0) {
406
    // Bottom-left pixels are in the bottom-left block, which is not available.
407
159k
    return 0;
408
324k
  } else {
409
324k
    const int bh_unit = mi_size_high[bsize];
410
324k
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411
324k
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
412
413
    // All bottom-left pixels are in the left block, which is already available.
414
324k
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415
416
290k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417
290k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418
290k
    const int sb_mi_size = mi_size_high[sb_size];
419
290k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420
290k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421
422
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
423
    // and/or bottom-left superblocks. But only the left superblock is
424
    // available, so check if all required pixels fall in that superblock.
425
290k
    if (blk_col_in_sb == 0) {
426
44.5k
      const int blk_start_row_off =
427
44.5k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428
44.5k
          ss_y;
429
44.5k
      const int row_off_in_sb = blk_start_row_off + row_off;
430
44.5k
      const int sb_height_unit = sb_mi_size >> ss_y;
431
44.5k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432
44.5k
    }
433
434
    // Bottom row of superblock (and not the leftmost column): so bottom-left
435
    // pixels fall in the bottom superblock, which is not available yet.
436
245k
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437
438
    // General case (neither leftmost column nor bottom row): check if the
439
    // bottom-left block is coded before the current block.
440
209k
    const int this_blk_index =
441
209k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442
209k
        blk_col_in_sb + 0;
443
209k
    const int idx1 = this_blk_index / 8;
444
209k
    const int idx2 = this_blk_index % 8;
445
209k
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446
209k
    return (has_bl_table[idx1] >> idx2) & 1;
447
245k
  }
448
484k
}
449
450
// Signature of a low-bitdepth intra predictor: writes the prediction to dst
// (row pitch `stride`) from the `above` and `left` reference edges.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451
                              const uint8_t *above, const uint8_t *left);
452
453
// Predictor lookup indexed by [intra mode][transform size]; filled in by
// init_intra_predictors_internal().
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454
// DC predictor variants; init_intra_predictors_internal() stores dc_128 at
// [0][0], dc_top at [0][1], dc_left at [1][0] and dc at [1][1].
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456
#if CONFIG_AV1_HIGHBITDEPTH
457
// High-bitdepth counterpart of intra_pred_fn; takes 16-bit samples plus the
// bit depth `bd`.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458
                                   const uint16_t *above, const uint16_t *left,
459
                                   int bd);
460
// High-bitdepth versions of pred[] and dc_pred[], laid out the same way.
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462
#endif
463
464
2
static void init_intra_predictors_internal(void) {
465
2
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468
#define INIT_RECTANGULAR(p, type)             \
469
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
470
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
471
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
472
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
473
  p[TX_16X32] = aom_##type##_predictor_16x32; \
474
  p[TX_32X16] = aom_##type##_predictor_32x16; \
475
  p[TX_32X64] = aom_##type##_predictor_32x64; \
476
  p[TX_64X32] = aom_##type##_predictor_64x32;
477
#else
478
2
#define INIT_RECTANGULAR(p, type)             \
479
40
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
480
40
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
481
40
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
482
40
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
483
40
  p[TX_16X32] = aom_##type##_predictor_16x32; \
484
40
  p[TX_32X16] = aom_##type##_predictor_32x16; \
485
40
  p[TX_32X64] = aom_##type##_predictor_32x64; \
486
40
  p[TX_64X32] = aom_##type##_predictor_64x32; \
487
40
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
488
40
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
489
40
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
490
40
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
491
40
  p[TX_16X64] = aom_##type##_predictor_16x64; \
492
40
  p[TX_64X16] = aom_##type##_predictor_64x16;
493
2
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494
495
2
#define INIT_NO_4X4(p, type)                  \
496
40
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
497
40
  p[TX_16X16] = aom_##type##_predictor_16x16; \
498
40
  p[TX_32X32] = aom_##type##_predictor_32x32; \
499
40
  p[TX_64X64] = aom_##type##_predictor_64x64; \
500
40
  INIT_RECTANGULAR(p, type)
501
502
2
#define INIT_ALL_SIZES(p, type)           \
503
40
  p[TX_4X4] = aom_##type##_predictor_4x4; \
504
40
  INIT_NO_4X4(p, type)
505
506
2
  INIT_ALL_SIZES(pred[V_PRED], v)
507
2
  INIT_ALL_SIZES(pred[H_PRED], h)
508
2
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509
2
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510
2
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511
2
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512
2
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513
2
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514
2
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515
2
  INIT_ALL_SIZES(dc_pred[1][1], dc)
516
2
#if CONFIG_AV1_HIGHBITDEPTH
517
2
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518
2
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519
2
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520
2
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521
2
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522
2
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523
2
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524
2
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525
2
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526
2
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527
2
#endif
528
2
#undef intra_pred_allsizes
529
2
}
530
531
// Directional prediction, zone 1: 0 < angle < 90.
// Predicts a bw x bh block from the `above` edge only. `dx` is the per-row
// step along the edge in 1/64 sample units; `dy` must be 1 and `left` is
// unused. `upsample_above` is the log2 upsampling factor of the edge.
// Positions past the last edge sample replicate above[last_idx].
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int pos_shift = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  int pos = dx;
  for (int row = 0; row < bh; ++row, dst += stride, pos += dx) {
    int idx = pos >> pos_shift;
    // 5-bit interpolation weight of the second sample.
    const int frac = ((pos << upsample_above) & 0x3F) >> 1;

    if (idx >= last_idx) {
      // This row and every later one start past the edge: flat fill.
      for (int rest = row; rest < bh; ++rest) {
        memset(dst, above[last_idx], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int col = 0; col < bw; ++col, idx += idx_step) {
      if (idx >= last_idx) {
        dst[col] = above[last_idx];
        continue;
      }
      const int blended = above[idx] * (32 - frac) + above[idx + 1] * frac;
      dst[col] = ROUND_POWER_OF_TWO(blended, 5);
    }
  }
}
568
569
// Directional prediction, zone 2: 90 < angle < 180.
// Each output sample is interpolated from the `above` edge when its projected
// position lies at or after index -(1 << upsample_above); otherwise it is
// projected onto the `left` edge instead. `dx` / `dy` are the steps in 1/64
// sample units and `upsample_above` / `upsample_left` the log2 upsampling
// factors of the two edges.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int above_scale = 1 << upsample_above;
  const int left_scale = 1 << upsample_left;
  const int first_above_idx = -above_scale;
  const int first_left_idx = -left_scale;
  (void)first_left_idx;  // Only referenced by the assert below.
  const int above_shift = 6 - upsample_above;
  const int left_shift = 6 - upsample_left;

  for (int row = 0; row < bh; ++row) {
    const int row_dx = (row + 1) * dx;
    for (int col = 0; col < bw; ++col) {
      const int pos_x = (col << 6) - row_dx;
      const int idx_x = pos_x >> above_shift;
      int blended;
      if (idx_x < first_above_idx) {
        // Projection falls off the above edge; use the left edge.
        const int pos_y = (row << 6) - (col + 1) * dy;
        const int idx_y = pos_y >> left_shift;
        assert(idx_y >= first_left_idx);
        const int frac = ((pos_y * left_scale) & 0x3F) >> 1;
        blended = left[idx_y] * (32 - frac) + left[idx_y + 1] * frac;
      } else {
        const int frac = ((pos_x * above_scale) & 0x3F) >> 1;
        blended = above[idx_x] * (32 - frac) + above[idx_x + 1] * frac;
      }
      dst[col] = ROUND_POWER_OF_TWO(blended, 5);
    }
    dst += stride;
  }
}
607
608
// Directional prediction, zone 3: 180 < angle < 270.
// Predicts a bw x bh block column by column from the `left` edge only. `dy`
// is the per-column step along the edge in 1/64 sample units; `dx` must be 1
// and `above` is unused. `upsample_left` is the log2 upsampling factor of the
// edge. Positions past the last edge sample replicate left[last_idx].
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int pos_shift = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    int idx = pos >> pos_shift;
    // 5-bit interpolation weight of the second sample.
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;

    int row = 0;
    // Interpolated part of the column.
    for (; row < bh && idx < last_idx; ++row, idx += idx_step) {
      const int blended = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blended, 5);
    }
    // Remainder of the column lies past the edge: replicate the last sample.
    for (; row < bh; ++row) {
      dst[row * stride + col] = left[last_idx];
    }
  }
}
639
640
// Dispatches a directional prediction to the handler for `angle` (degrees,
// expected in (0, 270)): 90 and 180 use the plain V / H predictors, the
// remaining angles use the zone 1 / 2 / 3 kernels. The block dimensions are
// derived from tx_size.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }
  // Out-of-range angles produce no prediction (only reachable when asserts
  // are compiled out).
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
664
665
#if CONFIG_AV1_HIGHBITDEPTH
666
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
// Predicts a bw x bh block from the `above` edge only. `dx` is the per-row
// step along the edge in 1/64 sample units; `dy` must be 1, and `left` and
// `bd` are unused. `upsample_above` is the log2 upsampling factor of the
// edge. Positions past the last edge sample replicate above[last_idx].
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int pos_shift = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  int pos = dx;
  for (int row = 0; row < bh; ++row, dst += stride, pos += dx) {
    int idx = pos >> pos_shift;
    // 5-bit interpolation weight of the second sample.
    const int frac = ((pos << upsample_above) & 0x3F) >> 1;

    if (idx >= last_idx) {
      // This row and every later one start past the edge: flat fill.
      for (int rest = row; rest < bh; ++rest) {
        aom_memset16(dst, above[last_idx], bw);
        dst += stride;
      }
      return;
    }

    for (int col = 0; col < bw; ++col, idx += idx_step) {
      if (idx >= last_idx) {
        dst[col] = above[last_idx];
        continue;
      }
      const int blended = above[idx] * (32 - frac) + above[idx + 1] * frac;
      dst[col] = ROUND_POWER_OF_TWO(blended, 5);
    }
  }
}
705
706
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
// Each output sample is interpolated from the `above` edge when its projected
// position lies at or after index -(1 << upsample_above); otherwise it is
// projected onto the `left` edge instead. `dx` / `dy` are the steps in 1/64
// sample units and `upsample_above` / `upsample_left` the log2 upsampling
// factors of the two edges. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int above_scale = 1 << upsample_above;
  const int left_scale = 1 << upsample_left;
  const int first_above_idx = -above_scale;
  const int first_left_idx = -left_scale;
  (void)first_left_idx;  // Only referenced by the assert below.
  const int above_shift = 6 - upsample_above;
  const int left_shift = 6 - upsample_left;

  for (int row = 0; row < bh; ++row) {
    const int row_dx = (row + 1) * dx;
    for (int col = 0; col < bw; ++col) {
      const int pos_x = (col << 6) - row_dx;
      const int idx_x = pos_x >> above_shift;
      int blended;
      if (idx_x < first_above_idx) {
        // Projection falls off the above edge; use the left edge.
        const int pos_y = (row << 6) - (col + 1) * dy;
        const int idx_y = pos_y >> left_shift;
        assert(idx_y >= first_left_idx);
        const int frac = ((pos_y * left_scale) & 0x3F) >> 1;
        blended = left[idx_y] * (32 - frac) + left[idx_y + 1] * frac;
      } else {
        const int frac = ((pos_x * above_scale) & 0x3F) >> 1;
        blended = above[idx_x] * (32 - frac) + above[idx_x + 1] * frac;
      }
      dst[col] = ROUND_POWER_OF_TWO(blended, 5);
    }
    dst += stride;
  }
}
745
746
// Directional prediction, zone 3: 180 < angle < 270.
//
// Column-wise counterpart of zone 1: only the `left` edge is read. The edge
// position advances by `dy` per column; within a column the integer index
// advances by one (two when upsampled) per row. Samples whose index reaches
// the last usable position replicate left[max index].
// `above`, `dx` and `bd` are unused; `dx` must be 1 and `dy` positive.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int int_shift = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;
  int pos = dy;

  for (int c = 0; c < bw; ++c) {
    int idx = pos >> int_shift;
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;
    int r = 0;

    // Interpolated part of the column.
    while (r < bh && idx < last_idx) {
      const int blended = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(blended, 5);
      idx += idx_step;
      ++r;
    }
    // Anything left over is past the end of the edge: replicate.
    while (r < bh) {
      dst[r * stride + c] = left[last_idx];
      ++r;
    }

    pos += dy;
  }
}
778
779
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780
                                TX_SIZE tx_size, const uint16_t *above,
781
                                const uint16_t *left, int upsample_above,
782
1.24M
                                int upsample_left, int angle, int bd) {
783
1.24M
  const int dx = av1_get_dx(angle);
784
1.24M
  const int dy = av1_get_dy(angle);
785
1.24M
  const int bw = tx_size_wide[tx_size];
786
1.24M
  const int bh = tx_size_high[tx_size];
787
1.24M
  assert(angle > 0 && angle < 270);
788
789
1.24M
  if (angle > 0 && angle < 90) {
790
238k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791
238k
                                upsample_above, dx, dy, bd);
792
1.01M
  } else if (angle > 90 && angle < 180) {
793
442k
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794
442k
                                upsample_above, upsample_left, dx, dy, bd);
795
568k
  } else if (angle > 180 && angle < 270) {
796
259k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797
259k
                                dx, dy, bd);
798
309k
  } else if (angle == 90) {
799
125k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800
184k
  } else if (angle == 180) {
801
184k
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802
184k
  }
803
1.24M
}
804
#endif  // CONFIG_AV1_HIGHBITDEPTH
805
806
DECLARE_ALIGNED(16, const int8_t,
807
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808
  {
809
      { -6, 10, 0, 0, 0, 12, 0, 0 },
810
      { -5, 2, 10, 0, 0, 9, 0, 0 },
811
      { -3, 1, 1, 10, 0, 7, 0, 0 },
812
      { -3, 1, 1, 2, 10, 5, 0, 0 },
813
      { -4, 6, 0, 0, 0, 2, 12, 0 },
814
      { -3, 2, 6, 0, 0, 2, 9, 0 },
815
      { -3, 2, 2, 6, 0, 2, 7, 0 },
816
      { -3, 1, 2, 2, 6, 3, 5, 0 },
817
  },
818
  {
819
      { -10, 16, 0, 0, 0, 10, 0, 0 },
820
      { -6, 0, 16, 0, 0, 6, 0, 0 },
821
      { -4, 0, 0, 16, 0, 4, 0, 0 },
822
      { -2, 0, 0, 0, 16, 2, 0, 0 },
823
      { -10, 16, 0, 0, 0, 0, 10, 0 },
824
      { -6, 0, 16, 0, 0, 0, 6, 0 },
825
      { -4, 0, 0, 16, 0, 0, 4, 0 },
826
      { -2, 0, 0, 0, 16, 0, 2, 0 },
827
  },
828
  {
829
      { -8, 8, 0, 0, 0, 16, 0, 0 },
830
      { -8, 0, 8, 0, 0, 16, 0, 0 },
831
      { -8, 0, 0, 8, 0, 16, 0, 0 },
832
      { -8, 0, 0, 0, 8, 16, 0, 0 },
833
      { -4, 4, 0, 0, 0, 0, 16, 0 },
834
      { -4, 0, 4, 0, 0, 0, 16, 0 },
835
      { -4, 0, 0, 4, 0, 0, 16, 0 },
836
      { -4, 0, 0, 0, 4, 0, 16, 0 },
837
  },
838
  {
839
      { -2, 8, 0, 0, 0, 10, 0, 0 },
840
      { -1, 3, 8, 0, 0, 6, 0, 0 },
841
      { -1, 2, 3, 8, 0, 4, 0, 0 },
842
      { 0, 1, 2, 3, 8, 2, 0, 0 },
843
      { -1, 4, 0, 0, 0, 3, 10, 0 },
844
      { -1, 3, 4, 0, 0, 4, 6, 0 },
845
      { -1, 2, 3, 4, 0, 4, 4, 0 },
846
      { -1, 2, 2, 3, 4, 3, 3, 0 },
847
  },
848
  {
849
      { -12, 14, 0, 0, 0, 14, 0, 0 },
850
      { -10, 0, 14, 0, 0, 12, 0, 0 },
851
      { -9, 0, 0, 14, 0, 11, 0, 0 },
852
      { -8, 0, 0, 0, 14, 10, 0, 0 },
853
      { -10, 12, 0, 0, 0, 0, 14, 0 },
854
      { -9, 1, 12, 0, 0, 0, 12, 0 },
855
      { -8, 0, 0, 12, 0, 1, 11, 0 },
856
      { -7, 0, 0, 1, 12, 1, 9, 0 },
857
  },
858
};
859
860
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861
                                  TX_SIZE tx_size, const uint8_t *above,
862
339k
                                  const uint8_t *left, int mode) {
863
339k
  int r, c;
864
339k
  uint8_t buffer[33][33];
865
339k
  const int bw = tx_size_wide[tx_size];
866
339k
  const int bh = tx_size_high[tx_size];
867
868
339k
  assert(bw <= 32 && bh <= 32);
869
870
3.09M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871
339k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872
873
1.71M
  for (r = 1; r < bh + 1; r += 2)
874
5.06M
    for (c = 1; c < bw + 1; c += 4) {
875
3.68M
      const uint8_t p0 = buffer[r - 1][c - 1];
876
3.68M
      const uint8_t p1 = buffer[r - 1][c];
877
3.68M
      const uint8_t p2 = buffer[r - 1][c + 1];
878
3.68M
      const uint8_t p3 = buffer[r - 1][c + 2];
879
3.68M
      const uint8_t p4 = buffer[r - 1][c + 3];
880
3.68M
      const uint8_t p5 = buffer[r][c - 1];
881
3.68M
      const uint8_t p6 = buffer[r + 1][c - 1];
882
33.1M
      for (int k = 0; k < 8; ++k) {
883
29.4M
        int r_offset = k >> 2;
884
29.4M
        int c_offset = k & 0x03;
885
29.4M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886
29.4M
                 av1_filter_intra_taps[mode][k][1] * p1 +
887
29.4M
                 av1_filter_intra_taps[mode][k][2] * p2 +
888
29.4M
                 av1_filter_intra_taps[mode][k][3] * p3 +
889
29.4M
                 av1_filter_intra_taps[mode][k][4] * p4 +
890
29.4M
                 av1_filter_intra_taps[mode][k][5] * p5 +
891
29.4M
                 av1_filter_intra_taps[mode][k][6] * p6;
892
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
893
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894
        // Since Clip1() clips a negative value to 0, it is safe to replace
895
        // Round2Signed() with Round2().
896
29.4M
        buffer[r + r_offset][c + c_offset] =
897
29.4M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898
29.4M
      }
899
3.68M
    }
900
901
3.09M
  for (r = 0; r < bh; ++r) {
902
2.75M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903
2.75M
    dst += stride;
904
2.75M
  }
905
339k
}
906
907
#if CONFIG_AV1_HIGHBITDEPTH
908
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909
                                          TX_SIZE tx_size,
910
                                          const uint16_t *above,
911
                                          const uint16_t *left, int mode,
912
101k
                                          int bd) {
913
101k
  int r, c;
914
101k
  uint16_t buffer[33][33];
915
101k
  const int bw = tx_size_wide[tx_size];
916
101k
  const int bh = tx_size_high[tx_size];
917
918
101k
  assert(bw <= 32 && bh <= 32);
919
920
950k
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921
101k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922
923
525k
  for (r = 1; r < bh + 1; r += 2)
924
1.66M
    for (c = 1; c < bw + 1; c += 4) {
925
1.24M
      const uint16_t p0 = buffer[r - 1][c - 1];
926
1.24M
      const uint16_t p1 = buffer[r - 1][c];
927
1.24M
      const uint16_t p2 = buffer[r - 1][c + 1];
928
1.24M
      const uint16_t p3 = buffer[r - 1][c + 2];
929
1.24M
      const uint16_t p4 = buffer[r - 1][c + 3];
930
1.24M
      const uint16_t p5 = buffer[r][c - 1];
931
1.24M
      const uint16_t p6 = buffer[r + 1][c - 1];
932
11.1M
      for (int k = 0; k < 8; ++k) {
933
9.95M
        int r_offset = k >> 2;
934
9.95M
        int c_offset = k & 0x03;
935
9.95M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936
9.95M
                 av1_filter_intra_taps[mode][k][1] * p1 +
937
9.95M
                 av1_filter_intra_taps[mode][k][2] * p2 +
938
9.95M
                 av1_filter_intra_taps[mode][k][3] * p3 +
939
9.95M
                 av1_filter_intra_taps[mode][k][4] * p4 +
940
9.95M
                 av1_filter_intra_taps[mode][k][5] * p5 +
941
9.95M
                 av1_filter_intra_taps[mode][k][6] * p6;
942
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
943
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944
        // Since Clip1() clips a negative value to 0, it is safe to replace
945
        // Round2Signed() with Round2().
946
9.95M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947
9.95M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948
9.95M
      }
949
1.24M
    }
950
951
950k
  for (r = 0; r < bh; ++r) {
952
849k
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953
849k
    dst += stride;
954
849k
  }
955
101k
}
956
#endif  // CONFIG_AV1_HIGHBITDEPTH
957
958
5.95M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959
5.95M
  if (plane == 0) {
960
2.91M
    const PREDICTION_MODE mode = mbmi->mode;
961
2.91M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962
2.61M
            mode == SMOOTH_H_PRED);
963
3.03M
  } else {
964
    // uv_mode is not set for inter blocks, so need to explicitly
965
    // detect that case.
966
3.03M
    if (is_inter_block(mbmi)) return 0;
967
968
3.00M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969
3.00M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970
2.71M
            uv_mode == UV_SMOOTH_H_PRED);
971
3.03M
  }
972
5.95M
}
973
974
3.44M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975
3.44M
  const MB_MODE_INFO *above;
976
3.44M
  const MB_MODE_INFO *left;
977
978
3.44M
  if (plane == 0) {
979
1.68M
    above = xd->above_mbmi;
980
1.68M
    left = xd->left_mbmi;
981
1.76M
  } else {
982
1.76M
    above = xd->chroma_above_mbmi;
983
1.76M
    left = xd->chroma_left_mbmi;
984
1.76M
  }
985
986
3.44M
  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987
3.44M
}
988
989
1.51M
// Maps block size and prediction-angle offset to an edge-filter strength.
//
//   bs0, bs1 : the two block dimensions; only their sum is used.
//   delta    : angle offset; only its magnitude is used.
//   type     : 0 selects the first threshold table, any other value the
//              second one.
// Returns a strength in 0..3 (0 means no filtering).
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type == 0) {
    if (size_sum <= 8) return mag >= 56 ? 1 : 0;
    // Sums in (8, 12] and (12, 16] share the same threshold.
    if (size_sum <= 16) return mag >= 40 ? 1 : 0;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return mag >= 8 ? 1 : 0;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return mag >= 1 ? 1 : 0;
    }
    return mag >= 1 ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return mag >= 40 ? 1 : 0;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return mag >= 20 ? 1 : 0;
  }
  if (size_sum <= 24) return mag >= 4 ? 3 : 0;
  return mag >= 1 ? 3 : 0;
}
1027
1028
1.03M
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029
1.03M
  if (!strength) return;
1030
1031
597k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032
597k
                                                         { 0, 5, 6, 5, 0 },
1033
597k
                                                         { 2, 4, 4, 4, 2 } };
1034
597k
  const int filt = strength - 1;
1035
597k
  uint8_t edge[129];
1036
1037
597k
  memcpy(edge, p, sz * sizeof(*p));
1038
11.6M
  for (int i = 1; i < sz; i++) {
1039
11.0M
    int s = 0;
1040
66.2M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041
55.2M
      int k = i - 2 + j;
1042
55.2M
      k = (k < 0) ? 0 : k;
1043
55.2M
      k = (k > sz - 1) ? sz - 1 : k;
1044
55.2M
      s += edge[k] * kernel[filt][j];
1045
55.2M
    }
1046
11.0M
    s = (s + 8) >> 4;
1047
11.0M
    p[i] = s;
1048
11.0M
  }
1049
597k
}
1050
1051
101k
// Smooths the top-left corner sample with a [5 6 5] / 16 filter across
// p_left[0], p_above[-1] (the corner) and p_above[0], then stores the result
// in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = side_weight * p_left[0] + corner_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int smoothed = (weighted + 8) >> 4;

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1060
1061
313k
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062
  // interpolate half-sample positions
1063
313k
  assert(sz <= MAX_UPSAMPLE_SZ);
1064
1065
313k
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066
  // copy p[-1..(sz-1)] and extend first and last samples
1067
313k
  in[0] = p[-1];
1068
313k
  in[1] = p[-1];
1069
2.59M
  for (int i = 0; i < sz; i++) {
1070
2.28M
    in[i + 2] = p[i];
1071
2.28M
  }
1072
313k
  in[sz + 2] = p[sz - 1];
1073
1074
  // interpolate half-sample edge positions
1075
313k
  p[-2] = in[0];
1076
2.59M
  for (int i = 0; i < sz; i++) {
1077
2.28M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078
2.28M
    s = clip_pixel((s + 8) >> 4);
1079
2.28M
    p[2 * i - 1] = s;
1080
2.28M
    p[2 * i] = in[i + 2];
1081
2.28M
  }
1082
313k
}
1083
1084
static void build_directional_and_filter_intra_predictors(
1085
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088
2.06M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089
2.06M
  int i;
1090
2.06M
  const uint8_t *above_ref = ref - ref_stride;
1091
2.06M
  const uint8_t *left_ref = ref - 1;
1092
2.06M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093
2.06M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094
2.06M
  uint8_t *const above_row = above_data + 16;
1095
2.06M
  uint8_t *const left_col = left_data + 16;
1096
2.06M
  const int txwpx = tx_size_wide[tx_size];
1097
2.06M
  const int txhpx = tx_size_high[tx_size];
1098
2.06M
  int need_left = extend_modes[mode] & NEED_LEFT;
1099
2.06M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1100
2.06M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101
2.06M
  const int is_dr_mode = av1_is_directional_mode(mode);
1102
2.06M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103
2.06M
  assert(use_filter_intra || is_dr_mode);
1104
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1105
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1106
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1107
  // be the potential reason for this issue.
1108
2.06M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109
2.06M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110
1111
  // The default values if ref pixels are not available:
1112
  // 128 127 127 .. 127 127 127 127 127 127
1113
  // 129  A   B  ..  Y   Z
1114
  // 129  C   D  ..  W   X
1115
  // 129  E   F  ..  U   V
1116
  // 129  G   H  ..  S   T   T   T   T   T
1117
  // ..
1118
1119
2.06M
  if (is_dr_mode) {
1120
1.72M
    if (p_angle <= 90)
1121
643k
      need_above = 1, need_left = 0, need_above_left = 1;
1122
1.07M
    else if (p_angle < 180)
1123
476k
      need_above = 1, need_left = 1, need_above_left = 1;
1124
601k
    else
1125
601k
      need_above = 0, need_left = 1, need_above_left = 1;
1126
1.72M
  }
1127
2.06M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128
1129
2.06M
  assert(n_top_px >= 0);
1130
2.06M
  assert(n_topright_px >= -1);
1131
2.06M
  assert(n_left_px >= 0);
1132
2.06M
  assert(n_bottomleft_px >= -1);
1133
1134
2.06M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135
38.9k
    int val;
1136
38.9k
    if (need_left) {
1137
24.3k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1138
24.3k
    } else {
1139
14.5k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1140
14.5k
    }
1141
764k
    for (i = 0; i < txhpx; ++i) {
1142
725k
      memset(dst, val, txwpx);
1143
725k
      dst += dst_stride;
1144
725k
    }
1145
38.9k
    return;
1146
38.9k
  }
1147
1148
  // NEED_LEFT
1149
2.02M
  if (need_left) {
1150
1.39M
    const int num_left_pixels_needed =
1151
1.39M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152
1.39M
    i = 0;
1153
1.39M
    if (n_left_px > 0) {
1154
15.0M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155
1.37M
      if (n_bottomleft_px > 0) {
1156
78.5k
        assert(i == txhpx);
1157
941k
        for (; i < txhpx + n_bottomleft_px; i++)
1158
863k
          left_col[i] = left_ref[i * ref_stride];
1159
78.5k
      }
1160
1.37M
      if (i < num_left_pixels_needed)
1161
202k
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162
1.37M
    } else if (n_top_px > 0) {
1163
21.0k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1164
21.0k
    }
1165
1.39M
  }
1166
1167
  // NEED_ABOVE
1168
2.02M
  if (need_above) {
1169
1.44M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170
1.44M
    if (n_top_px > 0) {
1171
1.42M
      memcpy(above_row, above_ref, n_top_px);
1172
1.42M
      i = n_top_px;
1173
1.42M
      if (n_topright_px > 0) {
1174
172k
        assert(n_top_px == txwpx);
1175
172k
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176
172k
        i += n_topright_px;
1177
172k
      }
1178
1.42M
      if (i < num_top_pixels_needed)
1179
127k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180
1.42M
    } else if (n_left_px > 0) {
1181
15.5k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1182
15.5k
    }
1183
1.44M
  }
1184
1185
2.02M
  if (need_above_left) {
1186
2.02M
    if (n_top_px > 0 && n_left_px > 0) {
1187
1.95M
      above_row[-1] = above_ref[-1];
1188
1.95M
    } else if (n_top_px > 0) {
1189
38.1k
      above_row[-1] = above_ref[0];
1190
38.1k
    } else if (n_left_px > 0) {
1191
26.3k
      above_row[-1] = left_ref[0];
1192
26.3k
    } else {
1193
1.03k
      above_row[-1] = 128;
1194
1.03k
    }
1195
2.02M
    left_col[-1] = above_row[-1];
1196
2.02M
  }
1197
1198
2.02M
  if (use_filter_intra) {
1199
339k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200
339k
                               filter_intra_mode);
1201
339k
    return;
1202
339k
  }
1203
1204
2.02M
  assert(is_dr_mode);
1205
1.68M
  int upsample_above = 0;
1206
1.68M
  int upsample_left = 0;
1207
1.68M
  if (!disable_edge_filter) {
1208
1.30M
    const int need_right = p_angle < 90;
1209
1.30M
    const int need_bottom = p_angle > 180;
1210
1.30M
    if (p_angle != 90 && p_angle != 180) {
1211
710k
      assert(need_above_left);
1212
710k
      const int ab_le = 1;
1213
710k
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214
101k
        filter_intra_edge_corner(above_row, left_col);
1215
101k
      }
1216
710k
      if (need_above && n_top_px > 0) {
1217
521k
        const int strength = intra_edge_filter_strength(
1218
521k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219
521k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220
521k
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221
521k
      }
1222
710k
      if (need_left && n_left_px > 0) {
1223
512k
        const int strength = intra_edge_filter_strength(
1224
512k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225
512k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226
512k
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227
512k
      }
1228
710k
    }
1229
1.30M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230
1.30M
                                                 intra_edge_filter_type);
1231
1.30M
    if (need_above && upsample_above) {
1232
121k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1233
121k
      av1_upsample_intra_edge(above_row, n_px);
1234
121k
    }
1235
1.30M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236
1.30M
                                                intra_edge_filter_type);
1237
1.30M
    if (need_left && upsample_left) {
1238
191k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239
191k
      av1_upsample_intra_edge(left_col, n_px);
1240
191k
    }
1241
1.30M
  }
1242
1.68M
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243
1.68M
               upsample_left, p_angle);
1244
1.68M
}
1245
1246
// This function generates the pred data of a given block for non-directional
1247
// intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
1248
static void build_non_directional_intra_predictors(
1249
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250
9.22M
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251
9.22M
  const uint8_t *above_ref = ref - ref_stride;
1252
9.22M
  const uint8_t *left_ref = ref - 1;
1253
9.22M
  const int txwpx = tx_size_wide[tx_size];
1254
9.22M
  const int txhpx = tx_size_high[tx_size];
1255
9.22M
  const int need_left = extend_modes[mode] & NEED_LEFT;
1256
9.22M
  const int need_above = extend_modes[mode] & NEED_ABOVE;
1257
9.22M
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258
9.22M
  int i = 0;
1259
9.22M
  assert(n_top_px >= 0);
1260
9.22M
  assert(n_left_px >= 0);
1261
9.22M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262
9.22M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263
1264
9.22M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265
0
    int val = 0;
1266
0
    if (need_left) {
1267
0
      val = (n_top_px > 0) ? above_ref[0] : 129;
1268
0
    } else {
1269
0
      val = (n_left_px > 0) ? left_ref[0] : 127;
1270
0
    }
1271
0
    for (i = 0; i < txhpx; ++i) {
1272
0
      memset(dst, val, txwpx);
1273
0
      dst += dst_stride;
1274
0
    }
1275
0
    return;
1276
0
  }
1277
1278
9.22M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279
9.22M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280
9.22M
  uint8_t *const above_row = above_data + 16;
1281
9.22M
  uint8_t *const left_col = left_data + 16;
1282
1283
9.22M
  if (need_left) {
1284
9.22M
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285
9.22M
    if (n_left_px > 0) {
1286
82.2M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287
8.73M
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288
8.73M
    } else if (n_top_px > 0) {
1289
466k
      memset(left_col, above_ref[0], txhpx);
1290
466k
    }
1291
9.22M
  }
1292
1293
9.22M
  if (need_above) {
1294
9.22M
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295
9.22M
    if (n_top_px > 0) {
1296
8.99M
      memcpy(above_row, above_ref, n_top_px);
1297
8.99M
      i = n_top_px;
1298
8.99M
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299
8.99M
    } else if (n_left_px > 0) {
1300
204k
      memset(above_row, left_ref[0], txwpx);
1301
204k
    }
1302
9.22M
  }
1303
1304
9.22M
  if (need_above_left) {
1305
3.82M
    if (n_top_px > 0 && n_left_px > 0) {
1306
3.64M
      above_row[-1] = above_ref[-1];
1307
3.64M
    } else if (n_top_px > 0) {
1308
138k
      above_row[-1] = above_ref[0];
1309
138k
    } else if (n_left_px > 0) {
1310
43.1k
      above_row[-1] = left_ref[0];
1311
43.1k
    } else {
1312
1.24k
      above_row[-1] = 128;
1313
1.24k
    }
1314
3.82M
    left_col[-1] = above_row[-1];
1315
3.82M
  }
1316
1317
9.22M
  if (mode == DC_PRED) {
1318
4.44M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319
4.44M
                                                  left_col);
1320
4.77M
  } else {
1321
4.77M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322
4.77M
  }
1323
9.22M
}
1324
1325
#if CONFIG_AV1_HIGHBITDEPTH
1326
476k
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327
476k
  if (!strength) return;
1328
1329
292k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330
292k
                                                         { 0, 5, 6, 5, 0 },
1331
292k
                                                         { 2, 4, 4, 4, 2 } };
1332
292k
  const int filt = strength - 1;
1333
292k
  uint16_t edge[129];
1334
1335
292k
  memcpy(edge, p, sz * sizeof(*p));
1336
7.13M
  for (int i = 1; i < sz; i++) {
1337
6.84M
    int s = 0;
1338
41.0M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339
34.2M
      int k = i - 2 + j;
1340
34.2M
      k = (k < 0) ? 0 : k;
1341
34.2M
      k = (k > sz - 1) ? sz - 1 : k;
1342
34.2M
      s += edge[k] * kernel[filt][j];
1343
34.2M
    }
1344
6.84M
    s = (s + 8) >> 4;
1345
6.84M
    p[i] = s;
1346
6.84M
  }
1347
292k
}
1348
1349
// High-bitdepth corner smoothing: applies a [5 6 5] / 16 filter across
// p_left[0], p_above[-1] (the corner) and p_above[0], then stores the result
// in both p_above[-1] and p_left[-1].
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
                                            uint16_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = side_weight * p_left[0] + corner_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int smoothed = (weighted + 8) >> 4;

  p_above[-1] = smoothed;
  p_left[-1] = smoothed;
}
1359
1360
151k
void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361
  // interpolate half-sample positions
1362
151k
  assert(sz <= MAX_UPSAMPLE_SZ);
1363
1364
151k
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365
  // copy p[-1..(sz-1)] and extend first and last samples
1366
151k
  in[0] = p[-1];
1367
151k
  in[1] = p[-1];
1368
1.36M
  for (int i = 0; i < sz; i++) {
1369
1.21M
    in[i + 2] = p[i];
1370
1.21M
  }
1371
151k
  in[sz + 2] = p[sz - 1];
1372
1373
  // interpolate half-sample edge positions
1374
151k
  p[-2] = in[0];
1375
1.36M
  for (int i = 0; i < sz; i++) {
1376
1.21M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377
1.21M
    s = (s + 8) >> 4;
1378
1.21M
    s = clip_pixel_highbd(s, bd);
1379
1.21M
    p[2 * i - 1] = s;
1380
1.21M
    p[2 * i] = in[i + 2];
1381
1.21M
  }
1382
151k
}
1383
1384
static void highbd_build_directional_and_filter_intra_predictors(
1385
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389
1.38M
    int bit_depth) {
1390
1.38M
  int i;
1391
1.38M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392
1.38M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393
1.38M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394
1.38M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395
1.38M
  uint16_t *const above_row = above_data + 16;
1396
1.38M
  uint16_t *const left_col = left_data + 16;
1397
1.38M
  const int txwpx = tx_size_wide[tx_size];
1398
1.38M
  const int txhpx = tx_size_high[tx_size];
1399
1.38M
  int need_left = extend_modes[mode] & NEED_LEFT;
1400
1.38M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1401
1.38M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402
1.38M
  const uint16_t *above_ref = ref - ref_stride;
1403
1.38M
  const uint16_t *left_ref = ref - 1;
1404
1.38M
  const int is_dr_mode = av1_is_directional_mode(mode);
1405
1.38M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406
1.38M
  assert(use_filter_intra || is_dr_mode);
1407
1.38M
  const int base = 128 << (bit_depth - 8);
1408
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1409
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1410
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1411
  // seen to be the potential reason for this issue.
1412
1.38M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413
1.38M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414
1415
  // The default values if ref pixels are not available:
1416
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417
  // base+1   A      B  ..     Y      Z
1418
  // base+1   C      D  ..     W      X
1419
  // base+1   E      F  ..     U      V
1420
  // base+1   G      H  ..     S      T      T      T      T      T
1421
1422
1.38M
  if (is_dr_mode) {
1423
1.28M
    if (p_angle <= 90)
1424
379k
      need_above = 1, need_left = 0, need_above_left = 1;
1425
906k
    else if (p_angle < 180)
1426
442k
      need_above = 1, need_left = 1, need_above_left = 1;
1427
463k
    else
1428
463k
      need_above = 0, need_left = 1, need_above_left = 1;
1429
1.28M
  }
1430
1.38M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431
1432
1.38M
  assert(n_top_px >= 0);
1433
1.38M
  assert(n_topright_px >= -1);
1434
1.38M
  assert(n_left_px >= 0);
1435
1.38M
  assert(n_bottomleft_px >= -1);
1436
1437
1.38M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438
36.1k
    int val;
1439
36.1k
    if (need_left) {
1440
20.2k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441
20.2k
    } else {
1442
15.9k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443
15.9k
    }
1444
1.06M
    for (i = 0; i < txhpx; ++i) {
1445
1.02M
      aom_memset16(dst, val, txwpx);
1446
1.02M
      dst += dst_stride;
1447
1.02M
    }
1448
36.1k
    return;
1449
36.1k
  }
1450
1451
  // NEED_LEFT
1452
1.35M
  if (need_left) {
1453
986k
    const int num_left_pixels_needed =
1454
986k
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1455
986k
    i = 0;
1456
986k
    if (n_left_px > 0) {
1457
12.1M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458
965k
      if (n_bottomleft_px > 0) {
1459
85.9k
        assert(i == txhpx);
1460
970k
        for (; i < txhpx + n_bottomleft_px; i++)
1461
884k
          left_col[i] = left_ref[i * ref_stride];
1462
85.9k
      }
1463
965k
      if (i < num_left_pixels_needed)
1464
213k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465
965k
    } else if (n_top_px > 0) {
1466
18.6k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467
18.6k
    }
1468
986k
  }
1469
1470
  // NEED_ABOVE
1471
1.35M
  if (need_above) {
1472
907k
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473
907k
    if (n_top_px > 0) {
1474
882k
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475
882k
      i = n_top_px;
1476
882k
      if (n_topright_px > 0) {
1477
145k
        assert(n_top_px == txwpx);
1478
145k
        memcpy(above_row + txwpx, above_ref + txwpx,
1479
145k
               n_topright_px * sizeof(above_ref[0]));
1480
145k
        i += n_topright_px;
1481
145k
      }
1482
882k
      if (i < num_top_pixels_needed)
1483
135k
        aom_memset16(&above_row[i], above_row[i - 1],
1484
135k
                     num_top_pixels_needed - i);
1485
882k
    } else if (n_left_px > 0) {
1486
22.6k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487
22.6k
    }
1488
907k
  }
1489
1490
1.35M
  if (need_above_left) {
1491
1.35M
    if (n_top_px > 0 && n_left_px > 0) {
1492
1.27M
      above_row[-1] = above_ref[-1];
1493
1.27M
    } else if (n_top_px > 0) {
1494
32.4k
      above_row[-1] = above_ref[0];
1495
41.8k
    } else if (n_left_px > 0) {
1496
39.3k
      above_row[-1] = left_ref[0];
1497
39.3k
    } else {
1498
2.45k
      above_row[-1] = base;
1499
2.45k
    }
1500
1.35M
    left_col[-1] = above_row[-1];
1501
1.35M
  }
1502
1503
1.35M
  if (use_filter_intra) {
1504
101k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505
101k
                                  filter_intra_mode, bit_depth);
1506
101k
    return;
1507
101k
  }
1508
1509
1.35M
  assert(is_dr_mode);
1510
1.24M
  int upsample_above = 0;
1511
1.24M
  int upsample_left = 0;
1512
1.24M
  if (!disable_edge_filter) {
1513
460k
    const int need_right = p_angle < 90;
1514
460k
    const int need_bottom = p_angle > 180;
1515
460k
    if (p_angle != 90 && p_angle != 180) {
1516
343k
      assert(need_above_left);
1517
343k
      const int ab_le = 1;
1518
343k
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519
66.7k
        highbd_filter_intra_edge_corner(above_row, left_col);
1520
66.7k
      }
1521
343k
      if (need_above && n_top_px > 0) {
1522
217k
        const int strength = intra_edge_filter_strength(
1523
217k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524
217k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525
217k
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526
217k
      }
1527
343k
      if (need_left && n_left_px > 0) {
1528
259k
        const int strength = intra_edge_filter_strength(
1529
259k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530
259k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531
259k
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532
259k
      }
1533
343k
    }
1534
460k
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535
460k
                                                 intra_edge_filter_type);
1536
460k
    if (need_above && upsample_above) {
1537
50.6k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1538
50.6k
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539
50.6k
    }
1540
460k
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541
460k
                                                intra_edge_filter_type);
1542
460k
    if (need_left && upsample_left) {
1543
100k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544
100k
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545
100k
    }
1546
460k
  }
1547
1.24M
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548
1.24M
                      upsample_above, upsample_left, p_angle, bit_depth);
1549
1.24M
}
1550
1551
// For HBD encode/decode, this function generates the pred data of a given
1552
// block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553
// SMOOTH_V and PAETH).
1554
static void highbd_build_non_directional_intra_predictors(
1555
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557
5.09M
    int bit_depth) {
1558
5.09M
  int i = 0;
1559
5.09M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560
5.09M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561
5.09M
  const int txwpx = tx_size_wide[tx_size];
1562
5.09M
  const int txhpx = tx_size_high[tx_size];
1563
5.09M
  int need_left = extend_modes[mode] & NEED_LEFT;
1564
5.09M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1565
5.09M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566
5.09M
  const uint16_t *above_ref = ref - ref_stride;
1567
5.09M
  const uint16_t *left_ref = ref - 1;
1568
5.09M
  const int base = 128 << (bit_depth - 8);
1569
1570
5.09M
  assert(n_top_px >= 0);
1571
5.09M
  assert(n_left_px >= 0);
1572
5.09M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573
5.09M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574
1575
5.09M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576
0
    int val = 0;
1577
0
    if (need_left) {
1578
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579
0
    } else {
1580
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581
0
    }
1582
0
    for (i = 0; i < txhpx; ++i) {
1583
0
      aom_memset16(dst, val, txwpx);
1584
0
      dst += dst_stride;
1585
0
    }
1586
0
    return;
1587
0
  }
1588
1589
5.09M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590
5.09M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591
5.09M
  uint16_t *const above_row = above_data + 16;
1592
5.09M
  uint16_t *const left_col = left_data + 16;
1593
1594
5.09M
  if (need_left) {
1595
5.09M
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596
5.09M
    if (n_left_px > 0) {
1597
57.1M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598
4.66M
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599
4.66M
    } else if (n_top_px > 0) {
1600
407k
      aom_memset16(left_col, above_ref[0], txhpx);
1601
407k
    }
1602
5.09M
  }
1603
1604
5.09M
  if (need_above) {
1605
5.09M
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606
5.09M
    if (n_top_px > 0) {
1607
4.92M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608
4.92M
      i = n_top_px;
1609
4.92M
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610
4.92M
    } else if (n_left_px > 0) {
1611
148k
      aom_memset16(above_row, left_ref[0], txwpx);
1612
148k
    }
1613
5.09M
  }
1614
1615
5.09M
  if (need_above_left) {
1616
725k
    if (n_top_px > 0 && n_left_px > 0) {
1617
609k
      above_row[-1] = above_ref[-1];
1618
609k
    } else if (n_top_px > 0) {
1619
97.5k
      above_row[-1] = above_ref[0];
1620
97.5k
    } else if (n_left_px > 0) {
1621
16.9k
      above_row[-1] = left_ref[0];
1622
16.9k
    } else {
1623
1.48k
      above_row[-1] = base;
1624
1.48k
    }
1625
725k
    left_col[-1] = above_row[-1];
1626
725k
  }
1627
1628
5.09M
  if (mode == DC_PRED) {
1629
3.43M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630
3.43M
        dst, dst_stride, above_row, left_col, bit_depth);
1631
3.43M
  } else {
1632
1.66M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633
1.66M
  }
1634
5.09M
}
1635
#endif  // CONFIG_AV1_HIGHBITDEPTH
1636
1637
// Maps the block sizes that have a 4-sample dimension to a larger block size
// according to the chroma subsampling flags; every other block size is
// returned unchanged. Both subsampling arguments are expected to be 0 or 1.
static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);
  const int sub_x = (subsampling_x == 1);
  const int sub_y = (subsampling_y == 1);

  switch (bsize) {
    case BLOCK_4X4:
      // Either dimension may grow, depending on which axis is subsampled.
      if (sub_x) return sub_y ? BLOCK_8X8 : BLOCK_8X4;
      if (sub_y) return BLOCK_4X8;
      return bsize;
    case BLOCK_4X8:
      // Only the 4-wide dimension can grow; it does so with x subsampling.
      if (sub_x) return BLOCK_8X8;
      return bsize;
    case BLOCK_8X4:
      // Only the 4-high dimension can grow; it does so with y subsampling.
      if (sub_y) return BLOCK_8X8;
      return bsize;
    case BLOCK_4X16:
      if (sub_x) return BLOCK_8X16;
      return bsize;
    case BLOCK_16X4:
      if (sub_y) return BLOCK_16X8;
      return bsize;
    default: return bsize;
  }
}
1687
1688
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689
                             int enable_intra_edge_filter, int wpx, int hpx,
1690
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1691
                             int angle_delta, int use_palette,
1692
                             FILTER_INTRA_MODE filter_intra_mode,
1693
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1694
                             int dst_stride, int col_off, int row_off,
1695
20.0M
                             int plane) {
1696
20.0M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1697
20.0M
  const int txwpx = tx_size_wide[tx_size];
1698
20.0M
  const int txhpx = tx_size_high[tx_size];
1699
20.0M
  const int x = col_off << MI_SIZE_LOG2;
1700
20.0M
  const int y = row_off << MI_SIZE_LOG2;
1701
20.0M
  const int is_hbd = is_cur_buf_hbd(xd);
1702
1703
20.0M
  assert(mode < INTRA_MODES);
1704
1705
20.0M
  if (use_palette) {
1706
2.24M
    int r, c;
1707
2.24M
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708
2.24M
                               xd->color_index_map_offset[plane != 0];
1709
2.24M
    const uint16_t *const palette =
1710
2.24M
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1711
2.24M
    if (is_hbd) {
1712
298k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713
4.09M
      for (r = 0; r < txhpx; ++r) {
1714
65.4M
        for (c = 0; c < txwpx; ++c) {
1715
61.6M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716
61.6M
        }
1717
3.79M
      }
1718
1.94M
    } else {
1719
12.0M
      for (r = 0; r < txhpx; ++r) {
1720
89.2M
        for (c = 0; c < txwpx; ++c) {
1721
79.1M
          dst[r * dst_stride + c] =
1722
79.1M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723
79.1M
        }
1724
10.1M
      }
1725
1.94M
    }
1726
2.24M
    return;
1727
2.24M
  }
1728
1729
17.7M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1730
17.7M
  const int ss_x = pd->subsampling_x;
1731
17.7M
  const int ss_y = pd->subsampling_y;
1732
17.7M
  const int have_top =
1733
17.7M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734
17.7M
  const int have_left =
1735
17.7M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736
1737
  // Distance between the right edge of this prediction block to
1738
  // the frame right edge
1739
17.7M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740
  // Distance between the bottom edge of this prediction block to
1741
  // the frame bottom edge
1742
17.7M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743
17.7M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744
17.7M
  const int is_dr_mode = av1_is_directional_mode(mode);
1745
1746
  // The computations in this function, as well as in build_intra_predictors(),
1747
  // are generalized for all intra modes. Some of these operations are not
1748
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
1750
  // separate function build_non_directional_intra_predictors() is introduced
1751
  // for these modes to avoid redundant computations while generating pred data.
1752
1753
17.7M
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754
17.7M
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
1755
17.7M
  if (!use_filter_intra && !is_dr_mode) {
1756
14.3M
#if CONFIG_AV1_HIGHBITDEPTH
1757
14.3M
    if (is_hbd) {
1758
5.09M
      highbd_build_non_directional_intra_predictors(
1759
5.09M
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760
5.09M
          xd->bd);
1761
5.09M
      return;
1762
5.09M
    }
1763
9.22M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1764
9.22M
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765
9.22M
                                           mode, tx_size, n_top_px, n_left_px);
1766
9.22M
    return;
1767
14.3M
  }
1768
1769
3.44M
  const int txw = tx_size_wide_unit[tx_size];
1770
3.44M
  const int txh = tx_size_high_unit[tx_size];
1771
3.44M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772
3.44M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773
3.44M
  const int right_available =
1774
3.44M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775
3.44M
  const int bottom_available =
1776
3.44M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777
1778
3.44M
  const PARTITION_TYPE partition = mbmi->partition;
1779
1780
3.44M
  BLOCK_SIZE bsize = mbmi->bsize;
1781
  // force 4x4 chroma component block size.
1782
3.44M
  if (ss_x || ss_y) {
1783
298k
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784
298k
  }
1785
1786
3.44M
  int p_angle = 0;
1787
3.44M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788
3.44M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789
1790
3.44M
  if (use_filter_intra) {
1791
440k
    need_top_right = 0;
1792
440k
    need_bottom_left = 0;
1793
440k
  }
1794
3.44M
  if (is_dr_mode) {
1795
3.00M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1796
3.00M
    need_top_right = p_angle < 90;
1797
3.00M
    need_bottom_left = p_angle > 180;
1798
3.00M
  }
1799
1800
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1801
  // -1 : TR and BL are not needed
1802
  //  0 : TR and BL are needed but not available
1803
  // > 0 : TR and BL are needed and pixels are available
1804
3.44M
  const int have_top_right =
1805
3.44M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806
521k
                                     right_available, partition, tx_size,
1807
521k
                                     row_off, col_off, ss_x, ss_y)
1808
3.44M
                     : -1;
1809
3.44M
  const int have_bottom_left =
1810
3.44M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811
533k
                                         bottom_available, have_left, partition,
1812
533k
                                         tx_size, row_off, col_off, ss_x, ss_y)
1813
3.44M
                       : -1;
1814
1815
3.44M
  const int disable_edge_filter = !enable_intra_edge_filter;
1816
3.44M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817
3.44M
  const int n_topright_px =
1818
3.44M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819
3.44M
  const int n_bottomleft_px =
1820
3.44M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
1821
3.44M
#if CONFIG_AV1_HIGHBITDEPTH
1822
3.44M
  if (is_hbd) {
1823
1.38M
    highbd_build_directional_and_filter_intra_predictors(
1824
1.38M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825
1.38M
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826
1.38M
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827
1.38M
    return;
1828
1.38M
  }
1829
2.06M
#endif
1830
2.06M
  build_directional_and_filter_intra_predictors(
1831
2.06M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832
2.06M
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833
2.06M
      n_bottomleft_px, intra_edge_filter_type);
1834
2.06M
}
1835
1836
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837
                                    int plane, int blk_col, int blk_row,
1838
20.0M
                                    TX_SIZE tx_size) {
1839
20.0M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1840
20.0M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1841
20.0M
  const int dst_stride = pd->dst.stride;
1842
20.0M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843
20.0M
  const PREDICTION_MODE mode =
1844
20.0M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845
20.0M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846
20.0M
  const FILTER_INTRA_MODE filter_intra_mode =
1847
20.0M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848
20.0M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1849
20.0M
          : FILTER_INTRA_MODES;
1850
20.0M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851
20.0M
  const SequenceHeader *seq_params = cm->seq_params;
1852
1853
20.0M
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854
20.0M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855
#if CONFIG_DEBUG
1856
    assert(is_cfl_allowed(xd));
1857
    const BLOCK_SIZE plane_bsize =
1858
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859
    (void)plane_bsize;
1860
    assert(plane_bsize < BLOCK_SIZES_ALL);
1861
    if (!xd->lossless[mbmi->segment_id]) {
1862
      assert(blk_col == 0);
1863
      assert(blk_row == 0);
1864
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866
    }
1867
#endif
1868
1.67M
    CFL_CTX *const cfl = &xd->cfl;
1869
1.67M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870
1.67M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1871
1.67M
      av1_predict_intra_block(xd, seq_params->sb_size,
1872
1.67M
                              seq_params->enable_intra_edge_filter, pd->width,
1873
1.67M
                              pd->height, tx_size, mode, angle_delta,
1874
1.67M
                              use_palette, filter_intra_mode, dst, dst_stride,
1875
1.67M
                              dst, dst_stride, blk_col, blk_row, plane);
1876
1.67M
      if (cfl->use_dc_pred_cache) {
1877
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1879
0
      }
1880
18.4E
    } else {
1881
18.4E
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882
18.4E
    }
1883
1.67M
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884
1.67M
    return;
1885
1.67M
  }
1886
18.3M
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887
18.3M
  av1_predict_intra_block(
1888
18.3M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889
18.3M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890
18.3M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891
18.3M
}
1892
1893
15.5k
void av1_init_intra_predictors(void) {
1894
15.5k
  aom_once(init_intra_predictors_internal);
1895
15.5k
}