Coverage Report

Created: 2025-06-22 08:04

/src/aom/av1/common/reconintra.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
15
#include "config/aom_config.h"
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/av1_rtcd.h"
18
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/aom_once.h"
22
#include "aom_ports/mem.h"
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/cfl.h"
25
#include "av1/common/reconintra.h"
26
27
// Bit flags identifying which neighbouring edges an intra prediction mode
// reads. Bit 0 is not used by any flag.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};
34
35
// Sizing constants for intra edge processing.
// NOTE(review): the names suggest filter count, filter tap count and the
// largest upsampled edge length; confirm against the users of these macros.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Twice the largest transform size plus 32 extra pixels.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
40
static const uint8_t extend_modes[INTRA_MODES] = {
41
  NEED_ABOVE | NEED_LEFT,                   // DC
42
  NEED_ABOVE,                               // V
43
  NEED_LEFT,                                // H
44
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54
};
55
56
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data, so they are declared const; they are
// only reached through pointer-to-const tables.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130
  // 4X4
131
  has_tr_4x4,
132
  // 4X8,       8X4,            8X8
133
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
134
  // 8X16,      16X8,           16X16
135
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
136
  // 16X32,     32X16,          32X32
137
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
138
  // 32X64,     64X32,          64X64
139
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
140
  // 64x128,    128x64,         128x128
141
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
142
  // 4x16,      16x4,            8x32
143
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
144
  // 32x8,      16x64,           64x16
145
  has_tr_32x8, has_tr_16x64, has_tr_64x16
146
};
147
148
// Top-right availability bitmaps for square blocks coded under
// PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_tr_*
// tables. Read-only lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158
// The _vert_* tables are like the ordinary tables above, but describe the
159
// order we visit square blocks when doing a PARTITION_VERT_A or
160
// PARTITION_VERT_B. This is the same order as normal except for on the last
161
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162
// as a pair of squares, which means that these tables work correctly for both
163
// mixed vertical partition types.
164
//
165
// There are tables for each of the square sizes. Vertical rectangles (like
166
// BLOCK_16X32) use their respective "non-vert" table
167
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168
  // 4X4
169
  NULL,
170
  // 4X8,      8X4,         8X8
171
  has_tr_4x8, NULL, has_tr_vert_8x8,
172
  // 8X16,     16X8,        16X16
173
  has_tr_8x16, NULL, has_tr_vert_16x16,
174
  // 16X32,    32X16,       32X32
175
  has_tr_16x32, NULL, has_tr_vert_32x32,
176
  // 32X64,    64X32,       64X64
177
  has_tr_32x64, NULL, has_tr_vert_64x64,
178
  // 64x128,   128x64,      128x128
179
  has_tr_64x128, NULL, has_tr_128x128
180
};
181
182
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183
273k
                                       BLOCK_SIZE bsize) {
184
273k
  const uint8_t *ret = NULL;
185
  // If this is a mixed vertical partition, look up bsize in orders_vert.
186
273k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187
23.6k
    assert(bsize < BLOCK_SIZES);
188
23.6k
    ret = has_tr_vert_tables[bsize];
189
250k
  } else {
190
250k
    ret = has_tr_tables[bsize];
191
250k
  }
192
273k
  assert(ret);
193
273k
  return ret;
194
273k
}
195
196
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197
                         int mi_col, int top_available, int right_available,
198
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199
621k
                         int col_off, int ss_x, int ss_y) {
200
621k
  if (!top_available || !right_available) return 0;
201
202
556k
  const int bw_unit = mi_size_wide[bsize];
203
556k
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204
556k
  const int top_right_count_unit = tx_size_wide_unit[txsz];
205
206
556k
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
207
153k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208
      // Special case: For 128x128 blocks, the transform unit whose
209
      // top-right corner is at the center of the block does in fact have
210
      // pixels available at its top-right corner.
211
68.6k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212
68.6k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213
3.07k
        return 1;
214
3.07k
      }
215
65.5k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216
65.5k
      const int col_off_64 = col_off % plane_bw_unit_64;
217
65.5k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218
68.6k
    }
219
85.1k
    return col_off + top_right_count_unit < plane_bw_unit;
220
402k
  } else {
221
    // All top-right pixels are in the block above, which is already available.
222
402k
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223
224
371k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225
371k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226
371k
    const int sb_mi_size = mi_size_high[sb_size];
227
371k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228
371k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229
230
    // Top row of superblock: so top-right pixels are in the top and/or
231
    // top-right superblocks, both of which are already available.
232
371k
    if (blk_row_in_sb == 0) return 1;
233
234
    // Rightmost column of superblock (and not the top row): so top-right pixels
235
    // fall in the right superblock, which is not available yet.
236
320k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237
46.3k
      return 0;
238
46.3k
    }
239
240
    // General case (neither top row nor rightmost column): check if the
241
    // top-right block is coded before the current block.
242
273k
    const int this_blk_index =
243
273k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244
273k
        blk_col_in_sb + 0;
245
273k
    const int idx1 = this_blk_index / 8;
246
273k
    const int idx2 = this_blk_index % 8;
247
273k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248
273k
    return (has_tr_table[idx1] >> idx2) & 1;
249
320k
  }
250
556k
}
251
252
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. One bit per block, packed eight to a byte.
//
// The tables are read-only lookup data, so they are declared const; they are
// only reached through pointer-to-const tables.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315
  // 4X4
316
  has_bl_4x4,
317
  // 4X8,         8X4,         8X8
318
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
319
  // 8X16,        16X8,        16X16
320
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
321
  // 16X32,       32X16,       32X32
322
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
323
  // 32X64,       64X32,       64X64
324
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
325
  // 64x128,      128x64,      128x128
326
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
327
  // 4x16,        16x4,        8x32
328
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
329
  // 32x8,        16x64,       64x16
330
  has_bl_32x8, has_bl_16x64, has_bl_64x16
331
};
332
333
// Bottom-left availability bitmaps for square blocks coded under
// PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_bl_*
// tables. Read-only lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343
// The _vert_* tables are like the ordinary tables above, but describe the
344
// order we visit square blocks when doing a PARTITION_VERT_A or
345
// PARTITION_VERT_B. This is the same order as normal except for on the last
346
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347
// as a pair of squares, which means that these tables work correctly for both
348
// mixed vertical partition types.
349
//
350
// There are tables for each of the square sizes. Vertical rectangles (like
351
// BLOCK_16X32) use their respective "non-vert" table
352
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353
  // 4X4
354
  NULL,
355
  // 4X8,     8X4,         8X8
356
  has_bl_4x8, NULL, has_bl_vert_8x8,
357
  // 8X16,    16X8,        16X16
358
  has_bl_8x16, NULL, has_bl_vert_16x16,
359
  // 16X32,   32X16,       32X32
360
  has_bl_16x32, NULL, has_bl_vert_32x32,
361
  // 32X64,   64X32,       64X64
362
  has_bl_32x64, NULL, has_bl_vert_64x64,
363
  // 64x128,  128x64,      128x128
364
  has_bl_64x128, NULL, has_bl_128x128
365
};
366
367
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368
297k
                                       BLOCK_SIZE bsize) {
369
297k
  const uint8_t *ret = NULL;
370
  // If this is a mixed vertical partition, look up bsize in orders_vert.
371
297k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372
26.4k
    assert(bsize < BLOCK_SIZES);
373
26.4k
    ret = has_bl_vert_tables[bsize];
374
271k
  } else {
375
271k
    ret = has_bl_tables[bsize];
376
271k
  }
377
297k
  assert(ret);
378
297k
  return ret;
379
297k
}
380
381
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382
                           int mi_col, int bottom_available, int left_available,
383
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384
675k
                           int col_off, int ss_x, int ss_y) {
385
675k
  if (!bottom_available || !left_available) return 0;
386
387
  // Special case for 128x* blocks, when col_off is half the block width.
388
  // This is needed because 128x* superblocks are divided into 64x* blocks in
389
  // raster order
390
615k
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391
78.2k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392
78.2k
    const int col_off_64 = col_off % plane_bw_unit_64;
393
78.2k
    if (col_off_64 == 0) {
394
      // We are at the left edge of top-right or bottom-right 64x* block.
395
10.6k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396
10.6k
      const int row_off_64 = row_off % plane_bh_unit_64;
397
10.6k
      const int plane_bh_unit =
398
10.6k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399
      // Check if all bottom-left pixels are in the left 64x* block (which is
400
      // already coded).
401
10.6k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402
10.6k
    }
403
78.2k
  }
404
405
605k
  if (col_off > 0) {
406
    // Bottom-left pixels are in the bottom-left block, which is not available.
407
157k
    return 0;
408
447k
  } else {
409
447k
    const int bh_unit = mi_size_high[bsize];
410
447k
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411
447k
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
412
413
    // All bottom-left pixels are in the left block, which is already available.
414
447k
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415
416
415k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417
415k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418
415k
    const int sb_mi_size = mi_size_high[sb_size];
419
415k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420
415k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421
422
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
423
    // and/or bottom-left superblocks. But only the left superblock is
424
    // available, so check if all required pixels fall in that superblock.
425
415k
    if (blk_col_in_sb == 0) {
426
65.2k
      const int blk_start_row_off =
427
65.2k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428
65.2k
          ss_y;
429
65.2k
      const int row_off_in_sb = blk_start_row_off + row_off;
430
65.2k
      const int sb_height_unit = sb_mi_size >> ss_y;
431
65.2k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432
65.2k
    }
433
434
    // Bottom row of superblock (and not the leftmost column): so bottom-left
435
    // pixels fall in the bottom superblock, which is not available yet.
436
350k
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437
438
    // General case (neither leftmost column nor bottom row): check if the
439
    // bottom-left block is coded before the current block.
440
297k
    const int this_blk_index =
441
297k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442
297k
        blk_col_in_sb + 0;
443
297k
    const int idx1 = this_blk_index / 8;
444
297k
    const int idx2 = this_blk_index % 8;
445
297k
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446
297k
    return (has_bl_table[idx1] >> idx2) & 1;
447
350k
  }
448
605k
}
449
450
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451
                              const uint8_t *above, const uint8_t *left);
452
453
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456
#if CONFIG_AV1_HIGHBITDEPTH
457
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458
                                   const uint16_t *above, const uint16_t *left,
459
                                   int bd);
460
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462
#endif
463
464
2
static void init_intra_predictors_internal(void) {
465
2
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468
#define INIT_RECTANGULAR(p, type)             \
469
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
470
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
471
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
472
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
473
  p[TX_16X32] = aom_##type##_predictor_16x32; \
474
  p[TX_32X16] = aom_##type##_predictor_32x16; \
475
  p[TX_32X64] = aom_##type##_predictor_32x64; \
476
  p[TX_64X32] = aom_##type##_predictor_64x32;
477
#else
478
2
#define INIT_RECTANGULAR(p, type)             \
479
40
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
480
40
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
481
40
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
482
40
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
483
40
  p[TX_16X32] = aom_##type##_predictor_16x32; \
484
40
  p[TX_32X16] = aom_##type##_predictor_32x16; \
485
40
  p[TX_32X64] = aom_##type##_predictor_32x64; \
486
40
  p[TX_64X32] = aom_##type##_predictor_64x32; \
487
40
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
488
40
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
489
40
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
490
40
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
491
40
  p[TX_16X64] = aom_##type##_predictor_16x64; \
492
40
  p[TX_64X16] = aom_##type##_predictor_64x16;
493
2
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494
495
2
#define INIT_NO_4X4(p, type)                  \
496
40
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
497
40
  p[TX_16X16] = aom_##type##_predictor_16x16; \
498
40
  p[TX_32X32] = aom_##type##_predictor_32x32; \
499
40
  p[TX_64X64] = aom_##type##_predictor_64x64; \
500
40
  INIT_RECTANGULAR(p, type)
501
502
2
#define INIT_ALL_SIZES(p, type)           \
503
40
  p[TX_4X4] = aom_##type##_predictor_4x4; \
504
40
  INIT_NO_4X4(p, type)
505
506
2
  INIT_ALL_SIZES(pred[V_PRED], v)
507
2
  INIT_ALL_SIZES(pred[H_PRED], h)
508
2
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509
2
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510
2
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511
2
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512
2
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513
2
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514
2
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515
2
  INIT_ALL_SIZES(dc_pred[1][1], dc)
516
2
#if CONFIG_AV1_HIGHBITDEPTH
517
2
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518
2
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519
2
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520
2
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521
2
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522
2
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523
2
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524
2
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525
2
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526
2
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527
2
#endif
528
2
#undef intra_pred_allsizes
529
2
}
530
531
// Directional prediction, zone 1: 0 < angle < 90
//
// Predicts a bw x bh block from the `above` edge only. Each row advances the
// 6-bit fixed-point position by `dx`; each sample is a 5-bit weighted blend
// of two neighbouring edge pixels. Once the position reaches the last usable
// edge index, that edge pixel is replicated instead.
// `left` and `dy` are unused (dy is asserted to be 1).
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int int_shift = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint8_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r, row += stride, pos += dx) {
    int idx = pos >> int_shift;

    if (idx >= last_idx) {
      // This row and every remaining row lie entirely past the edge.
      for (int k = r; k < bh; ++k) {
        memset(row, above[last_idx], bw * sizeof(row[0]));
        row += stride;
      }
      return;
    }

    const int w_hi = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;
    for (int c = 0; c < bw; ++c, idx += idx_step) {
      if (idx >= last_idx) {
        row[c] = above[last_idx];
        continue;
      }
      const int blend = above[idx] * w_lo + above[idx + 1] * w_hi;
      row[c] = ROUND_POWER_OF_TWO(blend, 5);
    }
  }
}
568
569
// Directional prediction, zone 2: 90 < angle < 180
//
// Predicts a bw x bh block from both edges. For each sample the position is
// first projected onto the `above` edge; if that lands left of the smallest
// valid above index, it is projected onto the `left` edge instead. Either way
// the sample is a 5-bit weighted blend of two neighbouring edge pixels.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    const int row_dx = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const uint8_t *edge;
      int idx;
      int w_hi;

      const int x = (c << 6) - row_dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        edge = above;
        idx = base_x;
        w_hi = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        idx = base_y;
        w_hi = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int blend = edge[idx] * (32 - w_hi) + edge[idx + 1] * w_hi;
      row[c] = ROUND_POWER_OF_TWO(blend, 5);
    }
  }
}
607
608
// Directional prediction, zone 3: 180 < angle < 270
//
// Predicts a bw x bh block from the `left` edge only, one column at a time.
// Each column advances the 6-bit fixed-point position by `dy`; each sample is
// a 5-bit weighted blend of two neighbouring edge pixels. Once the position
// reaches the last usable edge index, that edge pixel fills the rest of the
// column. `above` and `dx` are unused (dx is asserted to be 1).
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int int_shift = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int c = 0; c < bw; ++c, pos += dy) {
    const int w_hi = ((pos << upsample_left) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;
    int idx = pos >> int_shift;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && idx < last_idx; ++r, idx += idx_step) {
      const int blend = left[idx] * w_lo + left[idx + 1] * w_hi;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(blend, 5);
    }
    // Remainder of the column replicates the last edge pixel.
    for (; r < bh; ++r) {
      dst[r * stride + c] = left[last_idx];
    }
  }
}
639
640
// Runs the 8-bit directional predictor for `angle` (asserted to lie strictly
// between 0 and 270 degrees) on a block of transform size `tx_size`.
//
// Exactly 90 and 180 degrees use the V_PRED / H_PRED entries of the `pred`
// table; every other angle goes to the zone 1, 2 or 3 predictor according to
// the 90-degree range it falls in.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
664
665
#if CONFIG_AV1_HIGHBITDEPTH
666
// Directional prediction, zone 1: 0 < angle < 90
//
// 16-bit sample version. Predicts a bw x bh block from the `above` edge only.
// Each row advances the 6-bit fixed-point position by `dx`; each sample is a
// 5-bit weighted blend of two neighbouring edge pixels. Once the position
// reaches the last usable edge index, that edge pixel is replicated instead.
// `left`, `dy` and `bd` are unused (dy is asserted to be 1).
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int int_shift = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint16_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r, row += stride, pos += dx) {
    int idx = pos >> int_shift;

    if (idx >= last_idx) {
      // This row and every remaining row lie entirely past the edge.
      for (int k = r; k < bh; ++k) {
        aom_memset16(row, above[last_idx], bw);
        row += stride;
      }
      return;
    }

    const int w_hi = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_lo = 32 - w_hi;
    for (int c = 0; c < bw; ++c, idx += idx_step) {
      if (idx >= last_idx) {
        row[c] = above[last_idx];
        continue;
      }
      const int blend = above[idx] * w_lo + above[idx + 1] * w_hi;
      row[c] = ROUND_POWER_OF_TWO(blend, 5);
    }
  }
}
705
706
// Directional prediction, zone 2: 90 < angle < 180
//
// 16-bit sample version. For each sample the position is first projected onto
// the `above` edge; if that lands left of the smallest valid above index, it
// is projected onto the `left` edge instead. Either way the sample is a 5-bit
// weighted blend of two neighbouring edge pixels. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    const int row_dx = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const uint16_t *edge;
      int idx;
      int w_hi;

      const int x = (c << 6) - row_dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        edge = above;
        idx = base_x;
        w_hi = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        idx = base_y;
        w_hi = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int blend = edge[idx] * (32 - w_hi) + edge[idx + 1] * w_hi;
      row[c] = ROUND_POWER_OF_TWO(blend, 5);
    }
  }
}
745
746
// Directional prediction, zone 3: 180 < angle < 270
// High bit-depth C implementation. Only the `left` edge is read: `above`,
// `dx` and `bd` are unused (dx is asserted to be 1). The block is produced
// column by column: column c samples the left edge at fixed-point position
// y = (c + 1) * dy and walks down the edge one step per output row. Once the
// edge index reaches max_base_y, the rest of that column is filled with
// left[max_base_y].
747
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
748
                                   int bh, const uint16_t *above,
749
                                   const uint16_t *left, int upsample_left,
750
302k
                                   int dx, int dy, int bd) {
751
302k
  int r, c, y, base, shift, val;
752
753
302k
  (void)above;
754
302k
  (void)dx;
755
302k
  (void)bd;
756
302k
  assert(dx == 1);
757
302k
  assert(dy > 0);
758
759
302k
  const int max_base_y = (bw + bh - 1) << upsample_left;
760
302k
  const int frac_bits = 6 - upsample_left;
761
302k
  const int base_inc = 1 << upsample_left;
762
302k
  y = dy;
763
3.95M
  for (c = 0; c < bw; ++c, y += dy) {
764
3.65M
    base = y >> frac_bits;
765
3.65M
    // Interpolation weight in [0, 31], shared by every pixel of this column.
    shift = ((y << upsample_left) & 0x3F) >> 1;
766
767
66.1M
    for (r = 0; r < bh; ++r, base += base_inc) {
768
62.4M
      if (base < max_base_y) {
769
62.4M
        val = left[base] * (32 - shift) + left[base + 1] * shift;
770
62.4M
        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
771
18.4E
      } else {
772
18.4E
        // Past the last usable edge sample: replicate it down the column.
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
773
18.4E
        break;
774
18.4E
      }
775
62.4M
    }
776
3.65M
  }
777
302k
}
778
779
// Dispatches a high bit-depth directional prediction by angle (asserted to
// lie strictly between 0 and 270):
//   0 < angle < 90    -> zone 1 (above edge only)
//   90 < angle < 180  -> zone 2 (above and left edges)
//   180 < angle < 270 -> zone 3 (left edge only)
//   angle == 90       -> pred_high[V_PRED]
//   angle == 180      -> pred_high[H_PRED]
// dx/dy come from av1_get_dx()/av1_get_dy(); block dimensions come from
// tx_size.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780
                                TX_SIZE tx_size, const uint16_t *above,
781
                                const uint16_t *left, int upsample_above,
782
1.56M
                                int upsample_left, int angle, int bd) {
783
1.56M
  const int dx = av1_get_dx(angle);
784
1.56M
  const int dy = av1_get_dy(angle);
785
1.56M
  const int bw = tx_size_wide[tx_size];
786
1.56M
  const int bh = tx_size_high[tx_size];
787
1.56M
  assert(angle > 0 && angle < 270);
788
789
1.56M
  if (angle > 0 && angle < 90) {
790
280k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791
280k
                                upsample_above, dx, dy, bd);
792
1.28M
  } else if (angle > 90 && angle < 180) {
793
571k
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794
571k
                                upsample_above, upsample_left, dx, dy, bd);
795
709k
  } else if (angle > 180 && angle < 270) {
796
302k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797
302k
                                dx, dy, bd);
798
406k
  } else if (angle == 90) {
799
142k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800
263k
  } else if (angle == 180) {
801
263k
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802
263k
  }
803
1.56M
}
804
#endif  // CONFIG_AV1_HIGHBITDEPTH
805
806
// Filter-intra tap table, indexed [mode][k][tap].
// For each mode there are 8 rows, one per output pixel k of a 4x2 patch (the
// predictors in this file map k to row k >> 2 and column k & 3). Entries 0..6
// of a row are the weights applied to the seven neighbouring samples p0..p6;
// the eighth entry is 0 in every row and is not read by those predictors.
DECLARE_ALIGNED(16, const int8_t,
807
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808
  {
809
      { -6, 10, 0, 0, 0, 12, 0, 0 },
810
      { -5, 2, 10, 0, 0, 9, 0, 0 },
811
      { -3, 1, 1, 10, 0, 7, 0, 0 },
812
      { -3, 1, 1, 2, 10, 5, 0, 0 },
813
      { -4, 6, 0, 0, 0, 2, 12, 0 },
814
      { -3, 2, 6, 0, 0, 2, 9, 0 },
815
      { -3, 2, 2, 6, 0, 2, 7, 0 },
816
      { -3, 1, 2, 2, 6, 3, 5, 0 },
817
  },
818
  {
819
      { -10, 16, 0, 0, 0, 10, 0, 0 },
820
      { -6, 0, 16, 0, 0, 6, 0, 0 },
821
      { -4, 0, 0, 16, 0, 4, 0, 0 },
822
      { -2, 0, 0, 0, 16, 2, 0, 0 },
823
      { -10, 16, 0, 0, 0, 0, 10, 0 },
824
      { -6, 0, 16, 0, 0, 0, 6, 0 },
825
      { -4, 0, 0, 16, 0, 0, 4, 0 },
826
      { -2, 0, 0, 0, 16, 0, 2, 0 },
827
  },
828
  {
829
      { -8, 8, 0, 0, 0, 16, 0, 0 },
830
      { -8, 0, 8, 0, 0, 16, 0, 0 },
831
      { -8, 0, 0, 8, 0, 16, 0, 0 },
832
      { -8, 0, 0, 0, 8, 16, 0, 0 },
833
      { -4, 4, 0, 0, 0, 0, 16, 0 },
834
      { -4, 0, 4, 0, 0, 0, 16, 0 },
835
      { -4, 0, 0, 4, 0, 0, 16, 0 },
836
      { -4, 0, 0, 0, 4, 0, 16, 0 },
837
  },
838
  {
839
      { -2, 8, 0, 0, 0, 10, 0, 0 },
840
      { -1, 3, 8, 0, 0, 6, 0, 0 },
841
      { -1, 2, 3, 8, 0, 4, 0, 0 },
842
      { 0, 1, 2, 3, 8, 2, 0, 0 },
843
      { -1, 4, 0, 0, 0, 3, 10, 0 },
844
      { -1, 3, 4, 0, 0, 4, 6, 0 },
845
      { -1, 2, 3, 4, 0, 4, 4, 0 },
846
      { -1, 2, 2, 3, 4, 3, 3, 0 },
847
  },
848
  {
849
      { -12, 14, 0, 0, 0, 14, 0, 0 },
850
      { -10, 0, 14, 0, 0, 12, 0, 0 },
851
      { -9, 0, 0, 14, 0, 11, 0, 0 },
852
      { -8, 0, 0, 0, 14, 10, 0, 0 },
853
      { -10, 12, 0, 0, 0, 0, 14, 0 },
854
      { -9, 1, 12, 0, 0, 0, 12, 0 },
855
      { -8, 0, 0, 12, 0, 1, 11, 0 },
856
      { -7, 0, 0, 1, 12, 1, 9, 0 },
857
  },
858
};
859
860
// Filter-intra predictor, 8-bit C implementation.
// Works in a local buffer whose row 0 holds above[-1 .. bw-1] and whose
// column 0 (rows 1..bh) holds left[0 .. bh-1]. The interior is then filled in
// 4-wide by 2-high patches, row pair by row pair and left to right, so each
// patch can use already-predicted pixels of earlier patches as neighbours.
// Every patch pixel is a weighted sum of seven neighbours (taps from
// av1_filter_intra_taps[mode]), rounded and clipped to 8 bits. Finally the
// bw x bh interior is copied to dst. Block sizes above 32x32 are rejected by
// assert only.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861
                                  TX_SIZE tx_size, const uint8_t *above,
862
522k
                                  const uint8_t *left, int mode) {
863
522k
  int r, c;
864
522k
  uint8_t buffer[33][33];
865
522k
  const int bw = tx_size_wide[tx_size];
866
522k
  const int bh = tx_size_high[tx_size];
867
868
522k
  assert(bw <= 32 && bh <= 32);
869
870
4.99M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871
522k
  // Copies the above-left corner sample plus bw above samples.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872
873
2.75M
  for (r = 1; r < bh + 1; r += 2)
874
8.44M
    for (c = 1; c < bw + 1; c += 4) {
875
6.20M
      // p0..p4: five samples in the row above the patch (starting one column
      // to its left); p5, p6: the two samples in the column left of the patch.
      const uint8_t p0 = buffer[r - 1][c - 1];
876
6.20M
      const uint8_t p1 = buffer[r - 1][c];
877
6.20M
      const uint8_t p2 = buffer[r - 1][c + 1];
878
6.20M
      const uint8_t p3 = buffer[r - 1][c + 2];
879
6.20M
      const uint8_t p4 = buffer[r - 1][c + 3];
880
6.20M
      const uint8_t p5 = buffer[r][c - 1];
881
6.20M
      const uint8_t p6 = buffer[r + 1][c - 1];
882
55.8M
      for (int k = 0; k < 8; ++k) {
883
49.6M
        int r_offset = k >> 2;
884
49.6M
        int c_offset = k & 0x03;
885
49.6M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886
49.6M
                 av1_filter_intra_taps[mode][k][1] * p1 +
887
49.6M
                 av1_filter_intra_taps[mode][k][2] * p2 +
888
49.6M
                 av1_filter_intra_taps[mode][k][3] * p3 +
889
49.6M
                 av1_filter_intra_taps[mode][k][4] * p4 +
890
49.6M
                 av1_filter_intra_taps[mode][k][5] * p5 +
891
49.6M
                 av1_filter_intra_taps[mode][k][6] * p6;
892
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
893
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894
        // Since Clip1() clips a negative value to 0, it is safe to replace
895
        // Round2Signed() with Round2().
896
49.6M
        buffer[r + r_offset][c + c_offset] =
897
49.6M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898
49.6M
      }
899
6.20M
    }
900
901
4.99M
  for (r = 0; r < bh; ++r) {
902
4.46M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903
4.46M
    dst += stride;
904
4.46M
  }
905
522k
}
906
907
#if CONFIG_AV1_HIGHBITDEPTH
908
// Filter-intra predictor for 16-bit samples.
// Same procedure as av1_filter_intra_predictor_c: seed a local buffer with
// above[-1 .. bw-1] (row 0) and left[0 .. bh-1] (column 0), fill the interior
// in 4x2 patches using the taps of av1_filter_intra_taps[mode], then copy the
// bw x bh interior to dst. Results are clipped with clip_pixel_highbd() for
// bit depth `bd`. Block sizes above 32x32 are rejected by assert only.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909
                                          TX_SIZE tx_size,
910
                                          const uint16_t *above,
911
                                          const uint16_t *left, int mode,
912
144k
                                          int bd) {
913
144k
  int r, c;
914
144k
  uint16_t buffer[33][33];
915
144k
  const int bw = tx_size_wide[tx_size];
916
144k
  const int bh = tx_size_high[tx_size];
917
918
144k
  assert(bw <= 32 && bh <= 32);
919
920
1.32M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921
144k
  // Copies the above-left corner sample plus bw above samples.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922
923
736k
  for (r = 1; r < bh + 1; r += 2)
924
2.23M
    for (c = 1; c < bw + 1; c += 4) {
925
1.63M
      // p0..p4: five samples in the row above the patch (starting one column
      // to its left); p5, p6: the two samples in the column left of the patch.
      const uint16_t p0 = buffer[r - 1][c - 1];
926
1.63M
      const uint16_t p1 = buffer[r - 1][c];
927
1.63M
      const uint16_t p2 = buffer[r - 1][c + 1];
928
1.63M
      const uint16_t p3 = buffer[r - 1][c + 2];
929
1.63M
      const uint16_t p4 = buffer[r - 1][c + 3];
930
1.63M
      const uint16_t p5 = buffer[r][c - 1];
931
1.63M
      const uint16_t p6 = buffer[r + 1][c - 1];
932
14.7M
      for (int k = 0; k < 8; ++k) {
933
13.1M
        int r_offset = k >> 2;
934
13.1M
        int c_offset = k & 0x03;
935
13.1M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936
13.1M
                 av1_filter_intra_taps[mode][k][1] * p1 +
937
13.1M
                 av1_filter_intra_taps[mode][k][2] * p2 +
938
13.1M
                 av1_filter_intra_taps[mode][k][3] * p3 +
939
13.1M
                 av1_filter_intra_taps[mode][k][4] * p4 +
940
13.1M
                 av1_filter_intra_taps[mode][k][5] * p5 +
941
13.1M
                 av1_filter_intra_taps[mode][k][6] * p6;
942
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
943
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944
        // Since Clip1() clips a negative value to 0, it is safe to replace
945
        // Round2Signed() with Round2().
946
13.1M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947
13.1M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948
13.1M
      }
949
1.63M
    }
950
951
1.32M
  for (r = 0; r < bh; ++r) {
952
1.18M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953
1.18M
    dst += stride;
954
1.18M
  }
955
144k
}
956
#endif  // CONFIG_AV1_HIGHBITDEPTH
957
958
7.64M
// Returns 1 if `mbmi` uses one of the three SMOOTH intra modes for `plane`,
// otherwise 0. Plane 0 looks at mbmi->mode; any other plane looks at
// mbmi->uv_mode and returns 0 for inter blocks.
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959
7.64M
  if (plane == 0) {
960
3.96M
    const PREDICTION_MODE mode = mbmi->mode;
961
3.96M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962
3.96M
            mode == SMOOTH_H_PRED);
963
3.96M
  } else {
964
    // uv_mode is not set for inter blocks, so need to explicitly
965
    // detect that case.
966
3.68M
    if (is_inter_block(mbmi)) return 0;
967
968
3.62M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969
3.62M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970
3.62M
            uv_mode == UV_SMOOTH_H_PRED);
971
3.68M
  }
972
7.64M
}
973
974
4.49M
// Returns 1 if the above or the left neighbouring block exists and is
// predicted with a SMOOTH mode (see is_smooth()), otherwise 0.
// Plane 0 uses xd->above_mbmi / xd->left_mbmi; other planes use the
// chroma_above_mbmi / chroma_left_mbmi pointers. A NULL neighbour is skipped.
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975
4.49M
  const MB_MODE_INFO *above;
976
4.49M
  const MB_MODE_INFO *left;
977
978
4.49M
  if (plane == 0) {
979
2.32M
    above = xd->above_mbmi;
980
2.32M
    left = xd->left_mbmi;
981
2.32M
  } else {
982
2.17M
    above = xd->chroma_above_mbmi;
983
2.17M
    left = xd->chroma_left_mbmi;
984
2.17M
  }
985
986
4.49M
  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987
4.49M
}
988
989
2.15M
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//   bs0, bs1: the two block dimensions; only their sum is used.
//   delta:    angle difference; only its absolute value is used.
//   type:     0 selects the first threshold table, any other value the
//             second one.
// Within each size bucket the thresholds are tested in increasing order, so
// the last satisfied test determines the result.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
990
2.15M
  const int d = abs(delta);
991
2.15M
  int strength = 0;
992
993
2.15M
  const int blk_wh = bs0 + bs1;
994
2.15M
  if (type == 0) {
995
1.62M
    if (blk_wh <= 8) {
996
551k
      if (d >= 56) strength = 1;
997
1.07M
    } else if (blk_wh <= 12) {
998
155k
      if (d >= 40) strength = 1;
999
916k
    } else if (blk_wh <= 16) {
1000
265k
      if (d >= 40) strength = 1;
1001
651k
    } else if (blk_wh <= 24) {
1002
277k
      if (d >= 8) strength = 1;
1003
277k
      if (d >= 16) strength = 2;
1004
277k
      if (d >= 32) strength = 3;
1005
374k
    } else if (blk_wh <= 32) {
1006
161k
      if (d >= 1) strength = 1;
1007
161k
      if (d >= 4) strength = 2;
1008
161k
      if (d >= 32) strength = 3;
1009
213k
    } else {
1010
213k
      if (d >= 1) strength = 3;
1011
213k
    }
1012
1.62M
  } else {
1013
529k
    if (blk_wh <= 8) {
1014
124k
      if (d >= 40) strength = 1;
1015
124k
      if (d >= 64) strength = 2;
1016
404k
    } else if (blk_wh <= 16) {
1017
165k
      if (d >= 20) strength = 1;
1018
165k
      if (d >= 48) strength = 2;
1019
239k
    } else if (blk_wh <= 24) {
1020
119k
      if (d >= 4) strength = 3;
1021
119k
    } else {
1022
119k
      if (d >= 1) strength = 3;
1023
119k
    }
1024
529k
  }
1025
2.15M
  return strength;
1026
2.15M
}
1027
1028
1.47M
// Smooths an 8-bit edge buffer in place with a 5-tap filter.
//   p:        edge samples p[0 .. sz-1]; p[0] is never modified.
//   sz:       number of samples.
//   strength: 0 returns immediately; otherwise kernel row (strength - 1) is
//             used.
// The filter reads from an unmodified copy of the edge, with tap positions
// clamped to [0, sz - 1], and each kernel row sums to 16.
// NOTE(review): the local copy holds 129 samples and sz is not checked here;
// callers are presumably responsible for sz <= 129 — confirm.
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029
1.47M
  if (!strength) return;
1030
1031
922k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032
922k
                                                         { 0, 5, 6, 5, 0 },
1033
922k
                                                         { 2, 4, 4, 4, 2 } };
1034
922k
  const int filt = strength - 1;
1035
922k
  uint8_t edge[129];
1036
1037
922k
  memcpy(edge, p, sz * sizeof(*p));
1038
17.6M
  for (int i = 1; i < sz; i++) {
1039
16.7M
    int s = 0;
1040
100M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041
83.8M
      // Tap j is centred on sample i; clamp out-of-range positions.
      int k = i - 2 + j;
1042
83.8M
      k = (k < 0) ? 0 : k;
1043
83.8M
      k = (k > sz - 1) ? sz - 1 : k;
1044
83.8M
      s += edge[k] * kernel[filt][j];
1045
83.8M
    }
1046
16.7M
    s = (s + 8) >> 4;
1047
16.7M
    p[i] = s;
1048
16.7M
  }
1049
922k
}
1050
1051
170k
// Smooths the corner sample shared by the two 8-bit edges:
//   s = (5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0] + 8) >> 4
// and stores s in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
1052
170k
  const int kernel[3] = { 5, 6, 5 };
1053
1054
170k
  int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
1055
170k
          (p_above[0] * kernel[2]);
1056
170k
  s = (s + 8) >> 4;
1057
170k
  p_above[-1] = s;
1058
170k
  p_left[-1] = s;
1059
170k
}
1060
1061
383k
// Doubles the resolution of an 8-bit edge buffer in place.
// On entry p[-1 .. sz-1] holds the edge. On return p[-2 .. 2*sz-2] holds the
// upsampled edge: p[-2] is the old p[-1], even indices p[2*i] are the old
// p[i], and odd indices p[2*i - 1] are half-sample values interpolated with
// the 4-tap filter (-1, 9, 9, -1) / 16 and clipped to 8 bits.
// sz is limited to MAX_UPSAMPLE_SZ by assert only.
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062
  // interpolate half-sample positions
1063
383k
  assert(sz <= MAX_UPSAMPLE_SZ);
1064
1065
383k
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066
  // copy p[-1..(sz-1)] and extend first and last samples
1067
383k
  in[0] = p[-1];
1068
383k
  in[1] = p[-1];
1069
3.49M
  for (int i = 0; i < sz; i++) {
1070
3.10M
    in[i + 2] = p[i];
1071
3.10M
  }
1072
383k
  in[sz + 2] = p[sz - 1];
1073
1074
  // interpolate half-sample edge positions
1075
383k
  p[-2] = in[0];
1076
3.49M
  for (int i = 0; i < sz; i++) {
1077
3.10M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078
3.10M
    s = clip_pixel((s + 8) >> 4);
1079
3.10M
    p[2 * i - 1] = s;
1080
3.10M
    p[2 * i] = in[i + 2];
1081
3.10M
  }
1082
383k
}
1083
1084
// Builds an 8-bit intra prediction for a directional mode or a filter-intra
// mode (at least one of the two must apply; see the assert below).
//   ref / ref_stride: top-left pixel of the block in the reference frame; the
//                     row above it and the column left of it are the
//                     neighbours.
//   dst / dst_stride: output block.
//   p_angle:          prediction angle, used for directional modes.
//   filter_intra_mode: FILTER_INTRA_MODES means "filter intra not used".
//   disable_edge_filter: non-zero skips edge filtering and upsampling.
//   n_top_px, n_left_px: number of available above / left pixels (>= 0).
//   n_topright_px, n_bottomleft_px: number of available above-right /
//                     bottom-left pixels; a negative value (-1 is the lowest
//                     allowed) means that extension is not needed at all.
//   intra_edge_filter_type: forwarded to the edge filter strength and
//                     upsampling decisions.
// Steps: work out which edges are needed, handle the degenerate case where
// the only needed edge is unavailable, gather the left / above / corner
// samples into local padded buffers, then either run the filter-intra
// predictor or (for directional modes) optionally filter and upsample the
// edges and call dr_predictor().
static void build_directional_and_filter_intra_predictors(
1085
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088
2.73M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089
2.73M
  int i;
1090
2.73M
  const uint8_t *above_ref = ref - ref_stride;
1091
2.73M
  const uint8_t *left_ref = ref - 1;
1092
2.73M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093
2.73M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094
2.73M
  // The edges start 16 entries into their buffers so that negative indices
  // (the corner sample and upsampling output) stay inside the arrays.
  uint8_t *const above_row = above_data + 16;
1095
2.73M
  uint8_t *const left_col = left_data + 16;
1096
2.73M
  const int txwpx = tx_size_wide[tx_size];
1097
2.73M
  const int txhpx = tx_size_high[tx_size];
1098
2.73M
  int need_left = extend_modes[mode] & NEED_LEFT;
1099
2.73M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1100
2.73M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101
2.73M
  const int is_dr_mode = av1_is_directional_mode(mode);
1102
2.73M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103
2.73M
  assert(use_filter_intra || is_dr_mode);
1104
  // The left_data, above_data buffers must be fully initialized to fix some
1105
  // intermittent valgrind errors. Uninitialized reads in intra pred modules
1106
  // (e.g. width = 4 path in av1_dr_prediction_z1_avx2()) from left_data,
1107
  // above_data are seen to be the potential reason for this issue.
1108
2.73M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109
2.73M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110
1111
  // The default values if ref pixels are not available:
1112
  // 128 127 127 .. 127 127 127 127 127 127
1113
  // 129  A   B  ..  Y   Z
1114
  // 129  C   D  ..  W   X
1115
  // 129  E   F  ..  U   V
1116
  // 129  G   H  ..  S   T   T   T   T   T
1117
  // ..
1118
1119
2.73M
  // For directional modes the needed edges are decided by the actual angle,
  // overriding the per-mode table lookups above.
  if (is_dr_mode) {
1120
2.21M
    if (p_angle <= 90)
1121
820k
      need_above = 1, need_left = 0, need_above_left = 1;
1122
1.39M
    else if (p_angle < 180)
1123
590k
      need_above = 1, need_left = 1, need_above_left = 1;
1124
802k
    else
1125
802k
      need_above = 0, need_left = 1, need_above_left = 1;
1126
2.21M
  }
1127
2.73M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128
1129
2.73M
  assert(n_top_px >= 0);
1130
2.73M
  assert(n_topright_px >= -1);
1131
2.73M
  assert(n_left_px >= 0);
1132
2.73M
  assert(n_bottomleft_px >= -1);
1133
1134
2.73M
  // Only one edge is needed and it has no available pixels: fill the whole
  // block with a single value taken from the other edge if that one is
  // available, or with the default (129 / 127) otherwise.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135
55.5k
    int val;
1136
55.5k
    if (need_left) {
1137
34.5k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1138
34.5k
    } else {
1139
21.0k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1140
21.0k
    }
1141
992k
    for (i = 0; i < txhpx; ++i) {
1142
936k
      memset(dst, val, txwpx);
1143
936k
      dst += dst_stride;
1144
936k
    }
1145
55.5k
    return;
1146
55.5k
  }
1147
1148
  // NEED_LEFT
1149
2.67M
  if (need_left) {
1150
1.88M
    const int num_left_pixels_needed =
1151
1.88M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152
1.88M
    i = 0;
1153
1.88M
    if (n_left_px > 0) {
1154
18.9M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155
1.84M
      if (n_bottomleft_px > 0) {
1156
106k
        assert(i == txhpx);
1157
1.08M
        for (; i < txhpx + n_bottomleft_px; i++)
1158
974k
          left_col[i] = left_ref[i * ref_stride];
1159
106k
      }
1160
1.84M
      // Pad the remainder by replicating the last copied sample.
      if (i < num_left_pixels_needed)
1161
279k
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162
1.84M
    } else if (n_top_px > 0) {
1163
32.2k
      // No left pixels available: substitute the first above pixel.
      memset(left_col, above_ref[0], num_left_pixels_needed);
1164
32.2k
    }
1165
1.88M
  }
1166
1167
  // NEED_ABOVE
1168
2.67M
  if (need_above) {
1169
1.91M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170
1.91M
    if (n_top_px > 0) {
1171
1.88M
      memcpy(above_row, above_ref, n_top_px);
1172
1.88M
      i = n_top_px;
1173
1.88M
      if (n_topright_px > 0) {
1174
191k
        assert(n_top_px == txwpx);
1175
191k
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176
191k
        i += n_topright_px;
1177
191k
      }
1178
1.88M
      // Pad the remainder by replicating the last copied sample.
      if (i < num_top_pixels_needed)
1179
178k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180
1.88M
    } else if (n_left_px > 0) {
1181
26.5k
      // No above pixels available: substitute the first left pixel.
      memset(above_row, left_ref[0], num_top_pixels_needed);
1182
26.5k
    }
1183
1.91M
  }
1184
1185
2.68M
  // Corner sample: the real above-left pixel when both edges exist, else the
  // first sample of whichever edge exists, else 128.
  if (need_above_left) {
1186
2.68M
    if (n_top_px > 0 && n_left_px > 0) {
1187
2.57M
      above_row[-1] = above_ref[-1];
1188
2.57M
    } else if (n_top_px > 0) {
1189
56.5k
      above_row[-1] = above_ref[0];
1190
56.5k
    } else if (n_left_px > 0) {
1191
47.7k
      above_row[-1] = left_ref[0];
1192
47.7k
    } else {
1193
1.91k
      above_row[-1] = 128;
1194
1.91k
    }
1195
2.68M
    left_col[-1] = above_row[-1];
1196
2.68M
  }
1197
1198
2.67M
  if (use_filter_intra) {
1199
522k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200
522k
                               filter_intra_mode);
1201
522k
    return;
1202
522k
  }
1203
1204
2.15M
  assert(is_dr_mode);
1205
2.15M
  int upsample_above = 0;
1206
2.15M
  int upsample_left = 0;
1207
2.15M
  if (!disable_edge_filter) {
1208
1.86M
    const int need_right = p_angle < 90;
1209
1.86M
    const int need_bottom = p_angle > 180;
1210
1.86M
    // Edge smoothing is skipped for the pure vertical / horizontal angles.
    if (p_angle != 90 && p_angle != 180) {
1211
1.01M
      assert(need_above_left);
1212
1.01M
      // ab_le: the filtered span starts one sample early so that it includes
      // the above-left corner.
      const int ab_le = 1;
1213
1.01M
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214
170k
        filter_intra_edge_corner(above_row, left_col);
1215
170k
      }
1216
1.01M
      if (need_above && n_top_px > 0) {
1217
741k
        const int strength = intra_edge_filter_strength(
1218
741k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219
741k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220
741k
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221
741k
      }
1222
1.01M
      if (need_left && n_left_px > 0) {
1223
737k
        const int strength = intra_edge_filter_strength(
1224
737k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225
737k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226
737k
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227
737k
      }
1228
1.01M
    }
1229
1.86M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230
1.86M
                                                 intra_edge_filter_type);
1231
1.86M
    if (need_above && upsample_above) {
1232
174k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1233
174k
      av1_upsample_intra_edge(above_row, n_px);
1234
174k
    }
1235
1.86M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236
1.86M
                                                intra_edge_filter_type);
1237
1.86M
    if (need_left && upsample_left) {
1238
208k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239
208k
      av1_upsample_intra_edge(left_col, n_px);
1240
208k
    }
1241
1.86M
  }
1242
2.15M
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243
2.15M
               upsample_left, p_angle);
1244
2.15M
}
1245
1246
// This function generates the pred data of a given block for non-directional
1247
// intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
// `ref` points at the block's top-left pixel in the reference frame; the row
// above it and the column to its left supply the neighbours. n_top_px and
// n_left_px give how many above / left pixels are available (>= 0). Missing
// edge samples are padded by replication, substituted from the other edge, or
// left at the defaults (127 above, 129 left, 128 corner). DC_PRED is
// dispatched through dc_pred[][][] keyed on edge availability; every other
// mode goes through pred[mode][tx_size].
1248
static void build_non_directional_intra_predictors(
1249
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250
11.6M
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251
11.6M
  const uint8_t *above_ref = ref - ref_stride;
1252
11.6M
  const uint8_t *left_ref = ref - 1;
1253
11.6M
  const int txwpx = tx_size_wide[tx_size];
1254
11.6M
  const int txhpx = tx_size_high[tx_size];
1255
11.6M
  const int need_left = extend_modes[mode] & NEED_LEFT;
1256
11.6M
  const int need_above = extend_modes[mode] & NEED_ABOVE;
1257
11.6M
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258
11.6M
  int i = 0;
1259
11.6M
  assert(n_top_px >= 0);
1260
11.6M
  assert(n_left_px >= 0);
1261
11.6M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262
11.6M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263
1264
11.6M
  // Only one edge is needed and it has no available pixels: fill the whole
  // block with a single value.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265
0
    int val = 0;
1266
0
    if (need_left) {
1267
0
      val = (n_top_px > 0) ? above_ref[0] : 129;
1268
0
    } else {
1269
0
      val = (n_left_px > 0) ? left_ref[0] : 127;
1270
0
    }
1271
0
    for (i = 0; i < txhpx; ++i) {
1272
0
      memset(dst, val, txwpx);
1273
0
      dst += dst_stride;
1274
0
    }
1275
0
    return;
1276
0
  }
1277
1278
11.6M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279
11.6M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280
11.6M
  // The edges start 16 entries into their buffers so that index -1 (the
  // corner sample) stays inside the arrays.
  uint8_t *const above_row = above_data + 16;
1281
11.6M
  uint8_t *const left_col = left_data + 16;
1282
1283
11.6M
  if (need_left) {
1284
11.6M
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285
11.6M
    if (n_left_px > 0) {
1286
98.4M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287
11.0M
      // Pad the remainder by replicating the last copied sample.
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288
11.0M
    } else if (n_top_px > 0) {
1289
634k
      memset(left_col, above_ref[0], txhpx);
1290
634k
    }
1291
11.6M
  }
1292
1293
11.6M
  if (need_above) {
1294
11.6M
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295
11.6M
    if (n_top_px > 0) {
1296
11.2M
      memcpy(above_row, above_ref, n_top_px);
1297
11.2M
      i = n_top_px;
1298
11.2M
      // Pad the remainder by replicating the last copied sample.
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299
11.2M
    } else if (n_left_px > 0) {
1300
356k
      memset(above_row, left_ref[0], txwpx);
1301
356k
    }
1302
11.6M
  }
1303
1304
11.6M
  if (need_above_left) {
1305
5.48M
    if (n_top_px > 0 && n_left_px > 0) {
1306
5.16M
      above_row[-1] = above_ref[-1];
1307
5.16M
    } else if (n_top_px > 0) {
1308
188k
      above_row[-1] = above_ref[0];
1309
188k
    } else if (n_left_px > 0) {
1310
123k
      above_row[-1] = left_ref[0];
1311
123k
    } else {
1312
2.50k
      above_row[-1] = 128;
1313
2.50k
    }
1314
5.48M
    left_col[-1] = above_row[-1];
1315
5.48M
  }
1316
1317
11.6M
  if (mode == DC_PRED) {
1318
4.87M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319
4.87M
                                                  left_col);
1320
6.80M
  } else {
1321
6.80M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322
6.80M
  }
1323
11.6M
}
1324
1325
#if CONFIG_AV1_HIGHBITDEPTH
1326
673k
// Smooths a 16-bit edge buffer in place with a 5-tap filter.
//   p:        edge samples p[0 .. sz-1]; p[0] is never modified.
//   sz:       number of samples.
//   strength: 0 returns immediately; otherwise kernel row (strength - 1) is
//             used.
// The filter reads from an unmodified copy of the edge, with tap positions
// clamped to [0, sz - 1], and each kernel row sums to 16.
// NOTE(review): the local copy holds 129 samples and sz is not checked here;
// callers are presumably responsible for sz <= 129 — confirm.
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327
673k
  if (!strength) return;
1328
1329
379k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330
379k
                                                         { 0, 5, 6, 5, 0 },
1331
379k
                                                         { 2, 4, 4, 4, 2 } };
1332
379k
  const int filt = strength - 1;
1333
379k
  uint16_t edge[129];
1334
1335
379k
  memcpy(edge, p, sz * sizeof(*p));
1336
7.68M
  for (int i = 1; i < sz; i++) {
1337
7.30M
    int s = 0;
1338
43.8M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339
36.5M
      // Tap j is centred on sample i; clamp out-of-range positions.
      int k = i - 2 + j;
1340
36.5M
      k = (k < 0) ? 0 : k;
1341
36.5M
      k = (k > sz - 1) ? sz - 1 : k;
1342
36.5M
      s += edge[k] * kernel[filt][j];
1343
36.5M
    }
1344
7.30M
    s = (s + 8) >> 4;
1345
7.30M
    p[i] = s;
1346
7.30M
  }
1347
379k
}
1348
1349
// Smooths the corner sample shared by the two 16-bit edges:
//   s = (5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0] + 8) >> 4
// and stores s in both p_above[-1] and p_left[-1].
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
1350
75.0k
                                            uint16_t *p_left) {
1351
75.0k
  const int kernel[3] = { 5, 6, 5 };
1352
1353
75.0k
  int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
1354
75.0k
          (p_above[0] * kernel[2]);
1355
75.0k
  s = (s + 8) >> 4;
1356
75.0k
  p_above[-1] = s;
1357
75.0k
  p_left[-1] = s;
1358
75.0k
}
1359
1360
245k
// Doubles the resolution of a 16-bit edge buffer in place.
// On entry p[-1 .. sz-1] holds the edge. On return p[-2 .. 2*sz-2] holds the
// upsampled edge: p[-2] is the old p[-1], even indices p[2*i] are the old
// p[i], and odd indices p[2*i - 1] are half-sample values interpolated with
// the 4-tap filter (-1, 9, 9, -1) / 16 and clipped with clip_pixel_highbd()
// for bit depth `bd`. sz is limited to MAX_UPSAMPLE_SZ by assert only.
void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361
  // interpolate half-sample positions
1362
245k
  assert(sz <= MAX_UPSAMPLE_SZ);
1363
1364
245k
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365
  // copy p[-1..(sz-1)] and extend first and last samples
1366
245k
  in[0] = p[-1];
1367
245k
  in[1] = p[-1];
1368
2.14M
  for (int i = 0; i < sz; i++) {
1369
1.90M
    in[i + 2] = p[i];
1370
1.90M
  }
1371
245k
  in[sz + 2] = p[sz - 1];
1372
1373
  // interpolate half-sample edge positions
1374
245k
  p[-2] = in[0];
1375
2.14M
  for (int i = 0; i < sz; i++) {
1376
1.90M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377
1.90M
    s = (s + 8) >> 4;
1378
1.90M
    s = clip_pixel_highbd(s, bd);
1379
1.90M
    p[2 * i - 1] = s;
1380
1.90M
    p[2 * i] = in[i + 2];
1381
1.90M
  }
1382
245k
}
1383
1384
static void highbd_build_directional_and_filter_intra_predictors(
1385
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389
1.76M
    int bit_depth) {
1390
1.76M
  int i;
1391
1.76M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392
1.76M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393
1.76M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394
1.76M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395
1.76M
  uint16_t *const above_row = above_data + 16;
1396
1.76M
  uint16_t *const left_col = left_data + 16;
1397
1.76M
  const int txwpx = tx_size_wide[tx_size];
1398
1.76M
  const int txhpx = tx_size_high[tx_size];
1399
1.76M
  int need_left = extend_modes[mode] & NEED_LEFT;
1400
1.76M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1401
1.76M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402
1.76M
  const uint16_t *above_ref = ref - ref_stride;
1403
1.76M
  const uint16_t *left_ref = ref - 1;
1404
1.76M
  const int is_dr_mode = av1_is_directional_mode(mode);
1405
1.76M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406
1.76M
  assert(use_filter_intra || is_dr_mode);
1407
1.76M
  const int base = 128 << (bit_depth - 8);
1408
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1409
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1410
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1411
  // seen to be the potential reason for this issue.
1412
1.76M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413
1.76M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414
1415
  // The default values if ref pixels are not available:
1416
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417
  // base+1   A      B  ..     Y      Z
1418
  // base+1   C      D  ..     W      X
1419
  // base+1   E      F  ..     U      V
1420
  // base+1   G      H  ..     S      T      T      T      T      T
1421
1422
1.76M
  if (is_dr_mode) {
1423
1.61M
    if (p_angle <= 90)
1424
445k
      need_above = 1, need_left = 0, need_above_left = 1;
1425
1.17M
    else if (p_angle < 180)
1426
571k
      need_above = 1, need_left = 1, need_above_left = 1;
1427
603k
    else
1428
603k
      need_above = 0, need_left = 1, need_above_left = 1;
1429
1.61M
  }
1430
1.76M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431
1432
1.76M
  assert(n_top_px >= 0);
1433
1.76M
  assert(n_topright_px >= -1);
1434
1.76M
  assert(n_left_px >= 0);
1435
1.76M
  assert(n_bottomleft_px >= -1);
1436
1437
1.76M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438
58.6k
    int val;
1439
58.6k
    if (need_left) {
1440
36.7k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441
36.7k
    } else {
1442
21.8k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443
21.8k
    }
1444
1.53M
    for (i = 0; i < txhpx; ++i) {
1445
1.47M
      aom_memset16(dst, val, txwpx);
1446
1.47M
      dst += dst_stride;
1447
1.47M
    }
1448
58.6k
    return;
1449
58.6k
  }
1450
1451
  // NEED_LEFT
1452
1.70M
  if (need_left) {
1453
1.28M
    const int num_left_pixels_needed =
1454
1.28M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1455
1.28M
    i = 0;
1456
1.28M
    if (n_left_px > 0) {
1457
14.5M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458
1.24M
      if (n_bottomleft_px > 0) {
1459
106k
        assert(i == txhpx);
1460
1.09M
        for (; i < txhpx + n_bottomleft_px; i++)
1461
990k
          left_col[i] = left_ref[i * ref_stride];
1462
106k
      }
1463
1.24M
      if (i < num_left_pixels_needed)
1464
256k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465
1.24M
    } else if (n_top_px > 0) {
1466
30.5k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467
30.5k
    }
1468
1.28M
  }
1469
1470
  // NEED_ABOVE
1471
1.70M
  if (need_above) {
1472
1.13M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473
1.13M
    if (n_top_px > 0) {
1474
1.10M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475
1.10M
      i = n_top_px;
1476
1.10M
      if (n_topright_px > 0) {
1477
175k
        assert(n_top_px == txwpx);
1478
175k
        memcpy(above_row + txwpx, above_ref + txwpx,
1479
175k
               n_topright_px * sizeof(above_ref[0]));
1480
175k
        i += n_topright_px;
1481
175k
      }
1482
1.10M
      if (i < num_top_pixels_needed)
1483
156k
        aom_memset16(&above_row[i], above_row[i - 1],
1484
156k
                     num_top_pixels_needed - i);
1485
1.10M
    } else if (n_left_px > 0) {
1486
28.7k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487
28.7k
    }
1488
1.13M
  }
1489
1490
1.70M
  if (need_above_left) {
1491
1.70M
    if (n_top_px > 0 && n_left_px > 0) {
1492
1.59M
      above_row[-1] = above_ref[-1];
1493
1.59M
    } else if (n_top_px > 0) {
1494
55.1k
      above_row[-1] = above_ref[0];
1495
56.9k
    } else if (n_left_px > 0) {
1496
52.8k
      above_row[-1] = left_ref[0];
1497
52.8k
    } else {
1498
4.10k
      above_row[-1] = base;
1499
4.10k
    }
1500
1.70M
    left_col[-1] = above_row[-1];
1501
1.70M
  }
1502
1503
1.70M
  if (use_filter_intra) {
1504
144k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505
144k
                                  filter_intra_mode, bit_depth);
1506
144k
    return;
1507
144k
  }
1508
1509
1.56M
  assert(is_dr_mode);
1510
1.56M
  int upsample_above = 0;
1511
1.56M
  int upsample_left = 0;
1512
1.56M
  if (!disable_edge_filter) {
1513
628k
    const int need_right = p_angle < 90;
1514
628k
    const int need_bottom = p_angle > 180;
1515
628k
    if (p_angle != 90 && p_angle != 180) {
1516
478k
      assert(need_above_left);
1517
478k
      const int ab_le = 1;
1518
478k
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519
75.0k
        highbd_filter_intra_edge_corner(above_row, left_col);
1520
75.0k
      }
1521
478k
      if (need_above && n_top_px > 0) {
1522
336k
        const int strength = intra_edge_filter_strength(
1523
336k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524
336k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525
336k
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526
336k
      }
1527
478k
      if (need_left && n_left_px > 0) {
1528
336k
        const int strength = intra_edge_filter_strength(
1529
336k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530
336k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531
336k
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532
336k
      }
1533
478k
    }
1534
628k
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535
628k
                                                 intra_edge_filter_type);
1536
628k
    if (need_above && upsample_above) {
1537
105k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1538
105k
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539
105k
    }
1540
628k
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541
628k
                                                intra_edge_filter_type);
1542
628k
    if (need_left && upsample_left) {
1543
139k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544
139k
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545
139k
    }
1546
628k
  }
1547
1.56M
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548
1.56M
                      upsample_above, upsample_left, p_angle, bit_depth);
1549
1.56M
}
1550
1551
// For HBD encode/decode, this function generates the pred data of a given
1552
// block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553
// SMOOTH_V and PAETH).
1554
static void highbd_build_non_directional_intra_predictors(
1555
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557
6.70M
    int bit_depth) {
1558
6.70M
  int i = 0;
1559
6.70M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560
6.70M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561
6.70M
  const int txwpx = tx_size_wide[tx_size];
1562
6.70M
  const int txhpx = tx_size_high[tx_size];
1563
6.70M
  int need_left = extend_modes[mode] & NEED_LEFT;
1564
6.70M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1565
6.70M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566
6.70M
  const uint16_t *above_ref = ref - ref_stride;
1567
6.70M
  const uint16_t *left_ref = ref - 1;
1568
6.70M
  const int base = 128 << (bit_depth - 8);
1569
1570
6.70M
  assert(n_top_px >= 0);
1571
6.70M
  assert(n_left_px >= 0);
1572
6.70M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573
6.70M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574
1575
6.70M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576
0
    int val = 0;
1577
0
    if (need_left) {
1578
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579
0
    } else {
1580
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581
0
    }
1582
0
    for (i = 0; i < txhpx; ++i) {
1583
0
      aom_memset16(dst, val, txwpx);
1584
0
      dst += dst_stride;
1585
0
    }
1586
0
    return;
1587
0
  }
1588
1589
6.70M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590
6.70M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591
6.70M
  uint16_t *const above_row = above_data + 16;
1592
6.70M
  uint16_t *const left_col = left_data + 16;
1593
1594
6.70M
  if (need_left) {
1595
6.70M
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596
6.70M
    if (n_left_px > 0) {
1597
70.6M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598
6.11M
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599
6.11M
    } else if (n_top_px > 0) {
1600
548k
      aom_memset16(left_col, above_ref[0], txhpx);
1601
548k
    }
1602
6.70M
  }
1603
1604
6.70M
  if (need_above) {
1605
6.70M
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606
6.70M
    if (n_top_px > 0) {
1607
6.41M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608
6.41M
      i = n_top_px;
1609
6.41M
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610
6.41M
    } else if (n_left_px > 0) {
1611
247k
      aom_memset16(above_row, left_ref[0], txwpx);
1612
247k
    }
1613
6.70M
  }
1614
1615
6.70M
  if (need_above_left) {
1616
1.07M
    if (n_top_px > 0 && n_left_px > 0) {
1617
884k
      above_row[-1] = above_ref[-1];
1618
884k
    } else if (n_top_px > 0) {
1619
111k
      above_row[-1] = above_ref[0];
1620
111k
    } else if (n_left_px > 0) {
1621
78.3k
      above_row[-1] = left_ref[0];
1622
78.3k
    } else {
1623
2.73k
      above_row[-1] = base;
1624
2.73k
    }
1625
1.07M
    left_col[-1] = above_row[-1];
1626
1.07M
  }
1627
1628
6.70M
  if (mode == DC_PRED) {
1629
4.57M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630
4.57M
        dst, dst_stride, above_row, left_col, bit_depth);
1631
4.57M
  } else {
1632
2.13M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633
2.13M
  }
1634
6.70M
}
1635
#endif  // CONFIG_AV1_HIGHBITDEPTH
1636
1637
// Returns the block size to use in place of `bsize` for a subsampled plane.
//
// Only block sizes with a 4-pixel dimension are affected: the 4-pixel width
// is doubled when subsampling_x == 1 and the 4-pixel height is doubled when
// subsampling_y == 1. Every other block size is returned unchanged.
static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);

  const int widen = (subsampling_x == 1);
  const int heighten = (subsampling_y == 1);

  switch (bsize) {
    case BLOCK_4X4:
      // Both dimensions are 4 pixels, so either may grow.
      if (widen) return heighten ? BLOCK_8X8 : BLOCK_8X4;
      return heighten ? BLOCK_4X8 : bsize;
    case BLOCK_4X8:
      // Only the width is 4 pixels.
      return widen ? BLOCK_8X8 : bsize;
    case BLOCK_8X4:
      // Only the height is 4 pixels.
      return heighten ? BLOCK_8X8 : bsize;
    case BLOCK_4X16:
      return widen ? BLOCK_8X16 : bsize;
    case BLOCK_16X4:
      return heighten ? BLOCK_16X8 : bsize;
    default: break;
  }
  return bsize;
}
1687
1688
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689
                             int enable_intra_edge_filter, int wpx, int hpx,
1690
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1691
                             int angle_delta, int use_palette,
1692
                             FILTER_INTRA_MODE filter_intra_mode,
1693
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1694
                             int dst_stride, int col_off, int row_off,
1695
25.9M
                             int plane) {
1696
25.9M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1697
25.9M
  const int txwpx = tx_size_wide[tx_size];
1698
25.9M
  const int txhpx = tx_size_high[tx_size];
1699
25.9M
  const int x = col_off << MI_SIZE_LOG2;
1700
25.9M
  const int y = row_off << MI_SIZE_LOG2;
1701
25.9M
  const int is_hbd = is_cur_buf_hbd(xd);
1702
1703
25.9M
  assert(mode < INTRA_MODES);
1704
1705
25.9M
  if (use_palette) {
1706
3.11M
    int r, c;
1707
3.11M
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708
3.11M
                               xd->color_index_map_offset[plane != 0];
1709
3.11M
    const uint16_t *const palette =
1710
3.11M
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1711
3.11M
    if (is_hbd) {
1712
396k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713
5.35M
      for (r = 0; r < txhpx; ++r) {
1714
79.7M
        for (c = 0; c < txwpx; ++c) {
1715
74.8M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716
74.8M
        }
1717
4.95M
      }
1718
2.72M
    } else {
1719
17.0M
      for (r = 0; r < txhpx; ++r) {
1720
129M
        for (c = 0; c < txwpx; ++c) {
1721
115M
          dst[r * dst_stride + c] =
1722
115M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723
115M
        }
1724
14.3M
      }
1725
2.72M
    }
1726
3.11M
    return;
1727
3.11M
  }
1728
1729
22.8M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1730
22.8M
  const int ss_x = pd->subsampling_x;
1731
22.8M
  const int ss_y = pd->subsampling_y;
1732
22.8M
  const int have_top =
1733
22.8M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734
22.8M
  const int have_left =
1735
22.8M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736
1737
  // Distance between the right edge of this prediction block to
1738
  // the frame right edge
1739
22.8M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740
  // Distance between the bottom edge of this prediction block to
1741
  // the frame bottom edge
1742
22.8M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743
22.8M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744
22.8M
  const int is_dr_mode = av1_is_directional_mode(mode);
1745
1746
  // The computations in this function, as well as in build_intra_predictors(),
1747
  // are generalized for all intra modes. Some of these operations are not
1748
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
1750
  // separate function build_non_directional_intra_predictors() is introduced
1751
  // for these modes to avoid redundant computations while generating pred data.
1752
1753
22.8M
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754
22.8M
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
1755
22.8M
  if (!use_filter_intra && !is_dr_mode) {
1756
18.3M
#if CONFIG_AV1_HIGHBITDEPTH
1757
18.3M
    if (is_hbd) {
1758
6.70M
      highbd_build_non_directional_intra_predictors(
1759
6.70M
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760
6.70M
          xd->bd);
1761
6.70M
      return;
1762
6.70M
    }
1763
11.6M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1764
11.6M
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765
11.6M
                                           mode, tx_size, n_top_px, n_left_px);
1766
11.6M
    return;
1767
18.3M
  }
1768
1769
4.49M
  const int txw = tx_size_wide_unit[tx_size];
1770
4.49M
  const int txh = tx_size_high_unit[tx_size];
1771
4.49M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772
4.49M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773
4.49M
  const int right_available =
1774
4.49M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775
4.49M
  const int bottom_available =
1776
4.49M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777
1778
4.49M
  const PARTITION_TYPE partition = mbmi->partition;
1779
1780
4.49M
  BLOCK_SIZE bsize = mbmi->bsize;
1781
  // force 4x4 chroma component block size.
1782
4.49M
  if (ss_x || ss_y) {
1783
498k
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784
498k
  }
1785
1786
4.49M
  int p_angle = 0;
1787
4.49M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788
4.49M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789
1790
4.49M
  if (use_filter_intra) {
1791
666k
    need_top_right = 0;
1792
666k
    need_bottom_left = 0;
1793
666k
  }
1794
4.49M
  if (is_dr_mode) {
1795
3.83M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1796
3.83M
    need_top_right = p_angle < 90;
1797
3.83M
    need_bottom_left = p_angle > 180;
1798
3.83M
  }
1799
1800
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1801
  // -1 : TR and BL are not needed
1802
  //  0 : TR and BL are needed but not available
1803
  // > 0 : TR and BL are needed and pixels are available
1804
4.49M
  const int have_top_right =
1805
4.49M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806
621k
                                     right_available, partition, tx_size,
1807
621k
                                     row_off, col_off, ss_x, ss_y)
1808
4.49M
                     : -1;
1809
4.49M
  const int have_bottom_left =
1810
4.49M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811
675k
                                         bottom_available, have_left, partition,
1812
675k
                                         tx_size, row_off, col_off, ss_x, ss_y)
1813
4.49M
                       : -1;
1814
1815
4.49M
  const int disable_edge_filter = !enable_intra_edge_filter;
1816
4.49M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817
4.49M
  const int n_topright_px =
1818
4.49M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819
4.49M
  const int n_bottomleft_px =
1820
4.49M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
1821
4.49M
#if CONFIG_AV1_HIGHBITDEPTH
1822
4.49M
  if (is_hbd) {
1823
1.76M
    highbd_build_directional_and_filter_intra_predictors(
1824
1.76M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825
1.76M
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826
1.76M
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827
1.76M
    return;
1828
1.76M
  }
1829
2.73M
#endif
1830
2.73M
  build_directional_and_filter_intra_predictors(
1831
2.73M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832
2.73M
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833
2.73M
      n_bottomleft_px, intra_edge_filter_type);
1834
2.73M
}
1835
1836
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837
                                    int plane, int blk_col, int blk_row,
1838
25.9M
                                    TX_SIZE tx_size) {
1839
25.9M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1840
25.9M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1841
25.9M
  const int dst_stride = pd->dst.stride;
1842
25.9M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843
25.9M
  const PREDICTION_MODE mode =
1844
25.9M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845
25.9M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846
25.9M
  const FILTER_INTRA_MODE filter_intra_mode =
1847
25.9M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848
25.9M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1849
25.9M
          : FILTER_INTRA_MODES;
1850
25.9M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851
25.9M
  const SequenceHeader *seq_params = cm->seq_params;
1852
1853
25.9M
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854
25.9M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855
#if CONFIG_DEBUG
1856
    assert(is_cfl_allowed(xd));
1857
    const BLOCK_SIZE plane_bsize =
1858
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859
    (void)plane_bsize;
1860
    assert(plane_bsize < BLOCK_SIZES_ALL);
1861
    if (!xd->lossless[mbmi->segment_id]) {
1862
      assert(blk_col == 0);
1863
      assert(blk_row == 0);
1864
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866
    }
1867
#endif
1868
2.32M
    CFL_CTX *const cfl = &xd->cfl;
1869
2.32M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870
2.32M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1871
2.32M
      av1_predict_intra_block(xd, seq_params->sb_size,
1872
2.32M
                              seq_params->enable_intra_edge_filter, pd->width,
1873
2.32M
                              pd->height, tx_size, mode, angle_delta,
1874
2.32M
                              use_palette, filter_intra_mode, dst, dst_stride,
1875
2.32M
                              dst, dst_stride, blk_col, blk_row, plane);
1876
2.32M
      if (cfl->use_dc_pred_cache) {
1877
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1879
0
      }
1880
2.32M
    } else {
1881
3
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882
3
    }
1883
2.32M
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884
2.32M
    return;
1885
2.32M
  }
1886
23.6M
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887
23.6M
  av1_predict_intra_block(
1888
23.6M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889
23.6M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890
23.6M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891
23.6M
}
1892
1893
28.2k
void av1_init_intra_predictors(void) {
1894
28.2k
  aom_once(init_intra_predictors_internal);
1895
28.2k
}