Coverage Report

Created: 2026-06-14 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconintra.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
15
#include "config/aom_config.h"
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/av1_rtcd.h"
18
19
#include "aom_dsp/aom_dsp_common.h"
20
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/aom_once.h"
22
#include "aom_ports/mem.h"
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/cfl.h"
25
#include "av1/common/reconintra.h"
26
27
// Bit flags naming the neighbouring pixel edges an intra prediction mode
// reads. They are OR-ed together per mode in extend_modes[]. Bit 0 is unused.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};
34
35
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Expressed relative to MAX_TX_SIZE: twice the largest transform size plus 32.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
40
static const uint8_t extend_modes[INTRA_MODES] = {
41
  NEED_ABOVE | NEED_LEFT,                   // DC
42
  NEED_ABOVE,                               // V
43
  NEED_LEFT,                                // H
44
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54
};
55
56
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data, so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130
  // 4X4
131
  has_tr_4x4,
132
  // 4X8,       8X4,            8X8
133
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
134
  // 8X16,      16X8,           16X16
135
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
136
  // 16X32,     32X16,          32X32
137
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
138
  // 32X64,     64X32,          64X64
139
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
140
  // 64x128,    128x64,         128x128
141
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
142
  // 4x16,      16x4,            8x32
143
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
144
  // 32x8,      16x64,           64x16
145
  has_tr_32x8, has_tr_16x64, has_tr_64x16
146
};
147
148
// Top-right availability bit tables for square blocks coded under a mixed
// vertical partition; referenced from has_tr_vert_tables[]. Read-only data.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158
// The _vert_* tables are like the ordinary tables above, but describe the
159
// order we visit square blocks when doing a PARTITION_VERT_A or
160
// PARTITION_VERT_B. This is the same order as normal except for on the last
161
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162
// as a pair of squares, which means that these tables work correctly for both
163
// mixed vertical partition types.
164
//
165
// There are tables for each of the square sizes. Vertical rectangles (like
166
// BLOCK_16X32) use their respective "non-vert" table
167
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168
  // 4X4
169
  NULL,
170
  // 4X8,      8X4,         8X8
171
  has_tr_4x8, NULL, has_tr_vert_8x8,
172
  // 8X16,     16X8,        16X16
173
  has_tr_8x16, NULL, has_tr_vert_16x16,
174
  // 16X32,    32X16,       32X32
175
  has_tr_16x32, NULL, has_tr_vert_32x32,
176
  // 32X64,    64X32,       64X64
177
  has_tr_32x64, NULL, has_tr_vert_64x64,
178
  // 64x128,   128x64,      128x128
179
  has_tr_64x128, NULL, has_tr_128x128
180
};
181
182
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183
257k
                                       BLOCK_SIZE bsize) {
184
257k
  const uint8_t *ret = NULL;
185
  // If this is a mixed vertical partition, look up bsize in orders_vert.
186
257k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187
21.3k
    assert(bsize < BLOCK_SIZES);
188
21.3k
    ret = has_tr_vert_tables[bsize];
189
236k
  } else {
190
236k
    ret = has_tr_tables[bsize];
191
236k
  }
192
257k
  assert(ret);
193
257k
  return ret;
194
257k
}
195
196
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197
                         int mi_col, int top_available, int right_available,
198
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199
612k
                         int col_off, int ss_x, int ss_y) {
200
612k
  if (!top_available || !right_available) return 0;
201
202
561k
  const int bw_unit = mi_size_wide[bsize];
203
561k
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204
561k
  const int top_right_count_unit = tx_size_wide_unit[txsz];
205
206
561k
  if (row_off > 0) {  // Just need to check if enough pixels on the right.
207
175k
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208
      // Special case: For 128x128 blocks, the transform unit whose
209
      // top-right corner is at the center of the block does in fact have
210
      // pixels available at its top-right corner.
211
72.7k
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212
14.5k
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213
4.94k
        return 1;
214
4.94k
      }
215
67.8k
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216
67.8k
      const int col_off_64 = col_off % plane_bw_unit_64;
217
67.8k
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218
72.7k
    }
219
102k
    return col_off + top_right_count_unit < plane_bw_unit;
220
385k
  } else {
221
    // All top-right pixels are in the block above, which is already available.
222
385k
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223
224
344k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225
344k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226
344k
    const int sb_mi_size = mi_size_high[sb_size];
227
344k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228
344k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229
230
    // Top row of superblock: so top-right pixels are in the top and/or
231
    // top-right superblocks, both of which are already available.
232
344k
    if (blk_row_in_sb == 0) return 1;
233
234
    // Rightmost column of superblock (and not the top row): so top-right pixels
235
    // fall in the right superblock, which is not available yet.
236
299k
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237
41.3k
      return 0;
238
41.3k
    }
239
240
    // General case (neither top row nor rightmost column): check if the
241
    // top-right block is coded before the current block.
242
257k
    const int this_blk_index =
243
257k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244
257k
        blk_col_in_sb + 0;
245
257k
    const int idx1 = this_blk_index / 8;
246
257k
    const int idx2 = this_blk_index % 8;
247
257k
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248
257k
    return (has_tr_table[idx1] >> idx2) & 1;
249
299k
  }
250
561k
}
251
252
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. Read-only lookup data, hence const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315
  // 4X4
316
  has_bl_4x4,
317
  // 4X8,         8X4,         8X8
318
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
319
  // 8X16,        16X8,        16X16
320
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
321
  // 16X32,       32X16,       32X32
322
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
323
  // 32X64,       64X32,       64X64
324
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
325
  // 64x128,      128x64,      128x128
326
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
327
  // 4x16,        16x4,        8x32
328
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
329
  // 32x8,        16x64,       64x16
330
  has_bl_32x8, has_bl_16x64, has_bl_64x16
331
};
332
333
// Bottom-left availability bit tables for square blocks coded under a mixed
// vertical partition; referenced from has_bl_vert_tables[]. Read-only data.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343
// The _vert_* tables are like the ordinary tables above, but describe the
344
// order we visit square blocks when doing a PARTITION_VERT_A or
345
// PARTITION_VERT_B. This is the same order as normal except for on the last
346
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347
// as a pair of squares, which means that these tables work correctly for both
348
// mixed vertical partition types.
349
//
350
// There are tables for each of the square sizes. Vertical rectangles (like
351
// BLOCK_16X32) use their respective "non-vert" table
352
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353
  // 4X4
354
  NULL,
355
  // 4X8,     8X4,         8X8
356
  has_bl_4x8, NULL, has_bl_vert_8x8,
357
  // 8X16,    16X8,        16X16
358
  has_bl_8x16, NULL, has_bl_vert_16x16,
359
  // 16X32,   32X16,       32X32
360
  has_bl_16x32, NULL, has_bl_vert_32x32,
361
  // 32X64,   64X32,       64X64
362
  has_bl_32x64, NULL, has_bl_vert_64x64,
363
  // 64x128,  128x64,      128x128
364
  has_bl_64x128, NULL, has_bl_128x128
365
};
366
367
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368
288k
                                       BLOCK_SIZE bsize) {
369
288k
  const uint8_t *ret = NULL;
370
  // If this is a mixed vertical partition, look up bsize in orders_vert.
371
288k
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372
23.6k
    assert(bsize < BLOCK_SIZES);
373
23.6k
    ret = has_bl_vert_tables[bsize];
374
264k
  } else {
375
264k
    ret = has_bl_tables[bsize];
376
264k
  }
377
288k
  assert(ret);
378
288k
  return ret;
379
288k
}
380
381
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382
                           int mi_col, int bottom_available, int left_available,
383
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384
679k
                           int col_off, int ss_x, int ss_y) {
385
679k
  if (!bottom_available || !left_available) return 0;
386
387
  // Special case for 128x* blocks, when col_off is half the block width.
388
  // This is needed because 128x* superblocks are divided into 64x* blocks in
389
  // raster order
390
635k
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391
89.4k
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392
89.4k
    const int col_off_64 = col_off % plane_bw_unit_64;
393
89.4k
    if (col_off_64 == 0) {
394
      // We are at the left edge of top-right or bottom-right 64x* block.
395
16.8k
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396
16.8k
      const int row_off_64 = row_off % plane_bh_unit_64;
397
16.8k
      const int plane_bh_unit =
398
16.8k
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399
      // Check if all bottom-left pixels are in the left 64x* block (which is
400
      // already coded).
401
16.8k
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402
16.8k
    }
403
89.4k
  }
404
405
618k
  if (col_off > 0) {
406
    // Bottom-left pixels are in the bottom-left block, which is not available.
407
184k
    return 0;
408
434k
  } else {
409
434k
    const int bh_unit = mi_size_high[bsize];
410
434k
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411
434k
    const int bottom_left_count_unit = tx_size_high_unit[txsz];
412
413
    // All bottom-left pixels are in the left block, which is already available.
414
434k
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415
416
395k
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417
395k
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418
395k
    const int sb_mi_size = mi_size_high[sb_size];
419
395k
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420
395k
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421
422
    // Leftmost column of superblock: so bottom-left pixels maybe in the left
423
    // and/or bottom-left superblocks. But only the left superblock is
424
    // available, so check if all required pixels fall in that superblock.
425
395k
    if (blk_col_in_sb == 0) {
426
58.7k
      const int blk_start_row_off =
427
58.7k
          blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428
58.7k
          ss_y;
429
58.7k
      const int row_off_in_sb = blk_start_row_off + row_off;
430
58.7k
      const int sb_height_unit = sb_mi_size >> ss_y;
431
58.7k
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432
58.7k
    }
433
434
    // Bottom row of superblock (and not the leftmost column): so bottom-left
435
    // pixels fall in the bottom superblock, which is not available yet.
436
337k
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437
438
    // General case (neither leftmost column nor bottom row): check if the
439
    // bottom-left block is coded before the current block.
440
288k
    const int this_blk_index =
441
288k
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442
288k
        blk_col_in_sb + 0;
443
288k
    const int idx1 = this_blk_index / 8;
444
288k
    const int idx2 = this_blk_index % 8;
445
288k
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446
288k
    return (has_bl_table[idx1] >> idx2) & 1;
447
337k
  }
448
618k
}
449
450
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451
                              const uint8_t *above, const uint8_t *left);
452
453
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456
#if CONFIG_AV1_HIGHBITDEPTH
457
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458
                                   const uint16_t *above, const uint16_t *left,
459
                                   int bd);
460
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462
#endif
463
464
2
static void init_intra_predictors_internal(void) {
465
2
  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468
#define INIT_RECTANGULAR(p, type)             \
469
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
470
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
471
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
472
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
473
  p[TX_16X32] = aom_##type##_predictor_16x32; \
474
  p[TX_32X16] = aom_##type##_predictor_32x16; \
475
  p[TX_32X64] = aom_##type##_predictor_32x64; \
476
  p[TX_64X32] = aom_##type##_predictor_64x32;
477
#else
478
2
#define INIT_RECTANGULAR(p, type)             \
479
40
  p[TX_4X8] = aom_##type##_predictor_4x8;     \
480
40
  p[TX_8X4] = aom_##type##_predictor_8x4;     \
481
40
  p[TX_8X16] = aom_##type##_predictor_8x16;   \
482
40
  p[TX_16X8] = aom_##type##_predictor_16x8;   \
483
40
  p[TX_16X32] = aom_##type##_predictor_16x32; \
484
40
  p[TX_32X16] = aom_##type##_predictor_32x16; \
485
40
  p[TX_32X64] = aom_##type##_predictor_32x64; \
486
40
  p[TX_64X32] = aom_##type##_predictor_64x32; \
487
40
  p[TX_4X16] = aom_##type##_predictor_4x16;   \
488
40
  p[TX_16X4] = aom_##type##_predictor_16x4;   \
489
40
  p[TX_8X32] = aom_##type##_predictor_8x32;   \
490
40
  p[TX_32X8] = aom_##type##_predictor_32x8;   \
491
40
  p[TX_16X64] = aom_##type##_predictor_16x64; \
492
40
  p[TX_64X16] = aom_##type##_predictor_64x16;
493
2
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494
495
2
#define INIT_NO_4X4(p, type)                  \
496
40
  p[TX_8X8] = aom_##type##_predictor_8x8;     \
497
40
  p[TX_16X16] = aom_##type##_predictor_16x16; \
498
40
  p[TX_32X32] = aom_##type##_predictor_32x32; \
499
40
  p[TX_64X64] = aom_##type##_predictor_64x64; \
500
40
  INIT_RECTANGULAR(p, type)
501
502
2
#define INIT_ALL_SIZES(p, type)           \
503
40
  p[TX_4X4] = aom_##type##_predictor_4x4; \
504
40
  INIT_NO_4X4(p, type)
505
506
2
  INIT_ALL_SIZES(pred[V_PRED], v)
507
2
  INIT_ALL_SIZES(pred[H_PRED], h)
508
2
  INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509
2
  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510
2
  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511
2
  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512
2
  INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513
2
  INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514
2
  INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515
2
  INIT_ALL_SIZES(dc_pred[1][1], dc)
516
2
#if CONFIG_AV1_HIGHBITDEPTH
517
2
  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518
2
  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519
2
  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520
2
  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521
2
  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522
2
  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523
2
  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524
2
  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525
2
  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526
2
  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527
2
#endif
528
2
#undef intra_pred_allsizes
529
2
}
530
531
// Directional prediction, zone 1: 0 < angle < 90
//
// Fills the bw x bh block at `dst` (row pitch `stride`) from the `above` edge
// only; `left` and `dy` are unused (dy is asserted to be 1). `dx` is the
// per-row step along the above edge in 1/64 sample units before upsampling.
// Each output is a 5-bit weighted blend of two neighbouring edge samples;
// positions at or past max_base_x replicate above[max_base_x].
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r, dst += stride, x += dx) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // This row and all rows below it lie entirely past the last usable
      // edge sample: fill them with that sample and stop.
      for (int i = r; i < bh; ++i) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
568
569
// Directional prediction, zone 2: 90 < angle < 180
//
// Fills the bw x bh block at `dst` (row pitch `stride`). Each sample is
// projected onto the `above` edge; if the projection falls left of
// min_base_x it is projected onto the `left` edge instead. Both edges are
// indexed with negative offsets (down to -(1 << upsample)), so the caller
// must provide readable samples before above[0] and left[0]. The result is a
// 5-bit weighted blend of two neighbouring edge samples.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      int val;
      if (base_x >= min_base_x) {
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
      }
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
607
608
// Directional prediction, zone 3: 180 < angle < 270
//
// Fills the bw x bh block at `dst` (row pitch `stride`) from the `left` edge
// only; `above` and `dx` are unused (dx is asserted to be 1). `dy` is the
// per-column step along the left edge in 1/64 sample units before upsampling.
// Each output is a 5-bit weighted blend of two neighbouring edge samples;
// positions at or past max_base_y replicate left[max_base_y].
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;

    for (int r = 0; r < bh; ++r, base += base_inc) {
      if (base < max_base_y) {
        const int val = left[base] * (32 - shift) + left[base + 1] * shift;
        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Remainder of this column is past the last usable edge sample.
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
        break;
      }
    }
  }
}
639
640
// Dispatches an 8-bit directional prediction of `tx_size` to the routine for
// the zone that `angle` (degrees, asserted to be in (0, 270)) falls into:
//   (0, 90)     av1_dr_prediction_z1
//   (90, 180)   av1_dr_prediction_z2
//   (180, 270)  av1_dr_prediction_z3
//   90 / 180    the plain V_PRED / H_PRED predictor from pred[]
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  } else if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
  } else if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
  }
}
664
665
#if CONFIG_AV1_HIGHBITDEPTH
666
// Directional prediction, zone 1: 0 < angle < 90
//
// 16-bit sample variant. Fills the bw x bh block at `dst` (row pitch
// `stride`) from the `above` edge only; `left`, `dy` and `bd` are unused (dy
// is asserted to be 1). Each output is a 5-bit weighted blend of two
// neighbouring edge samples; positions at or past max_base_x replicate
// above[max_base_x].
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r, dst += stride, x += dx) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // This row and all rows below it lie entirely past the last usable
      // edge sample: fill them with that sample and stop.
      for (int i = r; i < bh; ++i) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
705
706
// Directional prediction, zone 2: 90 < angle < 180
//
// 16-bit sample variant; `bd` is unused. Each sample is projected onto the
// `above` edge; if the projection falls left of min_base_x it is projected
// onto the `left` edge instead. Both edges are indexed with negative offsets
// (down to -(1 << upsample)), so the caller must provide readable samples
// before above[0] and left[0]. The result is a 5-bit weighted blend of two
// neighbouring edge samples.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      int val;
      if (base_x >= min_base_x) {
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
      }
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
745
746
// Directional prediction, zone 3: 180 < angle < 270.
//
// Fills a bw x bh block of 16-bit samples using only the `left` edge, one
// output column at a time. The position along the edge starts at dy and
// advances by dy per column. Within a column the integer edge index advances
// by (1 << upsample_left) per row and the fractional weight stays constant.
// Once the index reaches (bw + bh - 1) << upsample_left, the rest of the
// column replicates left[] at that last index.
//
// `above`, `dx` and `bd` are unused here (dx is asserted to be 1).
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int pos_shift = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    int idx = pos >> pos_shift;
    const int frac = ((pos << upsample_left) & 0x3F) >> 1;

    // Interpolated part of the column.
    int row = 0;
    for (; row < bh && idx < last_idx; ++row, idx += idx_step) {
      const int sum = left[idx] * (32 - frac) + left[idx + 1] * frac;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
    }

    // Remainder of the column (if any) lies past the end of the edge.
    for (; row < bh; ++row) {
      dst[row * stride + col] = left[last_idx];
    }
  }
}
778
779
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780
                                TX_SIZE tx_size, const uint16_t *above,
781
                                const uint16_t *left, int upsample_above,
782
1.45M
                                int upsample_left, int angle, int bd) {
783
1.45M
  const int dx = av1_get_dx(angle);
784
1.45M
  const int dy = av1_get_dy(angle);
785
1.45M
  const int bw = tx_size_wide[tx_size];
786
1.45M
  const int bh = tx_size_high[tx_size];
787
1.45M
  assert(angle > 0 && angle < 270);
788
789
1.45M
  if (angle > 0 && angle < 90) {
790
257k
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791
257k
                                upsample_above, dx, dy, bd);
792
1.19M
  } else if (angle > 90 && angle < 180) {
793
511k
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794
511k
                                upsample_above, upsample_left, dx, dy, bd);
795
685k
  } else if (angle > 180 && angle < 270) {
796
286k
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797
286k
                                dx, dy, bd);
798
399k
  } else if (angle == 90) {
799
152k
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800
246k
  } else if (angle == 180) {
801
246k
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802
246k
  }
803
1.45M
}
804
#endif  // CONFIG_AV1_HIGHBITDEPTH
805
806
DECLARE_ALIGNED(16, const int8_t,
807
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808
  {
809
      { -6, 10, 0, 0, 0, 12, 0, 0 },
810
      { -5, 2, 10, 0, 0, 9, 0, 0 },
811
      { -3, 1, 1, 10, 0, 7, 0, 0 },
812
      { -3, 1, 1, 2, 10, 5, 0, 0 },
813
      { -4, 6, 0, 0, 0, 2, 12, 0 },
814
      { -3, 2, 6, 0, 0, 2, 9, 0 },
815
      { -3, 2, 2, 6, 0, 2, 7, 0 },
816
      { -3, 1, 2, 2, 6, 3, 5, 0 },
817
  },
818
  {
819
      { -10, 16, 0, 0, 0, 10, 0, 0 },
820
      { -6, 0, 16, 0, 0, 6, 0, 0 },
821
      { -4, 0, 0, 16, 0, 4, 0, 0 },
822
      { -2, 0, 0, 0, 16, 2, 0, 0 },
823
      { -10, 16, 0, 0, 0, 0, 10, 0 },
824
      { -6, 0, 16, 0, 0, 0, 6, 0 },
825
      { -4, 0, 0, 16, 0, 0, 4, 0 },
826
      { -2, 0, 0, 0, 16, 0, 2, 0 },
827
  },
828
  {
829
      { -8, 8, 0, 0, 0, 16, 0, 0 },
830
      { -8, 0, 8, 0, 0, 16, 0, 0 },
831
      { -8, 0, 0, 8, 0, 16, 0, 0 },
832
      { -8, 0, 0, 0, 8, 16, 0, 0 },
833
      { -4, 4, 0, 0, 0, 0, 16, 0 },
834
      { -4, 0, 4, 0, 0, 0, 16, 0 },
835
      { -4, 0, 0, 4, 0, 0, 16, 0 },
836
      { -4, 0, 0, 0, 4, 0, 16, 0 },
837
  },
838
  {
839
      { -2, 8, 0, 0, 0, 10, 0, 0 },
840
      { -1, 3, 8, 0, 0, 6, 0, 0 },
841
      { -1, 2, 3, 8, 0, 4, 0, 0 },
842
      { 0, 1, 2, 3, 8, 2, 0, 0 },
843
      { -1, 4, 0, 0, 0, 3, 10, 0 },
844
      { -1, 3, 4, 0, 0, 4, 6, 0 },
845
      { -1, 2, 3, 4, 0, 4, 4, 0 },
846
      { -1, 2, 2, 3, 4, 3, 3, 0 },
847
  },
848
  {
849
      { -12, 14, 0, 0, 0, 14, 0, 0 },
850
      { -10, 0, 14, 0, 0, 12, 0, 0 },
851
      { -9, 0, 0, 14, 0, 11, 0, 0 },
852
      { -8, 0, 0, 0, 14, 10, 0, 0 },
853
      { -10, 12, 0, 0, 0, 0, 14, 0 },
854
      { -9, 1, 12, 0, 0, 0, 12, 0 },
855
      { -8, 0, 0, 12, 0, 1, 11, 0 },
856
      { -7, 0, 0, 1, 12, 1, 9, 0 },
857
  },
858
};
859
860
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861
                                  TX_SIZE tx_size, const uint8_t *above,
862
383k
                                  const uint8_t *left, int mode) {
863
383k
  int r, c;
864
383k
  uint8_t buffer[33][33];
865
383k
  const int bw = tx_size_wide[tx_size];
866
383k
  const int bh = tx_size_high[tx_size];
867
868
383k
  assert(bw <= 32 && bh <= 32);
869
870
3.43M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871
383k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872
873
1.90M
  for (r = 1; r < bh + 1; r += 2)
874
5.59M
    for (c = 1; c < bw + 1; c += 4) {
875
4.07M
      const uint8_t p0 = buffer[r - 1][c - 1];
876
4.07M
      const uint8_t p1 = buffer[r - 1][c];
877
4.07M
      const uint8_t p2 = buffer[r - 1][c + 1];
878
4.07M
      const uint8_t p3 = buffer[r - 1][c + 2];
879
4.07M
      const uint8_t p4 = buffer[r - 1][c + 3];
880
4.07M
      const uint8_t p5 = buffer[r][c - 1];
881
4.07M
      const uint8_t p6 = buffer[r + 1][c - 1];
882
36.6M
      for (int k = 0; k < 8; ++k) {
883
32.5M
        int r_offset = k >> 2;
884
32.5M
        int c_offset = k & 0x03;
885
32.5M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886
32.5M
                 av1_filter_intra_taps[mode][k][1] * p1 +
887
32.5M
                 av1_filter_intra_taps[mode][k][2] * p2 +
888
32.5M
                 av1_filter_intra_taps[mode][k][3] * p3 +
889
32.5M
                 av1_filter_intra_taps[mode][k][4] * p4 +
890
32.5M
                 av1_filter_intra_taps[mode][k][5] * p5 +
891
32.5M
                 av1_filter_intra_taps[mode][k][6] * p6;
892
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
893
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894
        // Since Clip1() clips a negative value to 0, it is safe to replace
895
        // Round2Signed() with Round2().
896
32.5M
        buffer[r + r_offset][c + c_offset] =
897
32.5M
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898
32.5M
      }
899
4.07M
    }
900
901
3.43M
  for (r = 0; r < bh; ++r) {
902
3.05M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903
3.05M
    dst += stride;
904
3.05M
  }
905
383k
}
906
907
#if CONFIG_AV1_HIGHBITDEPTH
908
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909
                                          TX_SIZE tx_size,
910
                                          const uint16_t *above,
911
                                          const uint16_t *left, int mode,
912
152k
                                          int bd) {
913
152k
  int r, c;
914
152k
  uint16_t buffer[33][33];
915
152k
  const int bw = tx_size_wide[tx_size];
916
152k
  const int bh = tx_size_high[tx_size];
917
918
152k
  assert(bw <= 32 && bh <= 32);
919
920
1.49M
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921
152k
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922
923
823k
  for (r = 1; r < bh + 1; r += 2)
924
2.69M
    for (c = 1; c < bw + 1; c += 4) {
925
2.02M
      const uint16_t p0 = buffer[r - 1][c - 1];
926
2.02M
      const uint16_t p1 = buffer[r - 1][c];
927
2.02M
      const uint16_t p2 = buffer[r - 1][c + 1];
928
2.02M
      const uint16_t p3 = buffer[r - 1][c + 2];
929
2.02M
      const uint16_t p4 = buffer[r - 1][c + 3];
930
2.02M
      const uint16_t p5 = buffer[r][c - 1];
931
2.02M
      const uint16_t p6 = buffer[r + 1][c - 1];
932
18.2M
      for (int k = 0; k < 8; ++k) {
933
16.1M
        int r_offset = k >> 2;
934
16.1M
        int c_offset = k & 0x03;
935
16.1M
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936
16.1M
                 av1_filter_intra_taps[mode][k][1] * p1 +
937
16.1M
                 av1_filter_intra_taps[mode][k][2] * p2 +
938
16.1M
                 av1_filter_intra_taps[mode][k][3] * p3 +
939
16.1M
                 av1_filter_intra_taps[mode][k][4] * p4 +
940
16.1M
                 av1_filter_intra_taps[mode][k][5] * p5 +
941
16.1M
                 av1_filter_intra_taps[mode][k][6] * p6;
942
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
943
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944
        // Since Clip1() clips a negative value to 0, it is safe to replace
945
        // Round2Signed() with Round2().
946
16.1M
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947
16.1M
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948
16.1M
      }
949
2.02M
    }
950
951
1.49M
  for (r = 0; r < bh; ++r) {
952
1.34M
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953
1.34M
    dst += stride;
954
1.34M
  }
955
152k
}
956
#endif  // CONFIG_AV1_HIGHBITDEPTH
957
958
7.27M
static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959
7.27M
  if (plane == 0) {
960
3.71M
    const PREDICTION_MODE mode = mbmi->mode;
961
3.71M
    return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962
3.32M
            mode == SMOOTH_H_PRED);
963
3.71M
  } else {
964
    // uv_mode is not set for inter blocks, so need to explicitly
965
    // detect that case.
966
3.55M
    if (is_inter_block(mbmi)) return 0;
967
968
3.51M
    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969
3.51M
    return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970
3.18M
            uv_mode == UV_SMOOTH_H_PRED);
971
3.55M
  }
972
7.27M
}
973
974
4.24M
static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975
4.24M
  const MB_MODE_INFO *above;
976
4.24M
  const MB_MODE_INFO *left;
977
978
4.24M
  if (plane == 0) {
979
2.17M
    above = xd->above_mbmi;
980
2.17M
    left = xd->left_mbmi;
981
2.17M
  } else {
982
2.06M
    above = xd->chroma_above_mbmi;
983
2.06M
    left = xd->chroma_left_mbmi;
984
2.06M
  }
985
986
4.24M
  return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987
4.24M
}
988
989
2.15M
// Chooses the intra edge filter strength (0 = no filtering, up to 3).
//
//   bs0, bs1  block dimensions; only their sum is used.
//   delta     angle offset; only its magnitude is used.
//   type      0 selects the first threshold set, any non-zero value the
//             second one.
//
// Larger blocks and larger angle offsets select stronger filtering; the exact
// thresholds are listed per size class below.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type != 0) {
    if (size_sum <= 8) return mag >= 64 ? 2 : (mag >= 40 ? 1 : 0);
    if (size_sum <= 16) return mag >= 48 ? 2 : (mag >= 20 ? 1 : 0);
    if (size_sum <= 24) return mag >= 4 ? 3 : 0;
    return mag >= 1 ? 3 : 0;
  }

  if (size_sum <= 8) return mag >= 56 ? 1 : 0;
  // Size sums of 9..12 and 13..16 share the same threshold.
  if (size_sum <= 16) return mag >= 40 ? 1 : 0;
  if (size_sum <= 24) {
    if (mag >= 32) return 3;
    if (mag >= 16) return 2;
    return mag >= 8 ? 1 : 0;
  }
  if (size_sum <= 32) {
    if (mag >= 32) return 3;
    if (mag >= 4) return 2;
    return mag >= 1 ? 1 : 0;
  }
  return mag >= 1 ? 3 : 0;
}
1027
1028
1.41M
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029
1.41M
  if (!strength) return;
1030
1031
790k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032
790k
                                                         { 0, 5, 6, 5, 0 },
1033
790k
                                                         { 2, 4, 4, 4, 2 } };
1034
790k
  const int filt = strength - 1;
1035
790k
  uint8_t edge[129];
1036
1037
790k
  memcpy(edge, p, sz * sizeof(*p));
1038
14.8M
  for (int i = 1; i < sz; i++) {
1039
14.0M
    int s = 0;
1040
84.2M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041
70.2M
      int k = i - 2 + j;
1042
70.2M
      k = (k < 0) ? 0 : k;
1043
70.2M
      k = (k > sz - 1) ? sz - 1 : k;
1044
70.2M
      s += edge[k] * kernel[filt][j];
1045
70.2M
    }
1046
14.0M
    s = (s + 8) >> 4;
1047
14.0M
    p[i] = s;
1048
14.0M
  }
1049
790k
}
1050
1051
132k
// Filters the above-left corner sample of the 8-bit intra edges.
//
// The corner (p_above[-1]) is replaced by the rounded weighted average
// (5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0] + 8) >> 4, and the same
// value is stored to both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint8_t corner = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1060
1061
457k
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062
  // interpolate half-sample positions
1063
457k
  assert(sz <= MAX_UPSAMPLE_SZ);
1064
1065
457k
  uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066
  // copy p[-1..(sz-1)] and extend first and last samples
1067
457k
  in[0] = p[-1];
1068
457k
  in[1] = p[-1];
1069
3.71M
  for (int i = 0; i < sz; i++) {
1070
3.25M
    in[i + 2] = p[i];
1071
3.25M
  }
1072
457k
  in[sz + 2] = p[sz - 1];
1073
1074
  // interpolate half-sample edge positions
1075
457k
  p[-2] = in[0];
1076
3.71M
  for (int i = 0; i < sz; i++) {
1077
3.25M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078
3.25M
    s = clip_pixel((s + 8) >> 4);
1079
3.25M
    p[2 * i - 1] = s;
1080
3.25M
    p[2 * i] = in[i + 2];
1081
3.25M
  }
1082
457k
}
1083
1084
static void build_directional_and_filter_intra_predictors(
1085
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088
2.59M
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089
2.59M
  int i;
1090
2.59M
  const uint8_t *above_ref = ref - ref_stride;
1091
2.59M
  const uint8_t *left_ref = ref - 1;
1092
2.59M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093
2.59M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094
2.59M
  uint8_t *const above_row = above_data + 16;
1095
2.59M
  uint8_t *const left_col = left_data + 16;
1096
2.59M
  const int txwpx = tx_size_wide[tx_size];
1097
2.59M
  const int txhpx = tx_size_high[tx_size];
1098
2.59M
  int need_left = extend_modes[mode] & NEED_LEFT;
1099
2.59M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1100
2.59M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101
2.59M
  const int is_dr_mode = av1_is_directional_mode(mode);
1102
2.59M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103
2.59M
  assert(use_filter_intra || is_dr_mode);
1104
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1105
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1106
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1107
  // be the potential reason for this issue.
1108
2.59M
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109
2.59M
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110
1111
  // The default values if ref pixels are not available:
1112
  // 128 127 127 .. 127 127 127 127 127 127
1113
  // 129  A   B  ..  Y   Z
1114
  // 129  C   D  ..  W   X
1115
  // 129  E   F  ..  U   V
1116
  // 129  G   H  ..  S   T   T   T   T   T
1117
  // ..
1118
1119
2.59M
  if (is_dr_mode) {
1120
2.20M
    if (p_angle <= 90)
1121
769k
      need_above = 1, need_left = 0, need_above_left = 1;
1122
1.43M
    else if (p_angle < 180)
1123
627k
      need_above = 1, need_left = 1, need_above_left = 1;
1124
811k
    else
1125
811k
      need_above = 0, need_left = 1, need_above_left = 1;
1126
2.20M
  }
1127
2.59M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128
1129
2.59M
  assert(n_top_px >= 0);
1130
2.59M
  assert(n_topright_px >= -1);
1131
2.59M
  assert(n_left_px >= 0);
1132
2.59M
  assert(n_bottomleft_px >= -1);
1133
1134
2.59M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135
44.5k
    int val;
1136
44.5k
    if (need_left) {
1137
29.2k
      val = (n_top_px > 0) ? above_ref[0] : 129;
1138
29.2k
    } else {
1139
15.2k
      val = (n_left_px > 0) ? left_ref[0] : 127;
1140
15.2k
    }
1141
860k
    for (i = 0; i < txhpx; ++i) {
1142
816k
      memset(dst, val, txwpx);
1143
816k
      dst += dst_stride;
1144
816k
    }
1145
44.5k
    return;
1146
44.5k
  }
1147
1148
  // NEED_LEFT
1149
2.54M
  if (need_left) {
1150
1.79M
    const int num_left_pixels_needed =
1151
1.79M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152
1.79M
    i = 0;
1153
1.79M
    if (n_left_px > 0) {
1154
18.2M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155
1.76M
      if (n_bottomleft_px > 0) {
1156
112k
        assert(i == txhpx);
1157
1.21M
        for (; i < txhpx + n_bottomleft_px; i++)
1158
1.10M
          left_col[i] = left_ref[i * ref_stride];
1159
112k
      }
1160
1.76M
      if (i < num_left_pixels_needed)
1161
295k
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162
1.76M
    } else if (n_top_px > 0) {
1163
25.3k
      memset(left_col, above_ref[0], num_left_pixels_needed);
1164
25.3k
    }
1165
1.79M
  }
1166
1167
  // NEED_ABOVE
1168
2.54M
  if (need_above) {
1169
1.76M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170
1.76M
    if (n_top_px > 0) {
1171
1.74M
      memcpy(above_row, above_ref, n_top_px);
1172
1.74M
      i = n_top_px;
1173
1.74M
      if (n_topright_px > 0) {
1174
222k
        assert(n_top_px == txwpx);
1175
222k
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176
222k
        i += n_topright_px;
1177
222k
      }
1178
1.74M
      if (i < num_top_pixels_needed)
1179
160k
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180
1.74M
    } else if (n_left_px > 0) {
1181
18.7k
      memset(above_row, left_ref[0], num_top_pixels_needed);
1182
18.7k
    }
1183
1.76M
  }
1184
1185
2.54M
  if (need_above_left) {
1186
2.54M
    if (n_top_px > 0 && n_left_px > 0) {
1187
2.46M
      above_row[-1] = above_ref[-1];
1188
2.46M
    } else if (n_top_px > 0) {
1189
46.0k
      above_row[-1] = above_ref[0];
1190
46.0k
    } else if (n_left_px > 0) {
1191
35.6k
      above_row[-1] = left_ref[0];
1192
35.6k
    } else {
1193
1.19k
      above_row[-1] = 128;
1194
1.19k
    }
1195
2.54M
    left_col[-1] = above_row[-1];
1196
2.54M
  }
1197
1198
2.54M
  if (use_filter_intra) {
1199
383k
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200
383k
                               filter_intra_mode);
1201
383k
    return;
1202
383k
  }
1203
1204
2.54M
  assert(is_dr_mode);
1205
2.16M
  int upsample_above = 0;
1206
2.16M
  int upsample_left = 0;
1207
2.16M
  if (!disable_edge_filter) {
1208
1.66M
    const int need_right = p_angle < 90;
1209
1.66M
    const int need_bottom = p_angle > 180;
1210
1.66M
    if (p_angle != 90 && p_angle != 180) {
1211
957k
      assert(need_above_left);
1212
957k
      const int ab_le = 1;
1213
957k
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214
132k
        filter_intra_edge_corner(above_row, left_col);
1215
132k
      }
1216
957k
      if (need_above && n_top_px > 0) {
1217
698k
        const int strength = intra_edge_filter_strength(
1218
698k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219
698k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220
698k
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221
698k
      }
1222
957k
      if (need_left && n_left_px > 0) {
1223
714k
        const int strength = intra_edge_filter_strength(
1224
714k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225
714k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226
714k
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227
714k
      }
1228
957k
    }
1229
1.66M
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230
1.66M
                                                 intra_edge_filter_type);
1231
1.66M
    if (need_above && upsample_above) {
1232
182k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1233
182k
      av1_upsample_intra_edge(above_row, n_px);
1234
182k
    }
1235
1.66M
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236
1.66M
                                                intra_edge_filter_type);
1237
1.66M
    if (need_left && upsample_left) {
1238
274k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239
274k
      av1_upsample_intra_edge(left_col, n_px);
1240
274k
    }
1241
1.66M
  }
1242
2.16M
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243
2.16M
               upsample_left, p_angle);
1244
2.16M
}
1245
1246
// This function generates the pred data of a given block for non-directional
1247
// intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
1248
static void build_non_directional_intra_predictors(
1249
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250
10.6M
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251
10.6M
  const uint8_t *above_ref = ref - ref_stride;
1252
10.6M
  const uint8_t *left_ref = ref - 1;
1253
10.6M
  const int txwpx = tx_size_wide[tx_size];
1254
10.6M
  const int txhpx = tx_size_high[tx_size];
1255
10.6M
  const int need_left = extend_modes[mode] & NEED_LEFT;
1256
10.6M
  const int need_above = extend_modes[mode] & NEED_ABOVE;
1257
10.6M
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258
10.6M
  int i = 0;
1259
10.6M
  assert(n_top_px >= 0);
1260
10.6M
  assert(n_left_px >= 0);
1261
10.6M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262
10.6M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263
1264
10.6M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265
0
    int val = 0;
1266
0
    if (need_left) {
1267
0
      val = (n_top_px > 0) ? above_ref[0] : 129;
1268
0
    } else {
1269
0
      val = (n_left_px > 0) ? left_ref[0] : 127;
1270
0
    }
1271
0
    for (i = 0; i < txhpx; ++i) {
1272
0
      memset(dst, val, txwpx);
1273
0
      dst += dst_stride;
1274
0
    }
1275
0
    return;
1276
0
  }
1277
1278
10.6M
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279
10.6M
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280
10.6M
  uint8_t *const above_row = above_data + 16;
1281
10.6M
  uint8_t *const left_col = left_data + 16;
1282
1283
10.6M
  if (need_left) {
1284
10.6M
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285
10.6M
    if (n_left_px > 0) {
1286
103M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287
10.1M
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288
10.1M
    } else if (n_top_px > 0) {
1289
538k
      memset(left_col, above_ref[0], txhpx);
1290
538k
    }
1291
10.6M
  }
1292
1293
10.6M
  if (need_above) {
1294
10.6M
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295
10.6M
    if (n_top_px > 0) {
1296
10.4M
      memcpy(above_row, above_ref, n_top_px);
1297
10.4M
      i = n_top_px;
1298
10.4M
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299
10.4M
    } else if (n_left_px > 0) {
1300
243k
      memset(above_row, left_ref[0], txwpx);
1301
243k
    }
1302
10.6M
  }
1303
1304
10.6M
  if (need_above_left) {
1305
4.57M
    if (n_top_px > 0 && n_left_px > 0) {
1306
4.35M
      above_row[-1] = above_ref[-1];
1307
4.35M
    } else if (n_top_px > 0) {
1308
165k
      above_row[-1] = above_ref[0];
1309
165k
    } else if (n_left_px > 0) {
1310
51.4k
      above_row[-1] = left_ref[0];
1311
51.4k
    } else {
1312
1.23k
      above_row[-1] = 128;
1313
1.23k
    }
1314
4.57M
    left_col[-1] = above_row[-1];
1315
4.57M
  }
1316
1317
10.6M
  if (mode == DC_PRED) {
1318
4.92M
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319
4.92M
                                                  left_col);
1320
5.75M
  } else {
1321
5.75M
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322
5.75M
  }
1323
10.6M
}
1324
1325
#if CONFIG_AV1_HIGHBITDEPTH
1326
742k
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327
742k
  if (!strength) return;
1328
1329
491k
  const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330
491k
                                                         { 0, 5, 6, 5, 0 },
1331
491k
                                                         { 2, 4, 4, 4, 2 } };
1332
491k
  const int filt = strength - 1;
1333
491k
  uint16_t edge[129];
1334
1335
491k
  memcpy(edge, p, sz * sizeof(*p));
1336
11.7M
  for (int i = 1; i < sz; i++) {
1337
11.2M
    int s = 0;
1338
67.7M
    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339
56.4M
      int k = i - 2 + j;
1340
56.4M
      k = (k < 0) ? 0 : k;
1341
56.4M
      k = (k > sz - 1) ? sz - 1 : k;
1342
56.4M
      s += edge[k] * kernel[filt][j];
1343
56.4M
    }
1344
11.2M
    s = (s + 8) >> 4;
1345
11.2M
    p[i] = s;
1346
11.2M
  }
1347
491k
}
1348
1349
// Filters the above-left corner sample of the 16-bit intra edges.
//
// The corner (p_above[-1]) is replaced by the rounded weighted average
// (5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0] + 8) >> 4, and the same
// value is stored to both p_above[-1] and p_left[-1].
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
                                            uint16_t *p_left) {
  const int weighted = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint16_t corner = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1359
1360
196k
void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361
  // interpolate half-sample positions
1362
196k
  assert(sz <= MAX_UPSAMPLE_SZ);
1363
1364
196k
  uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365
  // copy p[-1..(sz-1)] and extend first and last samples
1366
196k
  in[0] = p[-1];
1367
196k
  in[1] = p[-1];
1368
1.80M
  for (int i = 0; i < sz; i++) {
1369
1.61M
    in[i + 2] = p[i];
1370
1.61M
  }
1371
196k
  in[sz + 2] = p[sz - 1];
1372
1373
  // interpolate half-sample edge positions
1374
196k
  p[-2] = in[0];
1375
1.80M
  for (int i = 0; i < sz; i++) {
1376
1.61M
    int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377
1.61M
    s = (s + 8) >> 4;
1378
1.61M
    s = clip_pixel_highbd(s, bd);
1379
1.61M
    p[2 * i - 1] = s;
1380
1.61M
    p[2 * i] = in[i + 2];
1381
1.61M
  }
1382
196k
}
1383
1384
static void highbd_build_directional_and_filter_intra_predictors(
1385
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389
1.65M
    int bit_depth) {
1390
1.65M
  int i;
1391
1.65M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392
1.65M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393
1.65M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394
1.65M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395
1.65M
  uint16_t *const above_row = above_data + 16;
1396
1.65M
  uint16_t *const left_col = left_data + 16;
1397
1.65M
  const int txwpx = tx_size_wide[tx_size];
1398
1.65M
  const int txhpx = tx_size_high[tx_size];
1399
1.65M
  int need_left = extend_modes[mode] & NEED_LEFT;
1400
1.65M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1401
1.65M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402
1.65M
  const uint16_t *above_ref = ref - ref_stride;
1403
1.65M
  const uint16_t *left_ref = ref - 1;
1404
1.65M
  const int is_dr_mode = av1_is_directional_mode(mode);
1405
1.65M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406
1.65M
  assert(use_filter_intra || is_dr_mode);
1407
1.65M
  const int base = 128 << (bit_depth - 8);
1408
  // The left_data, above_data buffers must be zeroed to fix some intermittent
1409
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1410
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
1411
  // seen to be the potential reason for this issue.
1412
1.65M
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413
1.65M
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414
1415
  // The default values if ref pixels are not available:
1416
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417
  // base+1   A      B  ..     Y      Z
1418
  // base+1   C      D  ..     W      X
1419
  // base+1   E      F  ..     U      V
1420
  // base+1   G      H  ..     S      T      T      T      T      T
1421
1422
1.65M
  if (is_dr_mode) {
1423
1.50M
    if (p_angle <= 90)
1424
428k
      need_above = 1, need_left = 0, need_above_left = 1;
1425
1.07M
    else if (p_angle < 180)
1426
511k
      need_above = 1, need_left = 1, need_above_left = 1;
1427
560k
    else
1428
560k
      need_above = 0, need_left = 1, need_above_left = 1;
1429
1.50M
  }
1430
1.65M
  if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431
1432
1.65M
  assert(n_top_px >= 0);
1433
1.65M
  assert(n_topright_px >= -1);
1434
1.65M
  assert(n_left_px >= 0);
1435
1.65M
  assert(n_bottomleft_px >= -1);
1436
1437
1.65M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438
45.9k
    int val;
1439
45.9k
    if (need_left) {
1440
27.6k
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441
27.6k
    } else {
1442
18.2k
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443
18.2k
    }
1444
1.21M
    for (i = 0; i < txhpx; ++i) {
1445
1.16M
      aom_memset16(dst, val, txwpx);
1446
1.16M
      dst += dst_stride;
1447
1.16M
    }
1448
45.9k
    return;
1449
45.9k
  }
1450
1451
  // NEED_LEFT
1452
1.60M
  if (need_left) {
1453
1.19M
    const int num_left_pixels_needed =
1454
1.19M
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1455
1.19M
    i = 0;
1456
1.19M
    if (n_left_px > 0) {
1457
15.0M
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458
1.16M
      if (n_bottomleft_px > 0) {
1459
103k
        assert(i == txhpx);
1460
1.18M
        for (; i < txhpx + n_bottomleft_px; i++)
1461
1.07M
          left_col[i] = left_ref[i * ref_stride];
1462
103k
      }
1463
1.16M
      if (i < num_left_pixels_needed)
1464
234k
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465
1.16M
    } else if (n_top_px > 0) {
1466
24.8k
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467
24.8k
    }
1468
1.19M
  }
1469
1470
  // NEED_ABOVE
1471
1.60M
  if (need_above) {
1472
1.07M
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473
1.07M
    if (n_top_px > 0) {
1474
1.04M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475
1.04M
      i = n_top_px;
1476
1.04M
      if (n_topright_px > 0) {
1477
154k
        assert(n_top_px == txwpx);
1478
154k
        memcpy(above_row + txwpx, above_ref + txwpx,
1479
154k
               n_topright_px * sizeof(above_ref[0]));
1480
154k
        i += n_topright_px;
1481
154k
      }
1482
1.04M
      if (i < num_top_pixels_needed)
1483
153k
        aom_memset16(&above_row[i], above_row[i - 1],
1484
153k
                     num_top_pixels_needed - i);
1485
1.04M
    } else if (n_left_px > 0) {
1486
25.7k
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487
25.7k
    }
1488
1.07M
  }
1489
1490
1.60M
  if (need_above_left) {
1491
1.60M
    if (n_top_px > 0 && n_left_px > 0) {
1492
1.51M
      above_row[-1] = above_ref[-1];
1493
1.51M
    } else if (n_top_px > 0) {
1494
43.9k
      above_row[-1] = above_ref[0];
1495
49.1k
    } else if (n_left_px > 0) {
1496
46.4k
      above_row[-1] = left_ref[0];
1497
46.4k
    } else {
1498
2.78k
      above_row[-1] = base;
1499
2.78k
    }
1500
1.60M
    left_col[-1] = above_row[-1];
1501
1.60M
  }
1502
1503
1.60M
  if (use_filter_intra) {
1504
152k
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505
152k
                                  filter_intra_mode, bit_depth);
1506
152k
    return;
1507
152k
  }
1508
1509
1.60M
  assert(is_dr_mode);
1510
1.45M
  int upsample_above = 0;
1511
1.45M
  int upsample_left = 0;
1512
1.45M
  if (!disable_edge_filter) {
1513
718k
    const int need_right = p_angle < 90;
1514
718k
    const int need_bottom = p_angle > 180;
1515
718k
    if (p_angle != 90 && p_angle != 180) {
1516
520k
      assert(need_above_left);
1517
520k
      const int ab_le = 1;
1518
520k
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519
106k
        highbd_filter_intra_edge_corner(above_row, left_col);
1520
106k
      }
1521
520k
      if (need_above && n_top_px > 0) {
1522
355k
        const int strength = intra_edge_filter_strength(
1523
355k
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524
355k
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525
355k
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526
355k
      }
1527
520k
      if (need_left && n_left_px > 0) {
1528
386k
        const int strength = intra_edge_filter_strength(
1529
386k
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530
386k
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531
386k
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532
386k
      }
1533
520k
    }
1534
718k
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535
718k
                                                 intra_edge_filter_type);
1536
718k
    if (need_above && upsample_above) {
1537
73.9k
      const int n_px = txwpx + (need_right ? txhpx : 0);
1538
73.9k
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539
73.9k
    }
1540
718k
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541
718k
                                                intra_edge_filter_type);
1542
718k
    if (need_left && upsample_left) {
1543
123k
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544
123k
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545
123k
    }
1546
718k
  }
1547
1.45M
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548
1.45M
                      upsample_above, upsample_left, p_angle, bit_depth);
1549
1.45M
}
1550
1551
// For HBD encode/decode, this function generates the pred data of a given
1552
// block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553
// SMOOTH_V and PAETH).
1554
static void highbd_build_non_directional_intra_predictors(
1555
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557
5.25M
    int bit_depth) {
1558
5.25M
  int i = 0;
1559
5.25M
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560
5.25M
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561
5.25M
  const int txwpx = tx_size_wide[tx_size];
1562
5.25M
  const int txhpx = tx_size_high[tx_size];
1563
5.25M
  int need_left = extend_modes[mode] & NEED_LEFT;
1564
5.25M
  int need_above = extend_modes[mode] & NEED_ABOVE;
1565
5.25M
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566
5.25M
  const uint16_t *above_ref = ref - ref_stride;
1567
5.25M
  const uint16_t *left_ref = ref - 1;
1568
5.25M
  const int base = 128 << (bit_depth - 8);
1569
1570
5.25M
  assert(n_top_px >= 0);
1571
5.25M
  assert(n_left_px >= 0);
1572
5.25M
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573
5.25M
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574
1575
5.25M
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576
0
    int val = 0;
1577
0
    if (need_left) {
1578
0
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579
0
    } else {
1580
0
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581
0
    }
1582
0
    for (i = 0; i < txhpx; ++i) {
1583
0
      aom_memset16(dst, val, txwpx);
1584
0
      dst += dst_stride;
1585
0
    }
1586
0
    return;
1587
0
  }
1588
1589
5.25M
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590
5.25M
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591
5.25M
  uint16_t *const above_row = above_data + 16;
1592
5.25M
  uint16_t *const left_col = left_data + 16;
1593
1594
5.25M
  if (need_left) {
1595
5.25M
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596
5.25M
    if (n_left_px > 0) {
1597
69.6M
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598
4.77M
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599
4.77M
    } else if (n_top_px > 0) {
1600
441k
      aom_memset16(left_col, above_ref[0], txhpx);
1601
441k
    }
1602
5.25M
  }
1603
1604
5.25M
  if (need_above) {
1605
5.25M
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606
5.25M
    if (n_top_px > 0) {
1607
5.02M
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608
5.02M
      i = n_top_px;
1609
5.02M
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610
5.02M
    } else if (n_left_px > 0) {
1611
193k
      aom_memset16(above_row, left_ref[0], txwpx);
1612
193k
    }
1613
5.25M
  }
1614
1615
5.25M
  if (need_above_left) {
1616
769k
    if (n_top_px > 0 && n_left_px > 0) {
1617
637k
      above_row[-1] = above_ref[-1];
1618
637k
    } else if (n_top_px > 0) {
1619
105k
      above_row[-1] = above_ref[0];
1620
105k
    } else if (n_left_px > 0) {
1621
24.2k
      above_row[-1] = left_ref[0];
1622
24.2k
    } else {
1623
2.31k
      above_row[-1] = base;
1624
2.31k
    }
1625
769k
    left_col[-1] = above_row[-1];
1626
769k
  }
1627
1628
5.25M
  if (mode == DC_PRED) {
1629
3.52M
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630
3.52M
        dst, dst_stride, above_row, left_col, bit_depth);
1631
3.52M
  } else {
1632
1.72M
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633
1.72M
  }
1634
5.25M
}
1635
#endif  // CONFIG_AV1_HIGHBITDEPTH
1636
1637
// Maps a block size that has a 4-pixel dimension to the size used when the
// plane is subsampled: a 4-pixel width becomes 8 when subsampling_x is 1, and
// a 4-pixel height becomes 8 when subsampling_y is 1. Every other block size,
// and any block size when neither axis is subsampled, is returned unchanged.
static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  assert(subsampling_x >= 0 && subsampling_x < 2);
  assert(subsampling_y >= 0 && subsampling_y < 2);
  const int sub_x = (subsampling_x == 1);
  const int sub_y = (subsampling_y == 1);

  if (!sub_x && !sub_y) return bsize;

  // From here on at least one of sub_x / sub_y is set.
  switch (bsize) {
    case BLOCK_4X4:
      if (sub_x && sub_y) return BLOCK_8X8;
      return sub_x ? BLOCK_8X4 : BLOCK_4X8;
    case BLOCK_4X8: return sub_x ? BLOCK_8X8 : BLOCK_4X8;
    case BLOCK_8X4: return sub_y ? BLOCK_8X8 : BLOCK_8X4;
    case BLOCK_4X16: return sub_x ? BLOCK_8X16 : BLOCK_4X16;
    case BLOCK_16X4: return sub_y ? BLOCK_16X8 : BLOCK_16X4;
    default: return bsize;
  }
}
1687
1688
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689
                             int enable_intra_edge_filter, int wpx, int hpx,
1690
                             TX_SIZE tx_size, PREDICTION_MODE mode,
1691
                             int angle_delta, int use_palette,
1692
                             FILTER_INTRA_MODE filter_intra_mode,
1693
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
1694
                             int dst_stride, int col_off, int row_off,
1695
22.4M
                             int plane) {
1696
22.4M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1697
22.4M
  const int txwpx = tx_size_wide[tx_size];
1698
22.4M
  const int txhpx = tx_size_high[tx_size];
1699
22.4M
  const int x = col_off << MI_SIZE_LOG2;
1700
22.4M
  const int y = row_off << MI_SIZE_LOG2;
1701
22.4M
  const int is_hbd = is_cur_buf_hbd(xd);
1702
1703
22.4M
  assert(mode < INTRA_MODES);
1704
1705
22.4M
  if (use_palette) {
1706
2.25M
    int r, c;
1707
2.25M
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708
2.25M
                               xd->color_index_map_offset[plane != 0];
1709
2.25M
    const uint16_t *const palette =
1710
2.25M
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
1711
2.25M
    if (is_hbd) {
1712
232k
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713
3.17M
      for (r = 0; r < txhpx; ++r) {
1714
50.7M
        for (c = 0; c < txwpx; ++c) {
1715
47.7M
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716
47.7M
        }
1717
2.93M
      }
1718
2.01M
    } else {
1719
11.9M
      for (r = 0; r < txhpx; ++r) {
1720
81.2M
        for (c = 0; c < txwpx; ++c) {
1721
71.3M
          dst[r * dst_stride + c] =
1722
71.3M
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723
71.3M
        }
1724
9.89M
      }
1725
2.01M
    }
1726
2.25M
    return;
1727
2.25M
  }
1728
1729
20.1M
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1730
20.1M
  const int ss_x = pd->subsampling_x;
1731
20.1M
  const int ss_y = pd->subsampling_y;
1732
20.1M
  const int have_top =
1733
20.1M
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734
20.1M
  const int have_left =
1735
20.1M
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736
1737
  // Distance between the right edge of this prediction block to
1738
  // the frame right edge
1739
20.1M
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740
  // Distance between the bottom edge of this prediction block to
1741
  // the frame bottom edge
1742
20.1M
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743
20.1M
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744
20.1M
  const int is_dr_mode = av1_is_directional_mode(mode);
1745
1746
  // The computations in this function, as well as in build_intra_predictors(),
1747
  // are generalized for all intra modes. Some of these operations are not
1748
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
1750
  // separate function build_non_directional_intra_predictors() is introduced
1751
  // for these modes to avoid redundant computations while generating pred data.
1752
1753
20.1M
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754
20.1M
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
1755
20.1M
  if (!use_filter_intra && !is_dr_mode) {
1756
15.9M
#if CONFIG_AV1_HIGHBITDEPTH
1757
15.9M
    if (is_hbd) {
1758
5.25M
      highbd_build_non_directional_intra_predictors(
1759
5.25M
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760
5.25M
          xd->bd);
1761
5.25M
      return;
1762
5.25M
    }
1763
10.6M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1764
10.6M
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765
10.6M
                                           mode, tx_size, n_top_px, n_left_px);
1766
10.6M
    return;
1767
15.9M
  }
1768
1769
4.24M
  const int txw = tx_size_wide_unit[tx_size];
1770
4.24M
  const int txh = tx_size_high_unit[tx_size];
1771
4.24M
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772
4.24M
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773
4.24M
  const int right_available =
1774
4.24M
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775
4.24M
  const int bottom_available =
1776
4.24M
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777
1778
4.24M
  const PARTITION_TYPE partition = mbmi->partition;
1779
1780
4.24M
  BLOCK_SIZE bsize = mbmi->bsize;
1781
  // force 4x4 chroma component block size.
1782
4.24M
  if (ss_x || ss_y) {
1783
437k
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784
437k
  }
1785
1786
4.24M
  int p_angle = 0;
1787
4.24M
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788
4.24M
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789
1790
4.24M
  if (use_filter_intra) {
1791
535k
    need_top_right = 0;
1792
535k
    need_bottom_left = 0;
1793
535k
  }
1794
4.24M
  if (is_dr_mode) {
1795
3.70M
    p_angle = mode_to_angle_map[mode] + angle_delta;
1796
3.70M
    need_top_right = p_angle < 90;
1797
3.70M
    need_bottom_left = p_angle > 180;
1798
3.70M
  }
1799
1800
  // Possible states for have_top_right(TR) and have_bottom_left(BL)
1801
  // -1 : TR and BL are not needed
1802
  //  0 : TR and BL are needed but not available
1803
  // > 0 : TR and BL are needed and pixels are available
1804
4.24M
  const int have_top_right =
1805
4.24M
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806
612k
                                     right_available, partition, tx_size,
1807
612k
                                     row_off, col_off, ss_x, ss_y)
1808
4.24M
                     : -1;
1809
4.24M
  const int have_bottom_left =
1810
4.24M
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811
679k
                                         bottom_available, have_left, partition,
1812
679k
                                         tx_size, row_off, col_off, ss_x, ss_y)
1813
4.24M
                       : -1;
1814
1815
4.24M
  const int disable_edge_filter = !enable_intra_edge_filter;
1816
4.24M
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817
4.24M
  const int n_topright_px =
1818
4.24M
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819
4.24M
  const int n_bottomleft_px =
1820
4.24M
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
1821
4.24M
#if CONFIG_AV1_HIGHBITDEPTH
1822
4.24M
  if (is_hbd) {
1823
1.65M
    highbd_build_directional_and_filter_intra_predictors(
1824
1.65M
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825
1.65M
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826
1.65M
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827
1.65M
    return;
1828
1.65M
  }
1829
2.59M
#endif
1830
2.59M
  build_directional_and_filter_intra_predictors(
1831
2.59M
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832
2.59M
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833
2.59M
      n_bottomleft_px, intra_edge_filter_type);
1834
2.59M
}
1835
1836
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837
                                    int plane, int blk_col, int blk_row,
1838
22.4M
                                    TX_SIZE tx_size) {
1839
22.4M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
1840
22.4M
  struct macroblockd_plane *const pd = &xd->plane[plane];
1841
22.4M
  const int dst_stride = pd->dst.stride;
1842
22.4M
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843
22.4M
  const PREDICTION_MODE mode =
1844
22.4M
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845
22.4M
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846
22.4M
  const FILTER_INTRA_MODE filter_intra_mode =
1847
22.4M
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848
22.4M
          ? mbmi->filter_intra_mode_info.filter_intra_mode
1849
22.4M
          : FILTER_INTRA_MODES;
1850
22.4M
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851
22.4M
  const SequenceHeader *seq_params = cm->seq_params;
1852
1853
22.4M
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854
22.4M
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855
#if CONFIG_DEBUG
1856
    assert(is_cfl_allowed(xd));
1857
    const BLOCK_SIZE plane_bsize =
1858
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859
    (void)plane_bsize;
1860
    assert(plane_bsize < BLOCK_SIZES_ALL);
1861
    if (!xd->lossless[mbmi->segment_id]) {
1862
      assert(blk_col == 0);
1863
      assert(blk_row == 0);
1864
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866
    }
1867
#endif
1868
2.01M
    CFL_CTX *const cfl = &xd->cfl;
1869
2.01M
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870
2.01M
    if (!cfl->dc_pred_is_cached[pred_plane]) {
1871
2.01M
      av1_predict_intra_block(xd, seq_params->sb_size,
1872
2.01M
                              seq_params->enable_intra_edge_filter, pd->width,
1873
2.01M
                              pd->height, tx_size, mode, angle_delta,
1874
2.01M
                              use_palette, filter_intra_mode, dst, dst_stride,
1875
2.01M
                              dst, dst_stride, blk_col, blk_row, plane);
1876
2.01M
      if (cfl->use_dc_pred_cache) {
1877
0
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878
0
        cfl->dc_pred_is_cached[pred_plane] = true;
1879
0
      }
1880
2.01M
    } else {
1881
2
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882
2
    }
1883
2.01M
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884
2.01M
    return;
1885
2.01M
  }
1886
20.4M
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887
20.4M
  av1_predict_intra_block(
1888
20.4M
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889
20.4M
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890
20.4M
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891
20.4M
}
1892
1893
18.5k
// Public entry point for setting up the intra predictors. It hands
// init_intra_predictors_internal to aom_once() rather than calling it
// directly.
// NOTE(review): presumably aom_once() (aom_ports/aom_once.h) runs the callback
// at most once even if this function is called repeatedly -- confirm there.
void av1_init_intra_predictors(void) {
1894
18.5k
  aom_once(init_intra_predictors_internal);
1895
18.5k
}