Coverage Report

Created: 2024-06-18 06:48

/src/aom/av1/common/av1_loopfilter.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
14
#include "config/aom_config.h"
15
#include "config/aom_dsp_rtcd.h"
16
17
#include "aom_dsp/aom_dsp_common.h"
18
#include "aom_mem/aom_mem.h"
19
#include "aom_ports/mem.h"
20
#include "av1/common/av1_common_int.h"
21
#include "av1/common/av1_loopfilter.h"
22
#include "av1/common/reconinter.h"
23
#include "av1/common/seg_common.h"
24
25
// Selector for the loop-filter variant to apply. Judging by the names this
// picks a single, dual or quad filter -- TODO confirm against the users of
// USE_FILTER_TYPE, which are outside this part of the file.
enum {
  USE_SINGLE = 0,
  USE_DUAL = 1,
  USE_QUAD = 2,
} UENUM1BYTE(USE_FILTER_TYPE);
30
31
static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
32
  { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
33
  { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
34
  { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
35
};
36
37
// Index into mbmi->delta_lf[] used when delta_lf_multi is set, indexed as
// [plane][direction].
static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
  { 0, 1 },  // plane 0
  { 2, 2 },  // plane 1
  { 3, 3 },  // plane 2
};
40
41
// Maps a prediction mode to the index of the mode delta applied to the
// loop-filter level. The table is indexed by mbmi->mode.
static const int mode_lf_lut[] = {
  // INTRA_MODES
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // INTER_MODES (GLOBALMV == 0)
  1, 1, 0, 1,
  // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
  1, 1, 1, 1, 1, 1, 0, 1,
};
46
47
52.3k
// Fills the `lim` and `mblim` threshold vectors of lfi->lfthr[] for every
// filter level in [0, MAX_LOOP_FILTER], based on the given sharpness level.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  // The shift depends only on the sharpness, so work it out once:
  // 0 for sharpness 0, 1 for sharpness 1..4, 2 for sharpness above 4.
  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);

  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    int inside_limit = level >> shift;

    // A non-zero sharpness additionally caps the limit at (9 - sharpness).
    if (sharpness_lvl > 0) {
      const int cap = 9 - sharpness_lvl;
      inside_limit = (inside_limit > cap) ? cap : inside_limit;
    }

    // The limit is never allowed to drop below 1.
    if (inside_limit < 1) inside_limit = 1;

    memset(lfi->lfthr[level].lim, inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[level].mblim, (2 * (level + 2) + inside_limit),
           SIMD_WIDTH);
  }
}
67
68
uint8_t av1_get_filter_level(const AV1_COMMON *cm,
69
                             const loop_filter_info_n *lfi_n, const int dir_idx,
70
212M
                             int plane, const MB_MODE_INFO *mbmi) {
71
212M
  const int segment_id = mbmi->segment_id;
72
212M
  if (cm->delta_q_info.delta_lf_present_flag) {
73
104M
    int8_t delta_lf;
74
104M
    if (cm->delta_q_info.delta_lf_multi) {
75
14.2M
      const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
76
14.2M
      delta_lf = mbmi->delta_lf[delta_lf_idx];
77
90.1M
    } else {
78
90.1M
      delta_lf = mbmi->delta_lf_from_base;
79
90.1M
    }
80
104M
    int base_level;
81
104M
    if (plane == 0)
82
53.0M
      base_level = cm->lf.filter_level[dir_idx];
83
51.3M
    else if (plane == 1)
84
49.4M
      base_level = cm->lf.filter_level_u;
85
1.89M
    else
86
1.89M
      base_level = cm->lf.filter_level_v;
87
104M
    int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
88
104M
    assert(plane >= 0 && plane <= 2);
89
101M
    const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
90
101M
    if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
91
81.8M
      const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
92
81.8M
      lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
93
81.8M
    }
94
95
101M
    if (cm->lf.mode_ref_delta_enabled) {
96
49.2M
      const int scale = 1 << (lvl_seg >> 5);
97
49.2M
      lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
98
49.2M
      if (mbmi->ref_frame[0] > INTRA_FRAME)
99
8.97M
        lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
100
49.2M
      lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
101
49.2M
    }
102
101M
    return lvl_seg;
103
107M
  } else {
104
107M
    return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
105
107M
                     [mode_lf_lut[mbmi->mode]];
106
107M
  }
107
212M
}
108
109
16.8k
// Initialises the loop-filter threshold vectors in cm->lf_info: the
// sharpness-dependent limits and the high-edge-variance (hev) thresholds.
void av1_loop_filter_init(AV1_COMMON *cm) {
  // mode_lf_lut is indexed by prediction mode, so it needs one entry per mode.
  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));

  loop_filter_info_n *const info = &cm->lf_info;

  // Limits for the currently configured sharpness.
  update_sharpness(info, cm->lf.sharpness_level);

  // The hev threshold depends only on the filter level (level / 16).
  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    memset(info->lfthr[level].hev_thr, (level >> 4), SIMD_WIDTH);
  }
}
122
123
// Update the loop filter for the current frame.
124
// This should be called before loop_filter_rows(),
125
// av1_loop_filter_frame() calls this function directly.
126
void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
127
35.5k
                                int plane_end) {
128
35.5k
  int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE];
129
35.5k
  int plane;
130
35.5k
  int seg_id;
131
  // n_shift is the multiplier for lf_deltas
132
  // the multiplier is 1 for when filter_lvl is between 0 and 31;
133
  // 2 when filter_lvl is between 32 and 63
134
35.5k
  loop_filter_info_n *const lfi = &cm->lf_info;
135
35.5k
  struct loopfilter *const lf = &cm->lf;
136
35.5k
  const struct segmentation *const seg = &cm->seg;
137
138
  // update sharpness limits
139
35.5k
  update_sharpness(lfi, lf->sharpness_level);
140
141
35.5k
  filt_lvl[0] = cm->lf.filter_level[0];
142
35.5k
  filt_lvl[1] = cm->lf.filter_level_u;
143
35.5k
  filt_lvl[2] = cm->lf.filter_level_v;
144
145
35.5k
  filt_lvl_r[0] = cm->lf.filter_level[1];
146
35.5k
  filt_lvl_r[1] = cm->lf.filter_level_u;
147
35.5k
  filt_lvl_r[2] = cm->lf.filter_level_v;
148
149
35.5k
  assert(plane_start >= AOM_PLANE_Y);
150
35.5k
  assert(plane_end <= MAX_MB_PLANE);
151
152
138k
  for (plane = plane_start; plane < plane_end; plane++) {
153
102k
    if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0])
154
0
      break;
155
102k
    else if (plane == 1 && !filt_lvl[1])
156
5.86k
      continue;
157
97.0k
    else if (plane == 2 && !filt_lvl[2])
158
10.8k
      continue;
159
160
775k
    for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
161
2.06M
      for (int dir = 0; dir < 2; ++dir) {
162
1.37M
        int lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane];
163
1.37M
        const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
164
1.37M
        if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
165
134k
          const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
166
134k
          lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
167
134k
        }
168
169
1.37M
        if (!lf->mode_ref_delta_enabled) {
170
          // we could get rid of this if we assume that deltas are set to
171
          // zero when not in use; encoder always uses deltas
172
700k
          memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
173
700k
                 sizeof(lfi->lvl[plane][seg_id][dir]));
174
700k
        } else {
175
677k
          int ref, mode;
176
677k
          const int scale = 1 << (lvl_seg >> 5);
177
677k
          const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
178
677k
          lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
179
677k
              clamp(intra_lvl, 0, MAX_LOOP_FILTER);
180
181
5.41M
          for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
182
14.2M
            for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
183
9.47M
              const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
184
9.47M
                                    lf->mode_deltas[mode] * scale;
185
9.47M
              lfi->lvl[plane][seg_id][dir][ref][mode] =
186
9.47M
                  clamp(inter_lvl, 0, MAX_LOOP_FILTER);
187
9.47M
            }
188
4.73M
          }
189
677k
        }
190
1.37M
      }
191
688k
    }
192
86.1k
  }
193
35.5k
}
194
195
static AOM_FORCE_INLINE TX_SIZE
196
get_transform_size(const MACROBLOCKD *const xd, const MB_MODE_INFO *const mbmi,
197
                   const int mi_row, const int mi_col, const int plane,
198
165M
                   const int ss_x, const int ss_y) {
199
165M
  assert(mbmi != NULL);
200
183M
  if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
201
202
74.0M
  TX_SIZE tx_size = (plane == AOM_PLANE_Y)
203
74.0M
                        ? mbmi->tx_size
204
74.0M
                        : av1_get_max_uv_txsize(mbmi->bsize, ss_x, ss_y);
205
74.0M
  assert(tx_size < TX_SIZES_ALL);
206
105M
  if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip_txfm) {
207
5.61M
    const BLOCK_SIZE sb_type = mbmi->bsize;
208
5.61M
    const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
209
5.61M
    const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
210
5.61M
    const TX_SIZE mb_tx_size =
211
5.61M
        mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
212
5.61M
    assert(mb_tx_size < TX_SIZES_ALL);
213
5.67M
    tx_size = mb_tx_size;
214
5.67M
  }
215
216
105M
  return tx_size;
217
105M
}
218
219
// Plane-0 filter length, indexed by the smaller of the two
// tx_size_{wide,high}_unit_log2 values of the blocks on either side of the
// edge.
static const int tx_dim_to_filter_length[TX_SIZES] = {
  4,   // index 0
  8,   // index 1
  14,  // index 2
  14,  // index 3
  14,  // index 4
};
220
221
// Return TX_SIZE from get_transform_size(), so it is plane and direction
222
// aware
223
static TX_SIZE set_lpf_parameters(
224
    AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
225
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
226
    const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
227
122M
    const int plane, const struct macroblockd_plane *const plane_ptr) {
228
  // reset to initial values
229
122M
  params->filter_length = 0;
230
231
  // no deblocking is required
232
122M
  const uint32_t width = plane_ptr->dst.width;
233
122M
  const uint32_t height = plane_ptr->dst.height;
234
125M
  if ((width <= x) || (height <= y)) {
235
    // just return the smallest transform unit size
236
2.37M
    return TX_4X4;
237
2.37M
  }
238
239
119M
  const uint32_t scale_horz = plane_ptr->subsampling_x;
240
119M
  const uint32_t scale_vert = plane_ptr->subsampling_y;
241
  // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
242
  // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
243
  // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
244
  // and mi_col should be odd number for chroma plane.
245
119M
  const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
246
119M
  const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
247
119M
  MB_MODE_INFO **mi =
248
119M
      cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
249
119M
  const MB_MODE_INFO *mbmi = mi[0];
250
  // If current mbmi is not correctly setup, return an invalid value to stop
251
  // filtering. One example is that if this tile is not coded, then its mbmi
252
  // it not set up.
253
119M
  if (mbmi == NULL) return TX_INVALID;
254
255
119M
  const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
256
119M
                                        scale_horz, scale_vert);
257
258
119M
  {
259
119M
    const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
260
119M
    const uint32_t transform_masks =
261
119M
        edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
262
119M
    const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
263
264
119M
    if (!tu_edge) return ts;
265
266
    // prepare outer edge parameters. deblock the edge if it's an edge of a TU
267
119M
    {
268
119M
      const uint32_t curr_level =
269
119M
          av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
270
119M
      const int curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
271
119M
      uint32_t level = curr_level;
272
131M
      if (coord) {
273
131M
        {
274
131M
          const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
275
131M
          if (mi_prev == NULL) return TX_INVALID;
276
131M
          const int pv_row =
277
131M
              (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
278
131M
          const int pv_col =
279
131M
              (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
280
131M
          const TX_SIZE pv_ts = get_transform_size(
281
131M
              xd, mi_prev, pv_row, pv_col, plane, scale_horz, scale_vert);
282
283
131M
          const uint32_t pv_lvl =
284
131M
              av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
285
286
131M
          const int pv_skip_txfm =
287
131M
              mi_prev->skip_txfm && is_inter_block(mi_prev);
288
131M
          const BLOCK_SIZE bsize = get_plane_block_size(
289
131M
              mbmi->bsize, plane_ptr->subsampling_x, plane_ptr->subsampling_y);
290
131M
          assert(bsize < BLOCK_SIZES_ALL);
291
136M
          const int prediction_masks = edge_dir == VERT_EDGE
292
136M
                                           ? block_size_wide[bsize] - 1
293
136M
                                           : block_size_high[bsize] - 1;
294
136M
          const int32_t pu_edge = !(coord & prediction_masks);
295
          // if the current and the previous blocks are skipped,
296
          // deblock the edge if the edge belongs to a PU's edge only.
297
136M
          if ((curr_level || pv_lvl) &&
298
136M
              (!pv_skip_txfm || !curr_skipped || pu_edge)) {
299
125M
            const int dim = (VERT_EDGE == edge_dir)
300
125M
                                ? AOMMIN(tx_size_wide_unit_log2[ts],
301
125M
                                         tx_size_wide_unit_log2[pv_ts])
302
125M
                                : AOMMIN(tx_size_high_unit_log2[ts],
303
125M
                                         tx_size_high_unit_log2[pv_ts]);
304
125M
            if (plane) {
305
66.1M
              params->filter_length = (dim == 0) ? 4 : 6;
306
66.1M
            } else {
307
59.1M
              assert(dim < TX_SIZES);
308
67.5M
              assert(dim >= 0);
309
67.5M
              params->filter_length = tx_dim_to_filter_length[dim];
310
67.5M
            }
311
312
            // update the level if the current block is skipped,
313
            // but the previous one is not
314
133M
            level = (curr_level) ? (curr_level) : (pv_lvl);
315
133M
          }
316
136M
        }
317
136M
      }
318
      // prepare common parameters
319
133M
      if (params->filter_length) {
320
120M
        const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
321
120M
        params->lfthr = limits;
322
120M
      }
323
133M
    }
324
133M
  }
325
326
0
  return ts;
327
119M
}
328
329
// Luma filter length for vertical edges, indexed as [ts][pv_ts]: the
// transform size of the current block, then that of the previous block.
static const uint32_t vert_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X8
  { 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8 },
  // TX_16X16
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_32X32
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_64X64
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_4X8
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X4
  { 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8 },
  // TX_8X16
  { 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8 },
  // TX_16X8
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_16X32
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_32X16
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_32X64
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_64X32
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_4X16
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_16X4
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_8X32
  { 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8 },
  // TX_32X8
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_16X64
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
  // TX_64X16
  { 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14 },
};
407
408
// Luma filter length for horizontal edges, indexed as [ts][pv_ts]: the
// transform size of the current block, then that of the previous block.
static const uint32_t horz_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X8
  { 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8 },
  // TX_16X16
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_32X32
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_64X64
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_4X8
  { 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8 },
  // TX_8X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X16
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_16X8
  { 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8 },
  // TX_16X32
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_32X16
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_32X64
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_64X32
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_4X16
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_16X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X32
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_32X8
  { 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8 },
  // TX_16X64
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
  // TX_64X16
  { 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14 },
};
486
487
// Chroma filter length for vertical edges, indexed as [ts][pv_ts]: the
// transform size of the current block, then that of the previous block.
static const uint32_t vert_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X8
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_16X16
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_32X32
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_64X64
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_4X8
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X4
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_8X16
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_16X8
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_16X32
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_32X16
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_32X64
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_64X32
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_4X16
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_16X4
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_8X32
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_32X8
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_16X64
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
  // TX_64X16
  { 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6 },
};
565
566
// Chroma filter length for horizontal edges, indexed as [ts][pv_ts]: the
// transform size of the current block, then that of the previous block.
static const uint32_t horz_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = {
  // TX_4X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X8
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_16X16
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_32X32
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_64X64
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_4X8
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_8X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X16
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_16X8
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_16X32
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_32X16
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_32X64
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_64X32
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_4X16
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_16X4
  { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
  // TX_8X32
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_32X8
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_16X64
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
  // TX_64X16
  { 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6 },
};
644
645
static AOM_FORCE_INLINE void set_one_param_for_line_luma(
646
    AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size,
647
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
648
    const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
649
    const struct macroblockd_plane *const plane_ptr, int coord,
650
    bool is_first_block, TX_SIZE prev_tx_size, const ptrdiff_t mode_step,
651
0
    int *min_dim) {
652
0
  (void)plane_ptr;
653
0
  assert(mi_col << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.width &&
654
0
         mi_row << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.height);
655
0
  const int is_vert = edge_dir == VERT_EDGE;
656
  // reset to initial values
657
0
  params->filter_length = 0;
658
659
0
  MB_MODE_INFO **mi =
660
0
      cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
661
0
  const MB_MODE_INFO *mbmi = mi[0];
662
0
  assert(mbmi);
663
664
0
  const TX_SIZE ts =
665
0
      get_transform_size(xd, mi[0], mi_row, mi_col, AOM_PLANE_Y, 0, 0);
666
667
0
#ifndef NDEBUG
668
0
  const uint32_t transform_masks =
669
0
      is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
670
0
  const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1);
671
0
  assert(tu_edge);
672
0
#endif  // NDEBUG
673
  // If we are not the first block, then coord is always true, so
674
  // !is_first_block is technically redundant. But we are keeping it here so the
675
  // compiler can compile away this conditional if we pass in is_first_block :=
676
  // false
677
0
  bool curr_skipped = false;
678
0
  if (!is_first_block || coord) {
679
0
    const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
680
0
    const int pv_row = is_vert ? mi_row : (mi_row - 1);
681
0
    const int pv_col = is_vert ? (mi_col - 1) : mi_col;
682
0
    const TX_SIZE pv_ts =
683
0
        is_first_block
684
0
            ? get_transform_size(xd, mi_prev, pv_row, pv_col, AOM_PLANE_Y, 0, 0)
685
0
            : prev_tx_size;
686
0
    if (is_first_block) {
687
0
      *min_dim = is_vert ? block_size_high[mi_prev->bsize]
688
0
                         : block_size_wide[mi_prev->bsize];
689
0
    }
690
0
    assert(mi_prev);
691
0
    uint8_t level =
692
0
        av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y, mbmi);
693
0
    if (!level) {
694
0
      level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y,
695
0
                                   mi_prev);
696
0
    }
697
698
0
    const int32_t pu_edge = mi_prev != mbmi;
699
700
    // The quad loop filter assumes that all the transform blocks within a
701
    // 8x16/16x8/16x16 prediction block are of the same size.
702
0
    assert(IMPLIES(
703
0
        !pu_edge && (mbmi->bsize >= BLOCK_8X16 && mbmi->bsize <= BLOCK_16X16),
704
0
        pv_ts == ts));
705
706
0
    if (!pu_edge) {
707
0
      curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
708
0
    }
709
0
    if ((pu_edge || !curr_skipped) && level) {
710
0
      params->filter_length = is_vert ? vert_filter_length_luma[ts][pv_ts]
711
0
                                      : horz_filter_length_luma[ts][pv_ts];
712
713
      // prepare common parameters
714
0
      const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
715
0
      params->lfthr = limits;
716
0
    }
717
0
  }
718
0
  const int block_dim =
719
0
      is_vert ? block_size_high[mbmi->bsize] : block_size_wide[mbmi->bsize];
720
0
  *min_dim = AOMMIN(*min_dim, block_dim);
721
722
0
  *tx_size = ts;
723
0
}
724
725
// Similar to set_lpf_parameters, but does so one row/col at a time to reduce
726
// calls to \ref get_transform_size and \ref av1_get_filter_level
727
static AOM_FORCE_INLINE void set_lpf_parameters_for_line_luma(
728
    AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf,
729
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
730
    const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
731
    const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
732
0
    const ptrdiff_t mode_step, int *min_dim) {
733
0
  const int is_vert = edge_dir == VERT_EDGE;
734
735
0
  AV1_DEBLOCKING_PARAMETERS *params = params_buf;
736
0
  TX_SIZE *tx_size = tx_buf;
737
0
  uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row;
738
0
  TX_SIZE prev_tx_size = TX_INVALID;
739
740
  // Unroll the first iteration of the loop
741
0
  set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col, mi_row,
742
0
                              plane_ptr, *counter_ptr, true, prev_tx_size,
743
0
                              mode_step, min_dim);
744
745
  // Advance
746
0
  int advance_units =
747
0
      is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
748
0
  prev_tx_size = *tx_size;
749
0
  *counter_ptr += advance_units;
750
0
  params += advance_units;
751
0
  tx_size += advance_units;
752
753
0
  while (*counter_ptr < mi_range) {
754
0
    set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col,
755
0
                                mi_row, plane_ptr, *counter_ptr, false,
756
0
                                prev_tx_size, mode_step, min_dim);
757
758
    // Advance
759
0
    advance_units =
760
0
        is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
761
0
    prev_tx_size = *tx_size;
762
0
    *counter_ptr += advance_units;
763
0
    params += advance_units;
764
0
    tx_size += advance_units;
765
0
  }
766
0
}
767
768
static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
769
    AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size,
770
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
771
    const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, int coord,
772
    bool is_first_block, TX_SIZE prev_tx_size,
773
    const struct macroblockd_plane *const plane_ptr, const ptrdiff_t mode_step,
774
    const int scale_horz, const int scale_vert, int *min_dim, int plane,
775
0
    int joint_filter_chroma) {
776
0
  const int is_vert = edge_dir == VERT_EDGE;
777
0
  (void)plane_ptr;
778
0
  assert((mi_col << MI_SIZE_LOG2) <
779
0
             (uint32_t)(plane_ptr->dst.width << scale_horz) &&
780
0
         (mi_row << MI_SIZE_LOG2) <
781
0
             (uint32_t)(plane_ptr->dst.height << scale_vert));
782
  // reset to initial values
783
0
  params->filter_length = 0;
784
785
  // for sub8x8 block, chroma prediction mode is obtained from the
786
  // bottom/right mi structure of the co-located 8x8 luma block. so for chroma
787
  // plane, mi_row and mi_col should map to the bottom/right mi structure,
788
  // i.e, both mi_row and mi_col should be odd number for chroma plane.
789
0
  mi_row |= scale_vert;
790
0
  mi_col |= scale_horz;
791
0
  MB_MODE_INFO **mi =
792
0
      cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col;
793
0
  const MB_MODE_INFO *mbmi = mi[0];
794
0
  assert(mbmi);
795
796
0
  const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
797
0
                                        scale_horz, scale_vert);
798
0
  *tx_size = ts;
799
800
0
#ifndef NDEBUG
801
0
  const uint32_t transform_masks =
802
0
      is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
803
0
  const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1);
804
0
  assert(tu_edge);
805
0
#endif  // NDEBUG
806
807
  // If we are not the first block, then coord is always true, so
808
  // !is_first_block is technically redundant. But we are keeping it here so the
809
  // compiler can compile away this conditional if we pass in is_first_block :=
810
  // false
811
0
  bool curr_skipped = false;
812
0
  if (!is_first_block || coord) {
813
0
    const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
814
0
    assert(mi_prev);
815
0
    const int pv_row = is_vert ? (mi_row) : (mi_row - (1 << scale_vert));
816
0
    const int pv_col = is_vert ? (mi_col - (1 << scale_horz)) : (mi_col);
817
0
    const TX_SIZE pv_ts =
818
0
        is_first_block ? get_transform_size(xd, mi_prev, pv_row, pv_col, plane,
819
0
                                            scale_horz, scale_vert)
820
0
                       : prev_tx_size;
821
0
    if (is_first_block) {
822
0
      *min_dim = is_vert ? tx_size_high[pv_ts] : tx_size_wide[pv_ts];
823
0
    }
824
825
0
    uint8_t level =
826
0
        av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
827
0
    if (!level) {
828
0
      level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
829
0
    }
830
0
#ifndef NDEBUG
831
0
    if (joint_filter_chroma) {
832
0
      uint8_t v_level =
833
0
          av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, mbmi);
834
0
      if (!v_level) {
835
0
        v_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V,
836
0
                                       mi_prev);
837
0
      }
838
0
      assert(level == v_level);
839
0
    }
840
#else
841
    (void)joint_filter_chroma;
842
#endif  // NDEBUG
843
0
    const int32_t pu_edge = mi_prev != mbmi;
844
845
0
    if (!pu_edge) {
846
0
      curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
847
0
    }
848
    // For realtime mode, u and v have the same level
849
0
    if ((!curr_skipped || pu_edge) && level) {
850
0
      params->filter_length = is_vert ? vert_filter_length_chroma[ts][pv_ts]
851
0
                                      : horz_filter_length_chroma[ts][pv_ts];
852
853
0
      const loop_filter_thresh *const limits = cm->lf_info.lfthr;
854
0
      params->lfthr = limits + level;
855
0
    }
856
0
  }
857
0
  const int tx_dim = is_vert ? tx_size_high[ts] : tx_size_wide[ts];
858
0
  *min_dim = AOMMIN(*min_dim, tx_dim);
859
0
}
860
861
static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma(
862
    AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf,
863
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
864
    const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
865
    const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
866
    const ptrdiff_t mode_step, const int scale_horz, const int scale_vert,
867
0
    int *min_dim, int plane, int joint_filter_chroma) {
868
0
  const int is_vert = edge_dir == VERT_EDGE;
869
870
0
  AV1_DEBLOCKING_PARAMETERS *params = params_buf;
871
0
  TX_SIZE *tx_size = tx_buf;
872
0
  uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row;
873
0
  const uint32_t scale = is_vert ? scale_horz : scale_vert;
874
0
  TX_SIZE prev_tx_size = TX_INVALID;
875
876
  // Unroll the first iteration of the loop
877
0
  set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
878
0
                                mi_row, *counter_ptr, true, prev_tx_size,
879
0
                                plane_ptr, mode_step, scale_horz, scale_vert,
880
0
                                min_dim, plane, joint_filter_chroma);
881
882
  // Advance
883
0
  int advance_units =
884
0
      is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
885
0
  prev_tx_size = *tx_size;
886
0
  *counter_ptr += advance_units << scale;
887
0
  params += advance_units;
888
0
  tx_size += advance_units;
889
890
0
  while (*counter_ptr < mi_range) {
891
0
    set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
892
0
                                  mi_row, *counter_ptr, false, prev_tx_size,
893
0
                                  plane_ptr, mode_step, scale_horz, scale_vert,
894
0
                                  min_dim, plane, joint_filter_chroma);
895
896
    // Advance
897
0
    advance_units =
898
0
        is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size];
899
0
    prev_tx_size = *tx_size;
900
0
    *counter_ptr += advance_units << scale;
901
0
    params += advance_units;
902
0
    tx_size += advance_units;
903
0
  }
904
0
}
905
906
static AOM_INLINE void filter_vert(uint8_t *dst, int dst_stride,
907
                                   const AV1_DEBLOCKING_PARAMETERS *params,
908
                                   const SequenceHeader *seq_params,
909
66.3M
                                   USE_FILTER_TYPE use_filter_type) {
910
66.3M
  const loop_filter_thresh *limits = params->lfthr;
911
66.3M
#if CONFIG_AV1_HIGHBITDEPTH
912
66.3M
  const int use_highbitdepth = seq_params->use_highbitdepth;
913
66.3M
  const aom_bit_depth_t bit_depth = seq_params->bit_depth;
914
66.3M
  if (use_highbitdepth) {
915
47.8M
    uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst);
916
47.8M
    if (use_filter_type == USE_QUAD) {
917
0
      switch (params->filter_length) {
918
        // apply 4-tap filtering
919
0
        case 4:
920
0
          aom_highbd_lpf_vertical_4_dual(
921
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
922
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
923
0
              bit_depth);
924
0
          aom_highbd_lpf_vertical_4_dual(
925
0
              dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
926
0
              limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
927
0
              limits->lim, limits->hev_thr, bit_depth);
928
0
          break;
929
0
        case 6:  // apply 6-tap filter for chroma plane only
930
0
          aom_highbd_lpf_vertical_6_dual(
931
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
932
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
933
0
              bit_depth);
934
0
          aom_highbd_lpf_vertical_6_dual(
935
0
              dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
936
0
              limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
937
0
              limits->lim, limits->hev_thr, bit_depth);
938
0
          break;
939
        // apply 8-tap filtering
940
0
        case 8:
941
0
          aom_highbd_lpf_vertical_8_dual(
942
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
943
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
944
0
              bit_depth);
945
0
          aom_highbd_lpf_vertical_8_dual(
946
0
              dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
947
0
              limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
948
0
              limits->lim, limits->hev_thr, bit_depth);
949
0
          break;
950
        // apply 14-tap filtering
951
0
        case 14:
952
0
          aom_highbd_lpf_vertical_14_dual(
953
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
954
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
955
0
              bit_depth);
956
0
          aom_highbd_lpf_vertical_14_dual(
957
0
              dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
958
0
              limits->mblim, limits->lim, limits->hev_thr, limits->mblim,
959
0
              limits->lim, limits->hev_thr, bit_depth);
960
0
          break;
961
        // no filtering
962
0
        default: break;
963
0
      }
964
47.8M
    } else if (use_filter_type == USE_DUAL) {
965
0
      switch (params->filter_length) {
966
        // apply 4-tap filtering
967
0
        case 4:
968
0
          aom_highbd_lpf_vertical_4_dual(
969
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
970
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
971
0
              bit_depth);
972
0
          break;
973
0
        case 6:  // apply 6-tap filter for chroma plane only
974
0
          aom_highbd_lpf_vertical_6_dual(
975
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
976
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
977
0
              bit_depth);
978
0
          break;
979
        // apply 8-tap filtering
980
0
        case 8:
981
0
          aom_highbd_lpf_vertical_8_dual(
982
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
983
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
984
0
              bit_depth);
985
0
          break;
986
        // apply 14-tap filtering
987
0
        case 14:
988
0
          aom_highbd_lpf_vertical_14_dual(
989
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
990
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
991
0
              bit_depth);
992
0
          break;
993
        // no filtering
994
0
        default: break;
995
0
      }
996
47.8M
    } else {
997
47.8M
      assert(use_filter_type == USE_SINGLE);
998
47.9M
      switch (params->filter_length) {
999
        // apply 4-tap filtering
1000
28.0M
        case 4:
1001
28.0M
          aom_highbd_lpf_vertical_4(dst_shortptr, dst_stride, limits->mblim,
1002
28.0M
                                    limits->lim, limits->hev_thr, bit_depth);
1003
28.0M
          break;
1004
5.58M
        case 6:  // apply 6-tap filter for chroma plane only
1005
5.58M
          aom_highbd_lpf_vertical_6(dst_shortptr, dst_stride, limits->mblim,
1006
5.58M
                                    limits->lim, limits->hev_thr, bit_depth);
1007
5.58M
          break;
1008
        // apply 8-tap filtering
1009
340k
        case 8:
1010
340k
          aom_highbd_lpf_vertical_8(dst_shortptr, dst_stride, limits->mblim,
1011
340k
                                    limits->lim, limits->hev_thr, bit_depth);
1012
340k
          break;
1013
        // apply 14-tap filtering
1014
6.19M
        case 14:
1015
6.19M
          aom_highbd_lpf_vertical_14(dst_shortptr, dst_stride, limits->mblim,
1016
6.19M
                                     limits->lim, limits->hev_thr, bit_depth);
1017
6.19M
          break;
1018
        // no filtering
1019
12.8M
        default: break;
1020
47.9M
      }
1021
47.9M
    }
1022
39.7M
    return;
1023
47.8M
  }
1024
18.4M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1025
18.4M
  if (use_filter_type == USE_QUAD) {
1026
    // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1027
    // passed as argument to quad loop filter because quad loop filter is
1028
    // called for those cases where all the 4 set of loop filter parameters
1029
    // are equal.
1030
0
    switch (params->filter_length) {
1031
      // apply 4-tap filtering
1032
0
      case 4:
1033
0
        aom_lpf_vertical_4_quad(dst, dst_stride, limits->mblim, limits->lim,
1034
0
                                limits->hev_thr);
1035
0
        break;
1036
0
      case 6:  // apply 6-tap filter for chroma plane only
1037
0
        aom_lpf_vertical_6_quad(dst, dst_stride, limits->mblim, limits->lim,
1038
0
                                limits->hev_thr);
1039
0
        break;
1040
      // apply 8-tap filtering
1041
0
      case 8:
1042
0
        aom_lpf_vertical_8_quad(dst, dst_stride, limits->mblim, limits->lim,
1043
0
                                limits->hev_thr);
1044
0
        break;
1045
      // apply 14-tap filtering
1046
0
      case 14:
1047
0
        aom_lpf_vertical_14_quad(dst, dst_stride, limits->mblim, limits->lim,
1048
0
                                 limits->hev_thr);
1049
0
        break;
1050
      // no filtering
1051
0
      default: break;
1052
0
    }
1053
18.4M
  } else if (use_filter_type == USE_DUAL) {
1054
0
    switch (params->filter_length) {
1055
      // apply 4-tap filtering
1056
0
      case 4:
1057
0
        aom_lpf_vertical_4_dual(dst, dst_stride, limits->mblim, limits->lim,
1058
0
                                limits->hev_thr, limits->mblim, limits->lim,
1059
0
                                limits->hev_thr);
1060
0
        break;
1061
0
      case 6:  // apply 6-tap filter for chroma plane only
1062
0
        aom_lpf_vertical_6_dual(dst, dst_stride, limits->mblim, limits->lim,
1063
0
                                limits->hev_thr, limits->mblim, limits->lim,
1064
0
                                limits->hev_thr);
1065
0
        break;
1066
      // apply 8-tap filtering
1067
0
      case 8:
1068
0
        aom_lpf_vertical_8_dual(dst, dst_stride, limits->mblim, limits->lim,
1069
0
                                limits->hev_thr, limits->mblim, limits->lim,
1070
0
                                limits->hev_thr);
1071
0
        break;
1072
      // apply 14-tap filtering
1073
0
      case 14:
1074
0
        aom_lpf_vertical_14_dual(dst, dst_stride, limits->mblim, limits->lim,
1075
0
                                 limits->hev_thr, limits->mblim, limits->lim,
1076
0
                                 limits->hev_thr);
1077
0
        break;
1078
      // no filtering
1079
0
      default: break;
1080
0
    }
1081
18.4M
  } else {
1082
18.4M
    assert(use_filter_type == USE_SINGLE);
1083
18.6M
    switch (params->filter_length) {
1084
      // apply 4-tap filtering
1085
2.30M
      case 4:
1086
2.30M
        aom_lpf_vertical_4(dst, dst_stride, limits->mblim, limits->lim,
1087
2.30M
                           limits->hev_thr);
1088
2.30M
        break;
1089
8.48M
      case 6:  // apply 6-tap filter for chroma plane only
1090
8.48M
        aom_lpf_vertical_6(dst, dst_stride, limits->mblim, limits->lim,
1091
8.48M
                           limits->hev_thr);
1092
8.48M
        break;
1093
      // apply 8-tap filtering
1094
2.10M
      case 8:
1095
2.10M
        aom_lpf_vertical_8(dst, dst_stride, limits->mblim, limits->lim,
1096
2.10M
                           limits->hev_thr);
1097
2.10M
        break;
1098
      // apply 14-tap filtering
1099
6.51M
      case 14:
1100
6.51M
        aom_lpf_vertical_14(dst, dst_stride, limits->mblim, limits->lim,
1101
6.51M
                            limits->hev_thr);
1102
6.51M
        break;
1103
      // no filtering
1104
2.08M
      default: break;
1105
18.6M
    }
1106
18.6M
  }
1107
#if !CONFIG_AV1_HIGHBITDEPTH
1108
  (void)seq_params;
1109
#endif  // !CONFIG_AV1_HIGHBITDEPTH
1110
18.4M
}
1111
1112
static AOM_INLINE void filter_vert_chroma(
1113
    uint8_t *u_dst, uint8_t *v_dst, int dst_stride,
1114
    const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params,
1115
0
    USE_FILTER_TYPE use_filter_type) {
1116
0
  const loop_filter_thresh *u_limits = params->lfthr;
1117
0
  const loop_filter_thresh *v_limits = params->lfthr;
1118
0
#if CONFIG_AV1_HIGHBITDEPTH
1119
0
  const int use_highbitdepth = seq_params->use_highbitdepth;
1120
0
  const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1121
0
  if (use_highbitdepth) {
1122
0
    uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst);
1123
0
    uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst);
1124
0
    if (use_filter_type == USE_QUAD) {
1125
0
      switch (params->filter_length) {
1126
        // apply 4-tap filtering
1127
0
        case 4:
1128
0
          aom_highbd_lpf_vertical_4_dual(
1129
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1130
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1131
0
              u_limits->hev_thr, bit_depth);
1132
0
          aom_highbd_lpf_vertical_4_dual(
1133
0
              u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1134
0
              u_limits->mblim, u_limits->lim, u_limits->hev_thr,
1135
0
              u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth);
1136
0
          aom_highbd_lpf_vertical_4_dual(
1137
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1138
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1139
0
              v_limits->hev_thr, bit_depth);
1140
0
          aom_highbd_lpf_vertical_4_dual(
1141
0
              v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1142
0
              v_limits->mblim, v_limits->lim, v_limits->hev_thr,
1143
0
              v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth);
1144
0
          break;
1145
0
        case 6:  // apply 6-tap filter for chroma plane only
1146
0
          aom_highbd_lpf_vertical_6_dual(
1147
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1148
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1149
0
              u_limits->hev_thr, bit_depth);
1150
0
          aom_highbd_lpf_vertical_6_dual(
1151
0
              u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1152
0
              u_limits->mblim, u_limits->lim, u_limits->hev_thr,
1153
0
              u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth);
1154
0
          aom_highbd_lpf_vertical_6_dual(
1155
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1156
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1157
0
              v_limits->hev_thr, bit_depth);
1158
0
          aom_highbd_lpf_vertical_6_dual(
1159
0
              v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride,
1160
0
              v_limits->mblim, v_limits->lim, v_limits->hev_thr,
1161
0
              v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth);
1162
0
          break;
1163
0
        case 8:
1164
0
        case 14: assert(0);
1165
        // no filtering
1166
0
        default: break;
1167
0
      }
1168
0
    } else if (use_filter_type == USE_DUAL) {
1169
0
      switch (params->filter_length) {
1170
        // apply 4-tap filtering
1171
0
        case 4:
1172
0
          aom_highbd_lpf_vertical_4_dual(
1173
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1174
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1175
0
              u_limits->hev_thr, bit_depth);
1176
0
          aom_highbd_lpf_vertical_4_dual(
1177
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1178
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1179
0
              v_limits->hev_thr, bit_depth);
1180
0
          break;
1181
0
        case 6:  // apply 6-tap filter for chroma plane only
1182
0
          aom_highbd_lpf_vertical_6_dual(
1183
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1184
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1185
0
              u_limits->hev_thr, bit_depth);
1186
0
          aom_highbd_lpf_vertical_6_dual(
1187
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1188
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1189
0
              v_limits->hev_thr, bit_depth);
1190
0
          break;
1191
0
        case 8:
1192
0
        case 14: assert(0);
1193
        // no filtering
1194
0
        default: break;
1195
0
      }
1196
0
    } else {
1197
0
      assert(use_filter_type == USE_SINGLE);
1198
0
      switch (params->filter_length) {
1199
        // apply 4-tap filtering
1200
0
        case 4:
1201
0
          aom_highbd_lpf_vertical_4(u_dst_shortptr, dst_stride, u_limits->mblim,
1202
0
                                    u_limits->lim, u_limits->hev_thr,
1203
0
                                    bit_depth);
1204
0
          aom_highbd_lpf_vertical_4(v_dst_shortptr, dst_stride, v_limits->mblim,
1205
0
                                    v_limits->lim, v_limits->hev_thr,
1206
0
                                    bit_depth);
1207
0
          break;
1208
0
        case 6:  // apply 6-tap filter for chroma plane only
1209
0
          aom_highbd_lpf_vertical_6(u_dst_shortptr, dst_stride, u_limits->mblim,
1210
0
                                    u_limits->lim, u_limits->hev_thr,
1211
0
                                    bit_depth);
1212
0
          aom_highbd_lpf_vertical_6(v_dst_shortptr, dst_stride, v_limits->mblim,
1213
0
                                    v_limits->lim, v_limits->hev_thr,
1214
0
                                    bit_depth);
1215
0
          break;
1216
0
        case 8:
1217
0
        case 14: assert(0); break;
1218
        // no filtering
1219
0
        default: break;
1220
0
      }
1221
0
    }
1222
0
    return;
1223
0
  }
1224
0
#endif  // CONFIG_AV1_HIGHBITDEPTH
1225
0
  if (use_filter_type == USE_QUAD) {
1226
    // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1227
    // passed as argument to quad loop filter because quad loop filter is
1228
    // called for those cases where all the 4 set of loop filter parameters
1229
    // are equal.
1230
0
    switch (params->filter_length) {
1231
      // apply 4-tap filtering
1232
0
      case 4:
1233
0
        aom_lpf_vertical_4_quad(u_dst, dst_stride, u_limits->mblim,
1234
0
                                u_limits->lim, u_limits->hev_thr);
1235
0
        aom_lpf_vertical_4_quad(v_dst, dst_stride, v_limits->mblim,
1236
0
                                v_limits->lim, v_limits->hev_thr);
1237
0
        break;
1238
0
      case 6:  // apply 6-tap filter for chroma plane only
1239
0
        aom_lpf_vertical_6_quad(u_dst, dst_stride, u_limits->mblim,
1240
0
                                u_limits->lim, u_limits->hev_thr);
1241
0
        aom_lpf_vertical_6_quad(v_dst, dst_stride, v_limits->mblim,
1242
0
                                v_limits->lim, v_limits->hev_thr);
1243
0
        break;
1244
0
      case 8:
1245
0
      case 14: assert(0);
1246
      // no filtering
1247
0
      default: break;
1248
0
    }
1249
0
  } else if (use_filter_type == USE_DUAL) {
1250
0
    switch (params->filter_length) {
1251
      // apply 4-tap filtering
1252
0
      case 4:
1253
0
        aom_lpf_vertical_4_dual(u_dst, dst_stride, u_limits->mblim,
1254
0
                                u_limits->lim, u_limits->hev_thr,
1255
0
                                u_limits->mblim, u_limits->lim,
1256
0
                                u_limits->hev_thr);
1257
0
        aom_lpf_vertical_4_dual(v_dst, dst_stride, v_limits->mblim,
1258
0
                                v_limits->lim, v_limits->hev_thr,
1259
0
                                v_limits->mblim, v_limits->lim,
1260
0
                                v_limits->hev_thr);
1261
0
        break;
1262
0
      case 6:  // apply 6-tap filter for chroma plane only
1263
0
        aom_lpf_vertical_6_dual(u_dst, dst_stride, u_limits->mblim,
1264
0
                                u_limits->lim, u_limits->hev_thr,
1265
0
                                u_limits->mblim, u_limits->lim,
1266
0
                                u_limits->hev_thr);
1267
0
        aom_lpf_vertical_6_dual(v_dst, dst_stride, v_limits->mblim,
1268
0
                                v_limits->lim, v_limits->hev_thr,
1269
0
                                v_limits->mblim, v_limits->lim,
1270
0
                                v_limits->hev_thr);
1271
0
        break;
1272
0
      case 8:
1273
0
      case 14: assert(0);
1274
      // no filtering
1275
0
      default: break;
1276
0
    }
1277
0
  } else {
1278
0
    assert(use_filter_type == USE_SINGLE);
1279
0
    switch (params->filter_length) {
1280
      // apply 4-tap filtering
1281
0
      case 4:
1282
0
        aom_lpf_vertical_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1283
0
                           u_limits->hev_thr);
1284
0
        aom_lpf_vertical_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1285
0
                           u_limits->hev_thr);
1286
0
        break;
1287
0
      case 6:  // apply 6-tap filter for chroma plane only
1288
0
        aom_lpf_vertical_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1289
0
                           u_limits->hev_thr);
1290
0
        aom_lpf_vertical_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1291
0
                           v_limits->hev_thr);
1292
0
        break;
1293
0
      case 8:
1294
0
      case 14: assert(0); break;
1295
      // no filtering
1296
0
      default: break;
1297
0
    }
1298
0
  }
1299
#if !CONFIG_AV1_HIGHBITDEPTH
1300
  (void)seq_params;
1301
#endif  // !CONFIG_AV1_HIGHBITDEPTH
1302
0
}
1303
1304
void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
1305
                                 const MACROBLOCKD *const xd, const int plane,
1306
                                 const MACROBLOCKD_PLANE *const plane_ptr,
1307
689k
                                 const uint32_t mi_row, const uint32_t mi_col) {
1308
689k
  const uint32_t scale_horz = plane_ptr->subsampling_x;
1309
689k
  const uint32_t scale_vert = plane_ptr->subsampling_y;
1310
689k
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
1311
689k
  const int dst_stride = plane_ptr->dst.stride;
1312
689k
  const int plane_mi_rows =
1313
689k
      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
1314
689k
  const int plane_mi_cols =
1315
689k
      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
1316
689k
  const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1317
689k
                             (MAX_MIB_SIZE >> scale_vert));
1318
689k
  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1319
689k
                             (MAX_MIB_SIZE >> scale_horz));
1320
1321
11.6M
  for (int y = 0; y < y_range; y++) {
1322
10.9M
    uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
1323
65.3M
    for (int x = 0; x < x_range;) {
1324
      // inner loop always filter vertical edges in a MI block. If MI size
1325
      // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
1326
      // If 4x4 transform is used, it will then filter the internal edge
1327
      //  aligned with a 4x4 block
1328
54.4M
      const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1329
54.4M
      const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1330
54.4M
      uint32_t advance_units;
1331
54.4M
      TX_SIZE tx_size;
1332
54.4M
      AV1_DEBLOCKING_PARAMETERS params;
1333
54.4M
      memset(&params, 0, sizeof(params));
1334
1335
54.4M
      tx_size =
1336
54.4M
          set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
1337
54.4M
                             VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
1338
54.4M
      if (tx_size == TX_INVALID) {
1339
0
        params.filter_length = 0;
1340
0
        tx_size = TX_4X4;
1341
0
      }
1342
1343
54.4M
      filter_vert(p, dst_stride, &params, cm->seq_params, USE_SINGLE);
1344
1345
      // advance the destination pointer
1346
54.4M
      advance_units = tx_size_wide_unit[tx_size];
1347
54.4M
      x += advance_units;
1348
54.4M
      p += advance_units * MI_SIZE;
1349
54.4M
    }
1350
10.9M
  }
1351
689k
}
1352
1353
void av1_filter_block_plane_vert_opt(
1354
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
1355
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
1356
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
1357
0
    TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) {
1358
0
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
1359
0
  const int dst_stride = plane_ptr->dst.stride;
1360
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
1361
  // to MI_SIZE.
1362
0
  const int plane_mi_cols =
1363
0
      CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
1364
0
  const int plane_mi_rows =
1365
0
      CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
1366
  // Whenever 'pipeline_lpf_mt_with_enc' is enabled, height of the unit to
1367
  // filter (i.e., y_range) is calculated based on the size of the superblock
1368
  // used.
1369
0
  const int y_range = AOMMIN((int)(plane_mi_rows - mi_row),
1370
0
                             (1 << num_mis_in_lpf_unit_height_log2));
1371
  // Width of the unit to filter (i.e., x_range) should always be calculated
1372
  // based on maximum superblock size as this function is called for mi_col = 0,
1373
  // MAX_MIB_SIZE, 2 * MAX_MIB_SIZE etc.
1374
0
  const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);
1375
0
  const ptrdiff_t mode_step = 1;
1376
0
  for (int y = 0; y < y_range; y++) {
1377
0
    const uint32_t curr_y = mi_row + y;
1378
0
    const uint32_t x_start = mi_col;
1379
0
    const uint32_t x_end = mi_col + x_range;
1380
0
    int min_block_height = block_size_high[BLOCK_128X128];
1381
0
    set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, VERT_EDGE,
1382
0
                                     x_start, curr_y, plane_ptr, x_end,
1383
0
                                     mode_step, &min_block_height);
1384
1385
0
    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
1386
0
    TX_SIZE *tx_size = tx_buf;
1387
0
    USE_FILTER_TYPE use_filter_type = USE_SINGLE;
1388
1389
0
    uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
1390
1391
0
    if ((y & 3) == 0 && (y + 3) < y_range && min_block_height >= 16) {
1392
      // If we are on a row which is a multiple of 4, and the minimum height is
1393
      // 16 pixels, then the current and right 3 cols must contain the same
1394
      // prediction block. This is because dim 16 can only happen every unit of
1395
      // 4 mi's.
1396
0
      use_filter_type = USE_QUAD;
1397
0
      y += 3;
1398
0
    } else if ((y + 1) < y_range && min_block_height >= 8) {
1399
0
      use_filter_type = USE_DUAL;
1400
0
      y += 1;
1401
0
    }
1402
1403
0
    for (int x = 0; x < x_range;) {
1404
0
      if (*tx_size == TX_INVALID) {
1405
0
        params->filter_length = 0;
1406
0
        *tx_size = TX_4X4;
1407
0
      }
1408
1409
0
      filter_vert(p, dst_stride, params, cm->seq_params, use_filter_type);
1410
1411
      // advance the destination pointer
1412
0
      const uint32_t advance_units = tx_size_wide_unit[*tx_size];
1413
0
      x += advance_units;
1414
0
      p += advance_units * MI_SIZE;
1415
0
      params += advance_units;
1416
0
      tx_size += advance_units;
1417
0
    }
1418
0
  }
1419
0
}
1420
1421
void av1_filter_block_plane_vert_opt_chroma(
1422
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
1423
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
1424
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
1425
    TX_SIZE *tx_buf, int plane, bool joint_filter_chroma,
1426
0
    int num_mis_in_lpf_unit_height_log2) {
1427
0
  const uint32_t scale_horz = plane_ptr->subsampling_x;
1428
0
  const uint32_t scale_vert = plane_ptr->subsampling_y;
1429
0
  const int dst_stride = plane_ptr->dst.stride;
1430
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
1431
  // to MI_SIZE.
1432
0
  const int mi_cols =
1433
0
      ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2;
1434
0
  const int mi_rows =
1435
0
      ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2;
1436
0
  const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert);
1437
0
  const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz);
1438
0
  const int y_range =
1439
0
      AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1440
0
             ((1 << num_mis_in_lpf_unit_height_log2) >> scale_vert));
1441
0
  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1442
0
                             (MAX_MIB_SIZE >> scale_horz));
1443
0
  const ptrdiff_t mode_step = (ptrdiff_t)1 << scale_horz;
1444
1445
0
  for (int y = 0; y < y_range; y++) {
1446
0
    const uint32_t curr_y = mi_row + (y << scale_vert);
1447
0
    const uint32_t x_start = mi_col + (0 << scale_horz);
1448
0
    const uint32_t x_end = mi_col + (x_range << scale_horz);
1449
0
    int min_height = tx_size_high[TX_64X64];
1450
0
    set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, VERT_EDGE,
1451
0
                                       x_start, curr_y, plane_ptr, x_end,
1452
0
                                       mode_step, scale_horz, scale_vert,
1453
0
                                       &min_height, plane, joint_filter_chroma);
1454
1455
0
    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
1456
0
    TX_SIZE *tx_size = tx_buf;
1457
0
    int use_filter_type = USE_SINGLE;
1458
0
    int y_inc = 0;
1459
1460
0
    if ((y & 3) == 0 && (y + 3) < y_range && min_height >= 16) {
1461
      // If we are on a row which is a multiple of 4, and the minimum height is
1462
      // 16 pixels, then the current and below 3 rows must contain the same tx
1463
      // block. This is because dim 16 can only happen every unit of 4 mi's.
1464
0
      use_filter_type = USE_QUAD;
1465
0
      y_inc = 3;
1466
0
    } else if (y % 2 == 0 && (y + 1) < y_range && min_height >= 8) {
1467
      // If we are on an even row, and the minimum height is 8 pixels, then the
1468
      // current and below rows must contain the same tx block. This is because
1469
      // dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1,
1470
      // etc.
1471
0
      use_filter_type = USE_DUAL;
1472
0
      y_inc = 1;
1473
0
    }
1474
1475
0
    for (int x = 0; x < x_range;) {
1476
      // inner loop always filter vertical edges in a MI block. If MI size
1477
      // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
1478
      // If 4x4 transform is used, it will then filter the internal edge
1479
      //  aligned with a 4x4 block
1480
0
      if (*tx_size == TX_INVALID) {
1481
0
        params->filter_length = 0;
1482
0
        *tx_size = TX_4X4;
1483
0
      }
1484
1485
0
      const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE;
1486
0
      if (joint_filter_chroma) {
1487
0
        uint8_t *u_dst = plane_ptr[0].dst.buf + offset;
1488
0
        uint8_t *v_dst = plane_ptr[1].dst.buf + offset;
1489
0
        filter_vert_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
1490
0
                           use_filter_type);
1491
0
      } else {
1492
0
        uint8_t *dst_ptr = plane_ptr->dst.buf + offset;
1493
0
        filter_vert(dst_ptr, dst_stride, params, cm->seq_params,
1494
0
                    use_filter_type);
1495
0
      }
1496
1497
      // advance the destination pointer
1498
0
      const uint32_t advance_units = tx_size_wide_unit[*tx_size];
1499
0
      x += advance_units;
1500
0
      params += advance_units;
1501
0
      tx_size += advance_units;
1502
0
    }
1503
0
    y += y_inc;
1504
0
  }
1505
0
}
1506
1507
static AOM_INLINE void filter_horz(uint8_t *dst, int dst_stride,
1508
                                   const AV1_DEBLOCKING_PARAMETERS *params,
1509
                                   const SequenceHeader *seq_params,
1510
81.1M
                                   USE_FILTER_TYPE use_filter_type) {
1511
81.1M
  const loop_filter_thresh *limits = params->lfthr;
1512
81.1M
#if CONFIG_AV1_HIGHBITDEPTH
1513
81.1M
  const int use_highbitdepth = seq_params->use_highbitdepth;
1514
81.1M
  const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1515
81.1M
  if (use_highbitdepth) {
1516
57.1M
    uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst);
1517
57.1M
    if (use_filter_type == USE_QUAD) {
1518
0
      switch (params->filter_length) {
1519
        // apply 4-tap filtering
1520
0
        case 4:
1521
0
          aom_highbd_lpf_horizontal_4_dual(
1522
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1523
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1524
0
              bit_depth);
1525
0
          aom_highbd_lpf_horizontal_4_dual(
1526
0
              dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1527
0
              limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1528
0
              limits->hev_thr, bit_depth);
1529
0
          break;
1530
0
        case 6:  // apply 6-tap filter for chroma plane only
1531
0
          aom_highbd_lpf_horizontal_6_dual(
1532
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1533
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1534
0
              bit_depth);
1535
0
          aom_highbd_lpf_horizontal_6_dual(
1536
0
              dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1537
0
              limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1538
0
              limits->hev_thr, bit_depth);
1539
0
          break;
1540
        // apply 8-tap filtering
1541
0
        case 8:
1542
0
          aom_highbd_lpf_horizontal_8_dual(
1543
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1544
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1545
0
              bit_depth);
1546
0
          aom_highbd_lpf_horizontal_8_dual(
1547
0
              dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1548
0
              limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1549
0
              limits->hev_thr, bit_depth);
1550
0
          break;
1551
        // apply 14-tap filtering
1552
0
        case 14:
1553
0
          aom_highbd_lpf_horizontal_14_dual(
1554
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1555
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1556
0
              bit_depth);
1557
0
          aom_highbd_lpf_horizontal_14_dual(
1558
0
              dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim,
1559
0
              limits->lim, limits->hev_thr, limits->mblim, limits->lim,
1560
0
              limits->hev_thr, bit_depth);
1561
0
          break;
1562
        // no filtering
1563
0
        default: break;
1564
0
      }
1565
57.1M
    } else if (use_filter_type == USE_DUAL) {
1566
0
      switch (params->filter_length) {
1567
        // apply 4-tap filtering
1568
0
        case 4:
1569
0
          aom_highbd_lpf_horizontal_4_dual(
1570
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1571
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1572
0
              bit_depth);
1573
0
          break;
1574
0
        case 6:  // apply 6-tap filter for chroma plane only
1575
0
          aom_highbd_lpf_horizontal_6_dual(
1576
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1577
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1578
0
              bit_depth);
1579
0
          break;
1580
        // apply 8-tap filtering
1581
0
        case 8:
1582
0
          aom_highbd_lpf_horizontal_8_dual(
1583
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1584
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1585
0
              bit_depth);
1586
0
          break;
1587
        // apply 14-tap filtering
1588
0
        case 14:
1589
0
          aom_highbd_lpf_horizontal_14_dual(
1590
0
              dst_shortptr, dst_stride, limits->mblim, limits->lim,
1591
0
              limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr,
1592
0
              bit_depth);
1593
0
          break;
1594
        // no filtering
1595
0
        default: break;
1596
0
      }
1597
57.1M
    } else {
1598
57.1M
      assert(use_filter_type == USE_SINGLE);
1599
57.0M
      switch (params->filter_length) {
1600
        // apply 4-tap filtering
1601
33.2M
        case 4:
1602
33.2M
          aom_highbd_lpf_horizontal_4(dst_shortptr, dst_stride, limits->mblim,
1603
33.2M
                                      limits->lim, limits->hev_thr, bit_depth);
1604
33.2M
          break;
1605
7.14M
        case 6:  // apply 6-tap filter for chroma plane only
1606
7.14M
          aom_highbd_lpf_horizontal_6(dst_shortptr, dst_stride, limits->mblim,
1607
7.14M
                                      limits->lim, limits->hev_thr, bit_depth);
1608
7.14M
          break;
1609
        // apply 8-tap filtering
1610
3.14M
        case 8:
1611
3.14M
          aom_highbd_lpf_horizontal_8(dst_shortptr, dst_stride, limits->mblim,
1612
3.14M
                                      limits->lim, limits->hev_thr, bit_depth);
1613
3.14M
          break;
1614
        // apply 14-tap filtering
1615
8.08M
        case 14:
1616
8.08M
          aom_highbd_lpf_horizontal_14(dst_shortptr, dst_stride, limits->mblim,
1617
8.08M
                                       limits->lim, limits->hev_thr, bit_depth);
1618
8.08M
          break;
1619
        // no filtering
1620
12.7M
        default: break;
1621
57.0M
      }
1622
57.0M
    }
1623
51.5M
    return;
1624
57.1M
  }
1625
24.0M
#endif  // CONFIG_AV1_HIGHBITDEPTH
1626
24.0M
  if (use_filter_type == USE_QUAD) {
1627
    // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1628
    // passed as argument to quad loop filter because quad loop filter is
1629
    // called for those cases where all the 4 set of loop filter parameters
1630
    // are equal.
1631
0
    switch (params->filter_length) {
1632
      // apply 4-tap filtering
1633
0
      case 4:
1634
0
        aom_lpf_horizontal_4_quad(dst, dst_stride, limits->mblim, limits->lim,
1635
0
                                  limits->hev_thr);
1636
0
        break;
1637
0
      case 6:  // apply 6-tap filter for chroma plane only
1638
0
        aom_lpf_horizontal_6_quad(dst, dst_stride, limits->mblim, limits->lim,
1639
0
                                  limits->hev_thr);
1640
0
        break;
1641
      // apply 8-tap filtering
1642
0
      case 8:
1643
0
        aom_lpf_horizontal_8_quad(dst, dst_stride, limits->mblim, limits->lim,
1644
0
                                  limits->hev_thr);
1645
0
        break;
1646
      // apply 14-tap filtering
1647
0
      case 14:
1648
0
        aom_lpf_horizontal_14_quad(dst, dst_stride, limits->mblim, limits->lim,
1649
0
                                   limits->hev_thr);
1650
0
        break;
1651
      // no filtering
1652
0
      default: break;
1653
0
    }
1654
24.0M
  } else if (use_filter_type == USE_DUAL) {
1655
0
    switch (params->filter_length) {
1656
      // apply 4-tap filtering
1657
0
      case 4:
1658
0
        aom_lpf_horizontal_4_dual(dst, dst_stride, limits->mblim, limits->lim,
1659
0
                                  limits->hev_thr, limits->mblim, limits->lim,
1660
0
                                  limits->hev_thr);
1661
0
        break;
1662
0
      case 6:  // apply 6-tap filter for chroma plane only
1663
0
        aom_lpf_horizontal_6_dual(dst, dst_stride, limits->mblim, limits->lim,
1664
0
                                  limits->hev_thr, limits->mblim, limits->lim,
1665
0
                                  limits->hev_thr);
1666
0
        break;
1667
      // apply 8-tap filtering
1668
0
      case 8:
1669
0
        aom_lpf_horizontal_8_dual(dst, dst_stride, limits->mblim, limits->lim,
1670
0
                                  limits->hev_thr, limits->mblim, limits->lim,
1671
0
                                  limits->hev_thr);
1672
0
        break;
1673
      // apply 14-tap filtering
1674
0
      case 14:
1675
0
        aom_lpf_horizontal_14_dual(dst, dst_stride, limits->mblim, limits->lim,
1676
0
                                   limits->hev_thr, limits->mblim, limits->lim,
1677
0
                                   limits->hev_thr);
1678
0
        break;
1679
      // no filtering
1680
0
      default: break;
1681
0
    }
1682
24.0M
  } else {
1683
24.0M
    assert(use_filter_type == USE_SINGLE);
1684
25.6M
    switch (params->filter_length) {
1685
      // apply 4-tap filtering
1686
4.64M
      case 4:
1687
4.64M
        aom_lpf_horizontal_4(dst, dst_stride, limits->mblim, limits->lim,
1688
4.64M
                             limits->hev_thr);
1689
4.64M
        break;
1690
9.91M
      case 6:  // apply 6-tap filter for chroma plane only
1691
9.91M
        aom_lpf_horizontal_6(dst, dst_stride, limits->mblim, limits->lim,
1692
9.91M
                             limits->hev_thr);
1693
9.91M
        break;
1694
      // apply 8-tap filtering
1695
4.19M
      case 8:
1696
4.19M
        aom_lpf_horizontal_8(dst, dst_stride, limits->mblim, limits->lim,
1697
4.19M
                             limits->hev_thr);
1698
4.19M
        break;
1699
      // apply 14-tap filtering
1700
8.24M
      case 14:
1701
8.24M
        aom_lpf_horizontal_14(dst, dst_stride, limits->mblim, limits->lim,
1702
8.24M
                              limits->hev_thr);
1703
8.24M
        break;
1704
      // no filtering
1705
2.35M
      default: break;
1706
25.6M
    }
1707
25.6M
  }
1708
#if !CONFIG_AV1_HIGHBITDEPTH
1709
  (void)seq_params;
1710
#endif  // !CONFIG_AV1_HIGHBITDEPTH
1711
24.0M
}
1712
1713
static AOM_INLINE void filter_horz_chroma(
1714
    uint8_t *u_dst, uint8_t *v_dst, int dst_stride,
1715
    const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params,
1716
0
    USE_FILTER_TYPE use_filter_type) {
1717
0
  const loop_filter_thresh *u_limits = params->lfthr;
1718
0
  const loop_filter_thresh *v_limits = params->lfthr;
1719
0
#if CONFIG_AV1_HIGHBITDEPTH
1720
0
  const int use_highbitdepth = seq_params->use_highbitdepth;
1721
0
  const aom_bit_depth_t bit_depth = seq_params->bit_depth;
1722
0
  if (use_highbitdepth) {
1723
0
    uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst);
1724
0
    uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst);
1725
0
    if (use_filter_type == USE_QUAD) {
1726
0
      switch (params->filter_length) {
1727
        // apply 4-tap filtering
1728
0
        case 4:
1729
0
          aom_highbd_lpf_horizontal_4_dual(
1730
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1731
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1732
0
              u_limits->hev_thr, bit_depth);
1733
0
          aom_highbd_lpf_horizontal_4_dual(
1734
0
              u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim,
1735
0
              u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1736
0
              u_limits->hev_thr, bit_depth);
1737
0
          aom_highbd_lpf_horizontal_4_dual(
1738
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1739
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1740
0
              v_limits->hev_thr, bit_depth);
1741
0
          aom_highbd_lpf_horizontal_4_dual(
1742
0
              v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim,
1743
0
              v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1744
0
              v_limits->hev_thr, bit_depth);
1745
0
          break;
1746
0
        case 6:  // apply 6-tap filter for chroma plane only
1747
0
          aom_highbd_lpf_horizontal_6_dual(
1748
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1749
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1750
0
              u_limits->hev_thr, bit_depth);
1751
0
          aom_highbd_lpf_horizontal_6_dual(
1752
0
              u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim,
1753
0
              u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1754
0
              u_limits->hev_thr, bit_depth);
1755
0
          aom_highbd_lpf_horizontal_6_dual(
1756
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1757
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1758
0
              v_limits->hev_thr, bit_depth);
1759
0
          aom_highbd_lpf_horizontal_6_dual(
1760
0
              v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim,
1761
0
              v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1762
0
              v_limits->hev_thr, bit_depth);
1763
0
          break;
1764
0
        case 8:
1765
0
        case 14: assert(0);
1766
        // no filtering
1767
0
        default: break;
1768
0
      }
1769
0
    } else if (use_filter_type == USE_DUAL) {
1770
0
      switch (params->filter_length) {
1771
        // apply 4-tap filtering
1772
0
        case 4:
1773
0
          aom_highbd_lpf_horizontal_4_dual(
1774
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1775
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1776
0
              u_limits->hev_thr, bit_depth);
1777
0
          aom_highbd_lpf_horizontal_4_dual(
1778
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1779
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1780
0
              v_limits->hev_thr, bit_depth);
1781
0
          break;
1782
0
        case 6:  // apply 6-tap filter for chroma plane only
1783
0
          aom_highbd_lpf_horizontal_6_dual(
1784
0
              u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim,
1785
0
              u_limits->hev_thr, u_limits->mblim, u_limits->lim,
1786
0
              u_limits->hev_thr, bit_depth);
1787
0
          aom_highbd_lpf_horizontal_6_dual(
1788
0
              v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim,
1789
0
              v_limits->hev_thr, v_limits->mblim, v_limits->lim,
1790
0
              v_limits->hev_thr, bit_depth);
1791
0
          break;
1792
0
        case 8:
1793
0
        case 14: assert(0);
1794
        // no filtering
1795
0
        default: break;
1796
0
      }
1797
0
    } else {
1798
0
      assert(use_filter_type == USE_SINGLE);
1799
0
      switch (params->filter_length) {
1800
        // apply 4-tap filtering
1801
0
        case 4:
1802
0
          aom_highbd_lpf_horizontal_4(u_dst_shortptr, dst_stride,
1803
0
                                      u_limits->mblim, u_limits->lim,
1804
0
                                      u_limits->hev_thr, bit_depth);
1805
0
          aom_highbd_lpf_horizontal_4(v_dst_shortptr, dst_stride,
1806
0
                                      v_limits->mblim, v_limits->lim,
1807
0
                                      v_limits->hev_thr, bit_depth);
1808
0
          break;
1809
0
        case 6:  // apply 6-tap filter for chroma plane only
1810
0
          aom_highbd_lpf_horizontal_6(u_dst_shortptr, dst_stride,
1811
0
                                      u_limits->mblim, u_limits->lim,
1812
0
                                      u_limits->hev_thr, bit_depth);
1813
0
          aom_highbd_lpf_horizontal_6(v_dst_shortptr, dst_stride,
1814
0
                                      v_limits->mblim, v_limits->lim,
1815
0
                                      v_limits->hev_thr, bit_depth);
1816
0
          break;
1817
0
        case 8:
1818
0
        case 14: assert(0); break;
1819
        // no filtering
1820
0
        default: break;
1821
0
      }
1822
0
    }
1823
0
    return;
1824
0
  }
1825
0
#endif  // CONFIG_AV1_HIGHBITDEPTH
1826
0
  if (use_filter_type == USE_QUAD) {
1827
    // Only one set of loop filter parameters (mblim, lim and hev_thr) is
1828
    // passed as argument to quad loop filter because quad loop filter is
1829
    // called for those cases where all the 4 set of loop filter parameters
1830
    // are equal.
1831
0
    switch (params->filter_length) {
1832
      // apply 4-tap filtering
1833
0
      case 4:
1834
0
        aom_lpf_horizontal_4_quad(u_dst, dst_stride, u_limits->mblim,
1835
0
                                  u_limits->lim, u_limits->hev_thr);
1836
0
        aom_lpf_horizontal_4_quad(v_dst, dst_stride, v_limits->mblim,
1837
0
                                  v_limits->lim, v_limits->hev_thr);
1838
0
        break;
1839
0
      case 6:  // apply 6-tap filter for chroma plane only
1840
0
        aom_lpf_horizontal_6_quad(u_dst, dst_stride, u_limits->mblim,
1841
0
                                  u_limits->lim, u_limits->hev_thr);
1842
0
        aom_lpf_horizontal_6_quad(v_dst, dst_stride, v_limits->mblim,
1843
0
                                  v_limits->lim, v_limits->hev_thr);
1844
0
        break;
1845
0
      case 8:
1846
0
      case 14: assert(0);
1847
      // no filtering
1848
0
      default: break;
1849
0
    }
1850
0
  } else if (use_filter_type == USE_DUAL) {
1851
0
    switch (params->filter_length) {
1852
      // apply 4-tap filtering
1853
0
      case 4:
1854
0
        aom_lpf_horizontal_4_dual(u_dst, dst_stride, u_limits->mblim,
1855
0
                                  u_limits->lim, u_limits->hev_thr,
1856
0
                                  u_limits->mblim, u_limits->lim,
1857
0
                                  u_limits->hev_thr);
1858
0
        aom_lpf_horizontal_4_dual(v_dst, dst_stride, v_limits->mblim,
1859
0
                                  v_limits->lim, v_limits->hev_thr,
1860
0
                                  v_limits->mblim, v_limits->lim,
1861
0
                                  v_limits->hev_thr);
1862
0
        break;
1863
0
      case 6:  // apply 6-tap filter for chroma plane only
1864
0
        aom_lpf_horizontal_6_dual(u_dst, dst_stride, u_limits->mblim,
1865
0
                                  u_limits->lim, u_limits->hev_thr,
1866
0
                                  u_limits->mblim, u_limits->lim,
1867
0
                                  u_limits->hev_thr);
1868
0
        aom_lpf_horizontal_6_dual(v_dst, dst_stride, v_limits->mblim,
1869
0
                                  v_limits->lim, v_limits->hev_thr,
1870
0
                                  v_limits->mblim, v_limits->lim,
1871
0
                                  v_limits->hev_thr);
1872
0
        break;
1873
0
      case 8:
1874
0
      case 14: assert(0);
1875
      // no filtering
1876
0
      default: break;
1877
0
    }
1878
0
  } else {
1879
0
    assert(use_filter_type == USE_SINGLE);
1880
0
    switch (params->filter_length) {
1881
      // apply 4-tap filtering
1882
0
      case 4:
1883
0
        aom_lpf_horizontal_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1884
0
                             u_limits->hev_thr);
1885
0
        aom_lpf_horizontal_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1886
0
                             u_limits->hev_thr);
1887
0
        break;
1888
0
      case 6:  // apply 6-tap filter for chroma plane only
1889
0
        aom_lpf_horizontal_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim,
1890
0
                             u_limits->hev_thr);
1891
0
        aom_lpf_horizontal_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim,
1892
0
                             v_limits->hev_thr);
1893
0
        break;
1894
0
      case 8:
1895
0
      case 14: assert(0); break;
1896
      // no filtering
1897
0
      default: break;
1898
0
    }
1899
0
  }
1900
#if !CONFIG_AV1_HIGHBITDEPTH
1901
  (void)seq_params;
1902
#endif  // !CONFIG_AV1_HIGHBITDEPTH
1903
0
}
1904
1905
void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
1906
                                 const MACROBLOCKD *const xd, const int plane,
1907
                                 const MACROBLOCKD_PLANE *const plane_ptr,
1908
693k
                                 const uint32_t mi_row, const uint32_t mi_col) {
1909
693k
  const uint32_t scale_horz = plane_ptr->subsampling_x;
1910
693k
  const uint32_t scale_vert = plane_ptr->subsampling_y;
1911
693k
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
1912
693k
  const int dst_stride = plane_ptr->dst.stride;
1913
693k
  const int plane_mi_rows =
1914
693k
      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
1915
693k
  const int plane_mi_cols =
1916
693k
      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
1917
693k
  const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
1918
693k
                             (MAX_MIB_SIZE >> scale_vert));
1919
693k
  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
1920
693k
                             (MAX_MIB_SIZE >> scale_horz));
1921
14.4M
  for (int x = 0; x < x_range; x++) {
1922
13.7M
    uint8_t *p = dst_ptr + x * MI_SIZE;
1923
88.6M
    for (int y = 0; y < y_range;) {
1924
      // inner loop always filter vertical edges in a MI block. If MI size
1925
      // is 8x8, it will first filter the vertical edge aligned with a 8x8
1926
      // block. If 4x4 transform is used, it will then filter the internal
1927
      // edge aligned with a 4x4 block
1928
74.8M
      const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
1929
74.8M
      const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
1930
74.8M
      uint32_t advance_units;
1931
74.8M
      TX_SIZE tx_size;
1932
74.8M
      AV1_DEBLOCKING_PARAMETERS params;
1933
74.8M
      memset(&params, 0, sizeof(params));
1934
1935
74.8M
      tx_size = set_lpf_parameters(
1936
74.8M
          &params, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE,
1937
74.8M
          curr_x, curr_y, plane, plane_ptr);
1938
74.8M
      if (tx_size == TX_INVALID) {
1939
0
        params.filter_length = 0;
1940
0
        tx_size = TX_4X4;
1941
0
      }
1942
1943
74.8M
      filter_horz(p, dst_stride, &params, cm->seq_params, USE_SINGLE);
1944
1945
      // advance the destination pointer
1946
74.8M
      advance_units = tx_size_high_unit[tx_size];
1947
74.8M
      y += advance_units;
1948
74.8M
      p += advance_units * dst_stride * MI_SIZE;
1949
74.8M
    }
1950
13.7M
  }
1951
693k
}
1952
1953
void av1_filter_block_plane_horz_opt(
1954
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
1955
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
1956
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
1957
0
    TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) {
1958
0
  uint8_t *const dst_ptr = plane_ptr->dst.buf;
1959
0
  const int dst_stride = plane_ptr->dst.stride;
1960
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
1961
  // to MI_SIZE.
1962
0
  const int plane_mi_cols =
1963
0
      CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
1964
0
  const int plane_mi_rows =
1965
0
      CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
1966
0
  const int y_range = AOMMIN((int)(plane_mi_rows - mi_row),
1967
0
                             (1 << num_mis_in_lpf_unit_height_log2));
1968
0
  const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);
1969
1970
0
  const ptrdiff_t mode_step = cm->mi_params.mi_stride;
1971
0
  for (int x = 0; x < x_range; x++) {
1972
0
    const uint32_t curr_x = mi_col + x;
1973
0
    const uint32_t y_start = mi_row;
1974
0
    const uint32_t y_end = mi_row + y_range;
1975
0
    int min_block_width = block_size_high[BLOCK_128X128];
1976
0
    set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
1977
0
                                     curr_x, y_start, plane_ptr, y_end,
1978
0
                                     mode_step, &min_block_width);
1979
1980
0
    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
1981
0
    TX_SIZE *tx_size = tx_buf;
1982
0
    USE_FILTER_TYPE filter_type = USE_SINGLE;
1983
1984
0
    uint8_t *p = dst_ptr + x * MI_SIZE;
1985
1986
0
    if ((x & 3) == 0 && (x + 3) < x_range && min_block_width >= 16) {
1987
      // If we are on a col which is a multiple of 4, and the minimum width is
1988
      // 16 pixels, then the current and right 3 cols must contain the same
1989
      // prediction block. This is because dim 16 can only happen every unit of
1990
      // 4 mi's.
1991
0
      filter_type = USE_QUAD;
1992
0
      x += 3;
1993
0
    } else if ((x + 1) < x_range && min_block_width >= 8) {
1994
0
      filter_type = USE_DUAL;
1995
0
      x += 1;
1996
0
    }
1997
1998
0
    for (int y = 0; y < y_range;) {
1999
0
      if (*tx_size == TX_INVALID) {
2000
0
        params->filter_length = 0;
2001
0
        *tx_size = TX_4X4;
2002
0
      }
2003
2004
0
      filter_horz(p, dst_stride, params, cm->seq_params, filter_type);
2005
2006
      // advance the destination pointer
2007
0
      const uint32_t advance_units = tx_size_high_unit[*tx_size];
2008
0
      y += advance_units;
2009
0
      p += advance_units * dst_stride * MI_SIZE;
2010
0
      params += advance_units;
2011
0
      tx_size += advance_units;
2012
0
    }
2013
0
  }
2014
0
}
2015
2016
void av1_filter_block_plane_horz_opt_chroma(
2017
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
2018
    const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
2019
    const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
2020
    TX_SIZE *tx_buf, int plane, bool joint_filter_chroma,
2021
0
    int num_mis_in_lpf_unit_height_log2) {
2022
0
  const uint32_t scale_horz = plane_ptr->subsampling_x;
2023
0
  const uint32_t scale_vert = plane_ptr->subsampling_y;
2024
0
  const int dst_stride = plane_ptr->dst.stride;
2025
  // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
2026
  // to MI_SIZE.
2027
0
  const int mi_cols =
2028
0
      ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2;
2029
0
  const int mi_rows =
2030
0
      ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2;
2031
0
  const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert);
2032
0
  const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz);
2033
0
  const int y_range =
2034
0
      AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
2035
0
             ((1 << num_mis_in_lpf_unit_height_log2) >> scale_vert));
2036
0
  const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
2037
0
                             (MAX_MIB_SIZE >> scale_horz));
2038
0
  const ptrdiff_t mode_step = cm->mi_params.mi_stride << scale_vert;
2039
0
  for (int x = 0; x < x_range; x++) {
2040
0
    const uint32_t y_start = mi_row + (0 << scale_vert);
2041
0
    const uint32_t curr_x = mi_col + (x << scale_horz);
2042
0
    const uint32_t y_end = mi_row + (y_range << scale_vert);
2043
0
    int min_width = tx_size_wide[TX_64X64];
2044
0
    set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
2045
0
                                       curr_x, y_start, plane_ptr, y_end,
2046
0
                                       mode_step, scale_horz, scale_vert,
2047
0
                                       &min_width, plane, joint_filter_chroma);
2048
2049
0
    AV1_DEBLOCKING_PARAMETERS *params = params_buf;
2050
0
    TX_SIZE *tx_size = tx_buf;
2051
0
    USE_FILTER_TYPE use_filter_type = USE_SINGLE;
2052
0
    int x_inc = 0;
2053
2054
0
    if ((x & 3) == 0 && (x + 3) < x_range && min_width >= 16) {
2055
      // If we are on a col which is a multiple of 4, and the minimum width is
2056
      // 16 pixels, then the current and right 3 cols must contain the same tx
2057
      // block. This is because dim 16 can only happen every unit of 4 mi's.
2058
0
      use_filter_type = USE_QUAD;
2059
0
      x_inc = 3;
2060
0
    } else if (x % 2 == 0 && (x + 1) < x_range && min_width >= 8) {
2061
      // If we are on an even col, and the minimum width is 8 pixels, then the
2062
      // current and left cols must contain the same tx block. This is because
2063
      // dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1,
2064
      // etc.
2065
0
      use_filter_type = USE_DUAL;
2066
0
      x_inc = 1;
2067
0
    }
2068
2069
0
    for (int y = 0; y < y_range;) {
2070
      // inner loop always filter vertical edges in a MI block. If MI size
2071
      // is 8x8, it will first filter the vertical edge aligned with a 8x8
2072
      // block. If 4x4 transform is used, it will then filter the internal
2073
      // edge aligned with a 4x4 block
2074
0
      if (*tx_size == TX_INVALID) {
2075
0
        params->filter_length = 0;
2076
0
        *tx_size = TX_4X4;
2077
0
      }
2078
2079
0
      const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE;
2080
0
      if (joint_filter_chroma) {
2081
0
        uint8_t *u_dst = plane_ptr[0].dst.buf + offset;
2082
0
        uint8_t *v_dst = plane_ptr[1].dst.buf + offset;
2083
0
        filter_horz_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
2084
0
                           use_filter_type);
2085
0
      } else {
2086
0
        uint8_t *dst_ptr = plane_ptr->dst.buf + offset;
2087
0
        filter_horz(dst_ptr, dst_stride, params, cm->seq_params,
2088
0
                    use_filter_type);
2089
0
      }
2090
2091
      // advance the destination pointer
2092
0
      const int advance_units = tx_size_high_unit[*tx_size];
2093
0
      y += advance_units;
2094
0
      params += advance_units;
2095
0
      tx_size += advance_units;
2096
0
    }
2097
0
    x += x_inc;
2098
0
  }
2099
0
}