Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/cdef.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
 */
11
12
#include "cdef.h"
13
#include "common_dsp_rtcd.h"
14
#include "bitstream_unit.h"
15
16
0
/* Returns -1 when i is negative and +1 otherwise (zero maps to +1). */
static INLINE int32_t sign(int32_t i) {
    return i < 0 ? -1 : 1;
}
19
20
0
/*
 * Limits a tap difference before it is accumulated by the filter.
 *
 * diff      - difference between a neighbouring sample and the centre sample.
 * threshold - filter strength; a value of 0 disables the tap (result is 0).
 * damping   - controls how quickly large differences are attenuated.
 *
 * The result has the sign of diff and magnitude
 *     min(|diff|, max(0, threshold - (|diff| >> shift)))
 * with shift = max(0, damping - get_msb(threshold)).
 * NOTE(review): get_msb() is assumed to return the index of the highest set
 * bit of its argument - confirm against its definition.
 */
static INLINE int32_t constrain(int32_t diff, int32_t threshold, int32_t damping) {
    if (!threshold) {
        return 0;
    }

    const int32_t shift = AOMMAX(0, damping - get_msb(threshold));
    return sign(diff) * AOMMIN(abs(diff), AOMMAX(0, threshold - (abs(diff) >> shift)));
}
28
29
/*
30
This is Cdef_Directions (section 7.15.3) with 2 padding entries at the
31
beginning and end of the table. The cdef direction range is [0, 7] and the
32
first index is offset +/-2. This removes the need to constrain the first
33
index to the same range using e.g., & 7.
34
*/
35
DECLARE_ALIGNED(16, const int, eb_cdef_directions_padded[12][2]) = {
36
    /* Padding: svt_aom_eb_cdef_directions[6] */
37
    {1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0},
38
    /* Padding: svt_aom_eb_cdef_directions[7] */
39
    {1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1},
40
41
    /* Begin svt_aom_eb_cdef_directions */
42
    {-1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2},
43
    {0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2},
44
    {0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2},
45
    {0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2},
46
    {1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2},
47
    {1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1},
48
    {1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0},
49
    {1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1},
50
    /* End svt_aom_eb_cdef_directions */
51
52
    /* Padding: svt_aom_eb_cdef_directions[0] */
53
    {-1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2},
54
    /* Padding: svt_aom_eb_cdef_directions[1] */
55
    {0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2},
56
};
57
58
/* View of the padded table that starts at its third row, so that first
   indices from -2 to 9 address valid rows of eb_cdef_directions_padded. */
const int (*const svt_aom_eb_cdef_directions)[2] = eb_cdef_directions_padded + 2;
59
60
/* Compute the primary filter strength for an 8x8 block based on the
61
directional variance difference. A high variance difference means
62
that we have a highly directional pattern (e.g. a high contrast
63
edge), so we can apply more deringing. A low variance means that we
64
either have a low contrast edge, or a non-directional texture, so
65
we want to be careful not to blur. */
66
0
static INLINE int32_t adjust_strength(int32_t strength, int32_t var) {
67
0
    const int32_t i = (var >> 6) ? AOMMIN(get_msb(var >> 6), 12) : 0;
68
    /* We use the variance of 8x8 blocks to adjust the strength. */
69
0
    return var ? (strength * (4 + i) + 8) >> 4 : 0;
70
0
}
71
72
/*
 * Copies a v-row by h-column rectangle of 8-bit samples into a 16-bit
 * buffer, widening each sample.
 *
 * dst     - destination buffer; row i starts at dst + i * dstride.
 * dstride - destination row pitch, in uint16_t elements.
 * src     - source buffer; row i starts at src + i * sstride.
 * sstride - source row pitch, in bytes.
 * v       - number of rows to copy.
 * h       - number of samples per row to copy.
 *
 * Elements of dst outside the rectangle are left untouched.
 */
void svt_aom_copy_rect8_8bit_to_16bit_c(uint16_t* dst, int32_t dstride, const uint8_t* src, int32_t sstride, int32_t v,
                                        int32_t h) {
    for (int32_t i = 0; i < v; i++) {
        for (int32_t j = 0; j < h; j++) {
            dst[i * dstride + j] = src[i * sstride + j];
        }
    }
}
80
81
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
The search minimizes the weighted variance along all the lines in a
particular direction, i.e. the squared error between the input and a
"predicted" block where each pixel is replaced by the average along a line
in a particular direction. Since each direction has the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf

img         - top-left sample of the 8x8 block to analyse.
stride      - row pitch of img, in uint16_t elements.
var         - output: (best cost - cost of the orthogonal direction) >> 10.
coeff_shift - right shift applied to every sample before 128 is subtracted.

Returns the direction index in [0, 7] with the largest cost; ties keep the
lowest index, and an all-zero cost vector yields direction 0 with *var = 0. */
uint8_t svt_aom_cdef_find_dir_c(const uint16_t* img, int32_t stride, int32_t* var, int32_t coeff_shift) {
    int32_t cost[8]        = {0};
    int32_t partial[8][15] = {{0}};
    int32_t best_cost      = 0;
    uint8_t i;
    uint8_t best_dir = 0;
    /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
    The output is then 840 times larger, but we don't care for finding
    the max. */
    static const int32_t div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105};

    /* Accumulate, for each of the 8 directions, the sum of the samples that
    fall on every line of that direction. */
    for (i = 0; i < 8; i++) {
        int32_t j;
        for (j = 0; j < 8; j++) {
            int32_t x;
            /* We subtract 128 here to reduce the maximum range of the squared
            partial sums. */
            x = (img[i * stride + j] >> coeff_shift) - 128;
            partial[0][i + j] += x;
            partial[1][i + j / 2] += x;
            partial[2][i] += x;
            partial[3][3 + i - j / 2] += x;
            partial[4][7 + i - j] += x;
            partial[5][3 - i / 2 + j] += x;
            partial[6][j] += x;
            partial[7][i / 2 + j] += x;
        }
    }

    /* Horizontal (2) and vertical (6): eight lines of eight samples each. */
    for (i = 0; i < 8; i++) {
        cost[2] += partial[2][i] * partial[2][i];
        cost[6] += partial[6][i] * partial[6][i];
    }
    cost[2] *= div_table[8];
    cost[6] *= div_table[8];

    /* Diagonals (0 and 4): line lengths run 1..7, 8, 7..1. */
    for (i = 0; i < 7; i++) {
        cost[0] += (partial[0][i] * partial[0][i] + partial[0][14 - i] * partial[0][14 - i]) * div_table[i + 1];
        cost[4] += (partial[4][i] * partial[4][i] + partial[4][14 - i] * partial[4][14 - i]) * div_table[i + 1];
    }
    cost[0] += partial[0][7] * partial[0][7] * div_table[8];
    cost[4] += partial[4][7] * partial[4][7] * div_table[8];

    /* Odd directions: five full lines of 8 samples in the middle, flanked on
    each side by lines of 2, 4 and 6 samples. */
    for (i = 1; i < 8; i += 2) {
        int32_t j;
        for (j = 0; j < 4 + 1; j++) {
            cost[i] += partial[i][3 + j] * partial[i][3 + j];
        }
        cost[i] *= div_table[8];
        for (j = 0; j < 4 - 1; j++) {
            cost[i] += (partial[i][j] * partial[i][j] + partial[i][10 - j] * partial[i][10 - j]) * div_table[2 * j + 2];
        }
    }

    /* Strict comparison: the first direction reaching the maximum wins. */
    for (i = 0; i < 8; i++) {
        if (cost[i] > best_cost) {
            best_cost = cost[i];
            best_dir  = i;
        }
    }
    /* Difference between the optimal variance and the variance along the
    orthogonal direction. Again, the sum(x^2) terms cancel out. */
    *var = best_cost - cost[(best_dir + 4) & 7];
    /* We'd normally divide by 840, but dividing by 1024 is close enough
    for what we're going to do with this. */
    *var >>= 10;
    return best_dir;
}
151
152
/*
 * Runs the direction search on two 8x8 blocks that share the same stride.
 *
 * img1, img2 - top-left samples of the two blocks.
 * stride     - row pitch of both blocks, in uint16_t elements.
 * var1, var2 - outputs: directional variance difference of each block.
 * coeff_shift- right shift applied to the samples before analysis.
 * out1, out2 - outputs: detected direction of each block.
 *
 * This C version simply calls svt_aom_cdef_find_dir_c() once per block.
 */
void svt_aom_cdef_find_dir_dual_c(const uint16_t* img1, const uint16_t* img2, int stride, int32_t* var1, int32_t* var2,
                                  int32_t coeff_shift, uint8_t* out1, uint8_t* out2) {
    *out1 = svt_aom_cdef_find_dir_c(img1, stride, var1, coeff_shift);
    *out2 = svt_aom_cdef_find_dir_c(img2, stride, var2, coeff_shift);
}
157
158
/*
 * Fills dir[][] and var[][] for every 8x8 block listed in dlist.
 *
 * in          - sample buffer with a row pitch of CDEF_BSTRIDE; block (by, bx)
 *               starts at in[8 * by * CDEF_BSTRIDE + 8 * bx].
 * dlist       - list of blocks to process; each entry supplies by and bx.
 * var, dir    - per-block outputs, indexed [by][bx].
 * cdef_count  - number of valid entries in dlist.
 * coeff_shift - forwarded unchanged to the direction search.
 *
 * Blocks are handled in pairs through svt_aom_cdef_find_dir_dual(); when
 * cdef_count is odd the last block goes through svt_aom_cdef_find_dir().
 */
static AOM_INLINE void cdef_find_dir(uint16_t* in, CdefList* dlist, int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS],
                                     int32_t cdef_count, int32_t coeff_shift, uint8_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS]) {
    int bi;

    // Find direction of two 8x8 blocks together.
    for (bi = 0; bi < cdef_count - 1; bi += 2) {
        const uint8_t by   = dlist[bi].by;
        const uint8_t bx   = dlist[bi].bx;
        const uint8_t by2  = dlist[bi + 1].by;
        const uint8_t bx2  = dlist[bi + 1].bx;
        const int     pos1 = 8 * by * CDEF_BSTRIDE + 8 * bx;
        const int     pos2 = 8 * by2 * CDEF_BSTRIDE + 8 * bx2;
        svt_aom_cdef_find_dir_dual(&in[pos1],
                                   &in[pos2],
                                   CDEF_BSTRIDE,
                                   &var[by][bx],
                                   &var[by2][bx2],
                                   coeff_shift,
                                   &dir[by][bx],
                                   &dir[by2][bx2]);
    }

    // Process remaining 8x8 blocks here. One 8x8 at a time.
    // After the loop above, bi indexes the unpaired last entry when cdef_count is odd.
    if (cdef_count % 2) {
        const uint8_t by = dlist[bi].by;
        const uint8_t bx = dlist[bi].bx;
        dir[by][bx]      = svt_aom_cdef_find_dir(
            &in[8 * by * CDEF_BSTRIDE + 8 * bx], CDEF_BSTRIDE, &var[by][bx], coeff_shift);
    }
}
188
189
/* Primary filter tap weights: one row per tap set, one column per tap
   distance (first tap, second tap). */
const int32_t svt_aom_eb_cdef_pri_taps[2][2] = {{4, 2}, {3, 3}};
/* Secondary filter tap weights; both rows hold the same values. */
const int32_t svt_aom_eb_cdef_sec_taps[2][2] = {{2, 1}, {2, 1}};
191
192
/* Smooth in the direction detected. */
193
void svt_cdef_filter_block_c(uint8_t* dst8, uint16_t* dst16, int32_t dstride, const uint16_t* in, int32_t pri_strength,
194
                             int32_t sec_strength, int32_t dir, int32_t pri_damping, int32_t sec_damping, int32_t bsize,
195
0
                             int32_t coeff_shift, uint8_t subsampling_factor) {
196
0
    int32_t        i, j, k;
197
0
    const int32_t  s        = CDEF_BSTRIDE;
198
0
    const int32_t* pri_taps = svt_aom_eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
199
0
    const int32_t* sec_taps = svt_aom_eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
200
201
0
    for (i = 0; i < (4 << (int32_t)(bsize == BLOCK_8X8 || bsize == BLOCK_4X8)); i += subsampling_factor) {
202
0
        for (j = 0; j < (4 << (int32_t)(bsize == BLOCK_8X8 || bsize == BLOCK_8X4)); j++) {
203
0
            int16_t sum = 0;
204
0
            int16_t y;
205
0
            int16_t x   = in[i * s + j];
206
0
            int32_t max = x;
207
0
            int32_t min = x;
208
0
            for (k = 0; k < 2; k++) {
209
0
                int16_t p0 = in[i * s + j + svt_aom_eb_cdef_directions[dir][k]];
210
0
                int16_t p1 = in[i * s + j - svt_aom_eb_cdef_directions[dir][k]];
211
0
                sum += (int16_t)(pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping));
212
0
                sum += (int16_t)(pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping));
213
0
                if (p0 != CDEF_VERY_LARGE) {
214
0
                    max = AOMMAX(p0, max);
215
0
                }
216
0
                if (p1 != CDEF_VERY_LARGE) {
217
0
                    max = AOMMAX(p1, max);
218
0
                }
219
0
                min        = AOMMIN(p0, min);
220
0
                min        = AOMMIN(p1, min);
221
0
                int16_t s0 = in[i * s + j + svt_aom_eb_cdef_directions[(dir + 2)][k]];
222
0
                int16_t s1 = in[i * s + j - svt_aom_eb_cdef_directions[(dir + 2)][k]];
223
0
                int16_t s2 = in[i * s + j + svt_aom_eb_cdef_directions[(dir - 2)][k]];
224
0
                int16_t s3 = in[i * s + j - svt_aom_eb_cdef_directions[(dir - 2)][k]];
225
0
                if (s0 != CDEF_VERY_LARGE) {
226
0
                    max = AOMMAX(s0, max);
227
0
                }
228
0
                if (s1 != CDEF_VERY_LARGE) {
229
0
                    max = AOMMAX(s1, max);
230
0
                }
231
0
                if (s2 != CDEF_VERY_LARGE) {
232
0
                    max = AOMMAX(s2, max);
233
0
                }
234
0
                if (s3 != CDEF_VERY_LARGE) {
235
0
                    max = AOMMAX(s3, max);
236
0
                }
237
0
                min = AOMMIN(s0, min);
238
0
                min = AOMMIN(s1, min);
239
0
                min = AOMMIN(s2, min);
240
0
                min = AOMMIN(s3, min);
241
0
                sum += (int16_t)(sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping));
242
0
                sum += (int16_t)(sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping));
243
0
                sum += (int16_t)(sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping));
244
0
                sum += (int16_t)(sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping));
245
0
            }
246
0
            y = (int16_t)clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
247
0
            if (dst8) {
248
0
                dst8[i * dstride + j] = (uint8_t)y;
249
0
            } else {
250
0
                dst16[i * dstride + j] = (uint16_t)y;
251
0
            }
252
0
        }
253
0
    }
254
0
}
255
256
/*
 * Copies a vsize-row by hsize-column rectangle from a picture buffer into a
 * 16-bit working buffer.
 *
 * dst         - destination buffer of uint16_t samples.
 * dstride     - destination row pitch, in uint16_t elements.
 * src         - source picture; reinterpreted as uint16_t* when is_16bit.
 * src_voffset - first source row of the rectangle.
 * src_hoffset - first source column of the rectangle.
 * sstride     - source row pitch, in samples.
 * vsize       - number of rows to copy.
 * hsize       - number of samples per row to copy.
 * is_16bit    - true when src holds 16-bit samples, false for 8-bit samples.
 */
void svt_aom_copy_sb8_16(uint16_t* dst, int32_t dstride, const uint8_t* src, int32_t src_voffset, int32_t src_hoffset,
                         int32_t sstride, int32_t vsize, int32_t hsize, bool is_16bit) {
    if (is_16bit) {
        /* Offsets are applied after the cast, i.e. in 16-bit sample units. */
        const uint16_t* base = ((uint16_t*)src) + (src_voffset * sstride + src_hoffset);
        for (int r = 0; r < vsize; r++) {
            /* 2 bytes per sample. */
            svt_memcpy(dst, base, 2 * hsize);
            dst += dstride;
            base += sstride;
        }
    } else {
        /* 8-bit source: widen every sample while copying. */
        const uint8_t* base = &src[src_voffset * sstride + src_hoffset];
        svt_aom_copy_rect8_8bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
    }
}
270
271
/*
272
 * Loop over the non-skip 8x8 blocks.  For each block, find the CDEF direction, then apply the specified filter.
273
*/
274
void svt_cdef_filter_fb(uint8_t* dst8, uint16_t* dst16, int32_t dstride, uint16_t* in, int32_t xdec, int32_t ydec,
275
                        uint8_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int32_t* dirinit,
276
                        int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS], int32_t pli, CdefList* dlist, int32_t cdef_count,
277
                        int32_t level, int32_t sec_strength, int32_t pri_damping, int32_t sec_damping,
278
0
                        int32_t coeff_shift, uint8_t subsampling_factor) {
279
0
    int32_t bi;
280
0
    int32_t pri_strength = level << coeff_shift;
281
0
    sec_strength <<= coeff_shift;
282
0
    sec_damping += coeff_shift - (pli != PLANE_Y);
283
0
    pri_damping += coeff_shift - (pli != PLANE_Y);
284
285
0
    int32_t bsize  = ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
286
0
    int32_t bsizex = 3 - xdec;
287
0
    int32_t bsizey = 3 - ydec;
288
289
0
    if (!dstride && pri_strength == 0 && sec_strength == 0) {
290
        // If we're here, both primary and secondary strengths are 0, and
291
        // we still haven't written anything to y[] yet, so we just copy
292
        // the input to y[]. This is necessary only for svt_av1_cdef_search()
293
        // and only svt_av1_cdef_search() sets dirinit.
294
0
        for (bi = 0; bi < cdef_count; bi++) {
295
0
            int32_t   by = dlist[bi].by << bsizey;
296
0
            int32_t   bx = dlist[bi].bx << bsizex;
297
0
            int32_t   iy;
298
0
            uint16_t* src_16 = in + (by * CDEF_BSTRIDE + bx);
299
0
            if (dst8) {
300
0
                uint8_t* dst_8 = dst8 + (bi << (bsizex + bsizey));
301
                //size 2x2 and 3x3, no gain to use SIMD
302
0
                for (iy = 0; iy < 1 << bsizey; iy += subsampling_factor) {
303
0
                    for (int32_t ix = 0; ix < 1 << bsizex; ix++) {
304
0
                        dst_8[(iy << bsizex) + ix] = (uint8_t)src_16[iy * CDEF_BSTRIDE + ix];
305
0
                    }
306
0
                }
307
0
            } else {
308
0
                uint16_t* dst_16 = dst16 + (bi << (bsizex + bsizey));
309
0
                for (iy = 0; iy < 1 << bsizey; iy += subsampling_factor) {
310
0
                    memcpy(dst_16 + (iy << bsizex),
311
0
                           src_16 + iy * CDEF_BSTRIDE,
312
0
                           (uint32_t)(1 << bsizex) * sizeof(uint16_t));
313
0
                }
314
0
            }
315
0
        }
316
0
        return;
317
0
    }
318
319
0
    if (pli == 0) {
320
0
        if (!dirinit || !*dirinit) {
321
0
            cdef_find_dir(in, dlist, var, cdef_count, coeff_shift, dir);
322
0
            if (dirinit) {
323
0
                *dirinit = 1;
324
0
            }
325
0
        }
326
0
    } else if (pli == 1 && xdec != ydec) {
327
0
        for (bi = 0; bi < cdef_count; bi++) {
328
0
            static const uint8_t conv422[8] = {7, 0, 2, 4, 5, 6, 6, 6};
329
0
            static const uint8_t conv440[8] = {1, 2, 2, 2, 3, 4, 6, 0};
330
331
0
            int32_t by  = dlist[bi].by;
332
0
            int32_t bx  = dlist[bi].bx;
333
0
            dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
334
0
        }
335
0
    }
336
337
0
    for (bi = 0; bi < cdef_count; bi++) {
338
0
        int32_t by = dlist[bi].by;
339
0
        int32_t bx = dlist[bi].bx;
340
0
        int32_t t  = pli ? pri_strength : adjust_strength(pri_strength, var[by][bx]);
341
0
        int32_t k  = dstride ? (by << bsizey) * dstride + (bx << bsizex) : bi << (bsizex + bsizey);
342
0
        svt_cdef_filter_block(dst8 ? &dst8[k] : NULL,
343
0
                              dst8 ? NULL : &dst16[k],
344
0
                              dstride ? dstride : 1 << bsizex,
345
0
                              &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
346
0
                              t,
347
0
                              sec_strength,
348
0
                              pri_strength ? dir[by][bx] : 0,
349
0
                              pri_damping,
350
0
                              sec_damping,
351
0
                              bsize,
352
0
                              coeff_shift,
353
0
                              subsampling_factor);
354
0
    }
355
0
}