Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/deblock.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
*         Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "deblock.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "slice.h"
30
#include "mv.h"
31
32
using namespace X265_NS;
33
34
139k
/* Edge spacing used by the CU edge walk in this file: shifted right by
 * LOG2_UNIT_SIZE it gives the per-iteration edge increment, and it is also
 * used to build the chroma edge mask and the chroma edge alignment check. */
#define DEBLOCK_SMALLEST_BLOCK  8
35
728k
/* Offset added to the QP when indexing s_tcTable: scaled by (bs - 1) for luma
 * edges and applied unscaled for chroma edges; also extends the upper clip
 * bound of that index beyond QP_MAX_SPEC. */
#define DEFAULT_INTRA_TC_OFFSET 2
36
37
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38
25.8k
{
39
25.8k
    uint8_t blockStrength[MAX_NUM_PARTITIONS];
40
41
25.8k
    memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42
43
25.8k
    deblockCU(ctu, cuGeom, dir, blockStrength);
44
25.8k
}
45
46
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47
69.6k
{
48
69.6k
    if (dir == Deblock::EDGE_VER)
49
34.8k
    {
50
34.8k
        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51
30.2k
        {
52
30.2k
            uint32_t    tempPartIdx;
53
30.2k
            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54
30.2k
            return tempCU ? 2 : 0;
55
30.2k
        }
56
34.8k
    }
57
34.8k
    else
58
34.8k
    {
59
34.8k
        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60
30.9k
        {
61
30.9k
            uint32_t    tempPartIdx;
62
30.9k
            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63
30.9k
            return tempCU ? 2 : 0;
64
30.9k
        }
65
34.8k
    }
66
67
8.44k
    return 0;
68
69.6k
}
69
70
/* Deblocking filter process in CU-based (the same function as conventional's)
71
 * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
72
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73
96.4k
{
74
96.4k
    uint32_t absPartIdx = cuGeom.absPartIdx;
75
96.4k
    uint32_t depth = cuGeom.depth;
76
96.4k
    if (cu->m_predMode[absPartIdx] == MODE_NONE)
77
0
        return;
78
79
96.4k
    if (cu->m_cuDepth[absPartIdx] > depth)
80
26.7k
    {
81
133k
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82
107k
        {
83
107k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84
107k
            if (childGeom.flags & CUGeom::PRESENT)
85
70.6k
                deblockCU(cu, childGeom, dir, blockStrength);
86
107k
        }
87
26.7k
        return;
88
26.7k
    }
89
90
69.6k
    uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91
69.6k
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92
69.6k
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93
69.6k
    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94
95
69.6k
    uint32_t numParts = cuGeom.numPartitions;
96
2.16M
    for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97
2.09M
    {
98
2.09M
        uint32_t bsCheck = !(partIdx & (1 << dir));
99
100
2.09M
        if (bsCheck && blockStrength[partIdx])
101
291k
            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102
2.09M
    }
103
104
69.6k
    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105
69.6k
    uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106
69.6k
    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107
69.6k
    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108
        
109
239k
    for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110
170k
    {
111
170k
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112
170k
        if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113
89.6k
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114
170k
    }
115
69.6k
}
116
117
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118
1.99M
{
119
1.99M
    if (dir)
120
997k
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121
997k
    else
122
997k
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123
1.99M
}
124
125
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126
140k
{
127
140k
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128
820k
    for (uint32_t i = 0; i < numUnits; i++)
129
680k
    {
130
680k
        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131
680k
        blockStrength[bsidx] = value;
132
680k
    }
133
140k
}
134
135
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136
70.6k
{
137
70.6k
    uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138
70.6k
    if (cu->m_tuDepth[absPartIdx] > tuDepth)
139
256
    {
140
256
        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141
1.28k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142
1.02k
            setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143
256
        return;
144
256
    }
145
146
70.4k
    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147
70.4k
    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148
70.4k
}
149
150
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151
69.6k
{
152
69.6k
    const uint32_t hNumUnits = numUnits >> 1;
153
69.6k
    const uint32_t qNumUnits = numUnits >> 2;
154
155
69.6k
    switch (cu->m_partSize[absPartIdx])
156
69.6k
    {
157
0
    case SIZE_2NxN:
158
0
        if (EDGE_HOR == dir)
159
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160
0
        break;
161
0
    case SIZE_Nx2N:
162
0
        if (EDGE_VER == dir)
163
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164
0
        break;
165
0
    case SIZE_NxN:
166
0
        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167
0
        break;
168
0
    case SIZE_2NxnU:
169
0
        if (EDGE_HOR == dir)
170
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171
0
        break;
172
0
    case SIZE_nLx2N:
173
0
        if (EDGE_VER == dir)
174
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175
0
        break;
176
0
    case SIZE_2NxnD:
177
0
        if (EDGE_HOR == dir)
178
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179
0
        break;
180
0
    case SIZE_nRx2N:
181
0
        if (EDGE_VER == dir)
182
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183
0
        break;
184
185
69.6k
    case SIZE_2Nx2N:
186
69.6k
    default:
187
69.6k
        break;
188
69.6k
    }
189
69.6k
}
190
191
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192
291k
{
193
    // Calculate block index
194
291k
    uint32_t partP;
195
291k
    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196
197
    // Set BS for Intra MB : BS = 2
198
291k
    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199
291k
        return 2;
200
201
    // Set BS for not Intra MB : BS = 1 or 0
202
0
    if (blockStrength[partQ] > 1 &&
203
0
        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204
0
         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205
0
        return 1;
206
207
0
    static const MV zeroMv(0, 0);
208
0
    const Slice* const sliceQ = cuQ->m_slice;
209
0
    const Slice* const sliceP = cuP->m_slice;
210
0
    const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211
0
    const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212
0
    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213
0
    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214
0
    if (sliceQ->isInterP() && sliceP->isInterP())
215
0
    {
216
0
        return ((refP0 != refQ0) ||
217
0
                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218
0
    }
219
    // (sliceQ->isInterB() || sliceP->isInterB())
220
0
    const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221
0
    const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222
0
    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223
0
    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224
225
0
    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226
0
    {
227
0
        if (refP0 != refP1) // Different L0 & L1
228
0
        {
229
0
            if (refP0 == refQ0)
230
0
                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231
0
                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232
0
            else
233
0
                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234
0
                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235
0
        }
236
0
        else // Same L0 & L1
237
0
        {
238
0
            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239
0
                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240
0
                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241
0
                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242
0
        }
243
0
    }
244
        
245
    // for all different Ref_Idx
246
0
    return 1;
247
0
}
248
249
/* Absolute second difference of the three samples before src along 'offset':
 * |src[-3*offset] - 2*src[-2*offset] + src[-offset]| */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t far = static_cast<int32_t>(src[-offset * 3]);
    const int32_t mid = src[-offset * 2];
    const int32_t near = src[-offset];

    return abs(far - 2 * mid + near);
}
253
254
/* Absolute second difference of the three samples starting at src along
 * 'offset': |src[0] - 2*src[offset] + src[2*offset]| */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t near = static_cast<int32_t>(src[0]);
    const int32_t mid = src[offset];
    const int32_t far = src[offset * 2];

    return abs(near - 2 * mid + far);
}
258
259
/* Strong-filter decision for one line of samples across the edge at src.
 * True when the outer-to-inner sample differences on both sides sum to less
 * than beta/8 and the step across the edge is below (5*tc + 1)/2. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t p3 = (int16_t)src[-offset * 4];
    const int16_t p0 = (int16_t)src[-offset];
    const int16_t q0 = (int16_t)src[0];
    const int16_t q3 = (int16_t)src[offset * 3];

    const int32_t sideFlatness = abs(p3 - p0) + abs(q3 - q0);
    if (!(sideFlatness < (beta >> 3)))
        return false;

    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
}
269
270
/* Deblocking for the luminance component with strong or weak filter
271
 * \param src     pointer to picture data
272
 * \param offset  offset value for picture data
273
 * \param tc      tc value
274
 * \param maskP   indicator to enable filtering on partP
275
 * \param maskQ   indicator to enable filtering on partQ
276
 * \param maskP1  decision weak filter/no filter for partP
277
 * \param maskQ1  decision weak filter/no filter for partQ */
278
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279
                                 int32_t maskP1, int32_t maskQ1)
280
4.38k
{
281
4.38k
    int32_t thrCut = tc * 10;
282
4.38k
    int32_t tc2 = tc >> 1;
283
4.38k
    maskP1 &= maskP;
284
4.38k
    maskQ1 &= maskQ;
285
286
21.9k
    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287
17.5k
    {
288
17.5k
        int16_t m4  = (int16_t)src[0];
289
17.5k
        int16_t m3  = (int16_t)src[-offset];
290
17.5k
        int16_t m5  = (int16_t)src[offset];
291
17.5k
        int16_t m2  = (int16_t)src[-offset * 2];
292
293
17.5k
        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294
295
17.5k
        if (abs(delta) < thrCut)
296
17.5k
        {
297
17.5k
            delta = x265_clip3(-tc, tc, delta);
298
299
17.5k
            src[-offset] = x265_clip(m3 + (delta & maskP));
300
17.5k
            src[0] = x265_clip(m4 - (delta & maskQ));
301
17.5k
            if (maskP1)
302
17.5k
            {
303
17.5k
                int16_t m1  = (int16_t)src[-offset * 3];
304
17.5k
                int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305
17.5k
                src[-offset * 2] = x265_clip(m2 + delta1);
306
17.5k
            }
307
17.5k
            if (maskQ1)
308
17.5k
            {
309
17.5k
                int16_t m6  = (int16_t)src[offset * 2];
310
17.5k
                int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311
17.5k
                src[offset] = x265_clip(m5 + delta2);
312
17.5k
            }
313
17.5k
        }
314
17.5k
    }
315
4.38k
}
316
317
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318
170k
{
319
170k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
320
170k
    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321
170k
    intptr_t stride = reconPic->m_stride;
322
170k
    const PPS* pps = cuQ->m_slice->m_pps;
323
324
170k
    intptr_t offset, srcStep;
325
326
170k
    int32_t maskP = -1;
327
170k
    int32_t maskQ = -1;
328
170k
    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329
170k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330
170k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331
332
170k
    if (dir == EDGE_VER)
333
85.0k
    {
334
85.0k
        offset = 1;
335
85.0k
        srcStep = stride;
336
85.0k
        src += (edge << LOG2_UNIT_SIZE);
337
85.0k
    }
338
85.0k
    else // (dir == EDGE_HOR)
339
85.0k
    {
340
85.0k
        offset = stride;
341
85.0k
        srcStep = 1;
342
85.0k
        src += (edge << LOG2_UNIT_SIZE) * stride;
343
85.0k
    }
344
345
170k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346
1.21M
    for (uint32_t idx = 0; idx < numUnits; idx++)
347
1.04M
    {
348
1.04M
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349
1.04M
        uint32_t bs = blockStrength[partQ];
350
351
1.04M
        if (!bs)
352
756k
            continue;
353
354
        // Derive neighboring PU index
355
291k
        uint32_t partP;
356
291k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357
358
291k
        if (bCheckNoFilter)
359
77.5k
        {
360
            // check if each of PUs is lossless coded
361
77.5k
            maskP = cuP->m_tqBypass[partP] - 1;
362
77.5k
            maskQ = cuQ->m_tqBypass[partQ] - 1;
363
77.5k
            if (!(maskP | maskQ))
364
77.5k
                continue;
365
77.5k
        }
366
367
213k
        int32_t qpQ = cuQ->m_qp[partQ];
368
213k
        int32_t qpP = cuP->m_qp[partP];
369
213k
        int32_t qp  = (qpP + qpQ + 1) >> 1;
370
371
213k
        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372
373
213k
        const int32_t bitdepthShift = X265_DEPTH - 8;
374
213k
        int32_t beta = s_betaTable[indexB] << bitdepthShift;
375
376
213k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377
213k
        int32_t dp0 = calcDP(src + unitOffset              , offset);
378
213k
        int32_t dq0 = calcDQ(src + unitOffset              , offset);
379
213k
        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380
213k
        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381
213k
        int32_t d0 = dp0 + dq0;
382
213k
        int32_t d3 = dp3 + dq3;
383
384
213k
        int32_t d =  d0 + d3;
385
386
213k
        if (d >= beta)
387
55.7k
            continue;
388
389
157k
        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390
157k
        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391
392
157k
        bool sw = (2 * d0 < (beta >> 2) &&
393
157k
                   2 * d3 < (beta >> 2) &&
394
157k
                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395
153k
                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396
397
157k
        if (sw)
398
153k
        {
399
153k
            int32_t tc2 = 2 * tc;
400
153k
            int32_t tcP = (tc2 & maskP);
401
153k
            int32_t tcQ = (tc2 & maskQ);
402
153k
            primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403
153k
        }
404
4.38k
        else
405
4.38k
        {
406
4.38k
            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407
4.38k
            int32_t dp = dp0 + dp3;
408
4.38k
            int32_t dq = dq0 + dq3;
409
4.38k
            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410
4.38k
            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411
412
4.38k
            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413
4.38k
        }
414
157k
    }
415
170k
}
416
417
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418
89.6k
{
419
89.6k
    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420
89.6k
    intptr_t offset, srcStep;
421
89.6k
    const PPS* pps = cuQ->m_slice->m_pps;
422
423
89.6k
    int32_t maskP = -1;
424
89.6k
    int32_t maskQ = -1;
425
89.6k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426
427
89.6k
    X265_CHECK(((dir == EDGE_VER)
428
89.6k
                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429
89.6k
                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430
89.6k
               "invalid edge\n");
431
432
89.6k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
433
89.6k
    intptr_t stride = reconPic->m_strideC;
434
89.6k
    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435
89.6k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436
437
89.6k
    if (dir == EDGE_VER)
438
44.5k
    {
439
44.5k
        chromaShift = cuQ->m_vChromaShift;
440
44.5k
        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441
44.5k
        offset     = 1;
442
44.5k
        srcStep    = stride;
443
44.5k
    }
444
45.0k
    else // (dir == EDGE_HOR)
445
45.0k
    {
446
45.0k
        chromaShift = cuQ->m_hChromaShift;
447
45.0k
        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448
45.0k
        offset     = stride;
449
45.0k
        srcStep    = 1;
450
45.0k
    }
451
452
89.6k
    pixel* srcChroma[2];
453
89.6k
    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454
89.6k
    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455
456
89.6k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457
356k
    for (uint32_t idx = 0; idx < numUnits; idx++)
458
266k
    {
459
266k
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460
266k
        uint32_t bs = blockStrength[partQ];
461
462
266k
        if (bs <= 1)
463
126k
            continue;
464
465
        // Derive neighboring PU index
466
140k
        uint32_t partP;
467
140k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468
469
140k
        if (bCheckNoFilter)
470
36.9k
        {
471
            // check if each of PUs is lossless coded
472
36.9k
            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473
36.9k
            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474
36.9k
            if (!(maskP | maskQ))
475
36.9k
                continue;
476
36.9k
        }
477
478
103k
        int32_t qpQ = cuQ->m_qp[partQ];
479
103k
        int32_t qpP = cuP->m_qp[partP];
480
103k
        int32_t qpA = (qpP + qpQ + 1) >> 1;
481
482
103k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483
309k
        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484
206k
        {
485
206k
            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486
206k
            if (qp >= 30)
487
102k
                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488
489
206k
            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490
206k
            const int32_t bitdepthShift = X265_DEPTH - 8;
491
206k
            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492
206k
            pixel* srcC = srcChroma[chromaIdx];
493
494
206k
            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495
206k
        }
496
103k
    }
497
89.6k
}
498
499
/* tc lookup table, indexed by the clipped value
 * qp + DEFAULT_INTRA_TC_OFFSET * k + tcOffset (range 0..QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET).
 * Entries are for 8-bit samples; callers shift them left by (X265_DEPTH - 8). */
const uint8_t Deblock::s_tcTable[54] =
{
    /*  0.. 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..19 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
    /* 20..29 */ 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
    /* 30..39 */ 2, 3, 3, 3, 3, 4, 4, 4, 5, 5,
    /* 40..49 */ 6, 6, 7, 8, 9, 10, 11, 13, 14, 16,
    /* 50..53 */ 18, 20, 22, 24
};
504
505
/* beta lookup table, indexed by the clipped value qp + betaOffset
 * (range 0..QP_MAX_SPEC).  Entries are for 8-bit samples; callers shift them
 * left by (X265_DEPTH - 8). */
const uint8_t Deblock::s_betaTable[52] =
{
    /*  0.. 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..19 */ 0, 0, 0, 0, 0, 0, 6, 7, 8, 9,
    /* 20..29 */ 10, 11, 12, 13, 14, 15, 16, 17, 18, 20,
    /* 30..39 */ 22, 24, 26, 28, 30, 32, 34, 36, 38, 40,
    /* 40..49 */ 42, 44, 46, 48, 50, 52, 54, 56, 58, 60,
    /* 50..51 */ 62, 64
};
510