Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/deblock.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
*         Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "deblock.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "slice.h"
30
#include "mv.h"
31
32
using namespace X265_NS;
33
34
152k
#define DEBLOCK_SMALLEST_BLOCK  8
35
844k
#define DEFAULT_INTRA_TC_OFFSET 2
36
37
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38
26.7k
{
39
26.7k
    uint8_t blockStrength[MAX_NUM_PARTITIONS];
40
41
26.7k
    memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42
43
26.7k
    deblockCU(ctu, cuGeom, dir, blockStrength);
44
26.7k
}
45
46
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47
76.1k
{
48
76.1k
    if (dir == Deblock::EDGE_VER)
49
38.0k
    {
50
38.0k
        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51
33.2k
        {
52
33.2k
            uint32_t    tempPartIdx;
53
33.2k
            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54
33.2k
            return tempCU ? 2 : 0;
55
33.2k
        }
56
38.0k
    }
57
38.0k
    else
58
38.0k
    {
59
38.0k
        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60
33.8k
        {
61
33.8k
            uint32_t    tempPartIdx;
62
33.8k
            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63
33.8k
            return tempCU ? 2 : 0;
64
33.8k
        }
65
38.0k
    }
66
67
9.02k
    return 0;
68
76.1k
}
69
70
/* Deblocking filter process in CU-based (the same function as conventional's)
71
 * param Edge the direction of the edge in block boundary (horizontal/vertical), which is added newly */
72
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73
106k
{
74
106k
    uint32_t absPartIdx = cuGeom.absPartIdx;
75
106k
    uint32_t depth = cuGeom.depth;
76
106k
    if (cu->m_predMode[absPartIdx] == MODE_NONE)
77
0
        return;
78
79
106k
    if (cu->m_cuDepth[absPartIdx] > depth)
80
30.3k
    {
81
151k
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82
121k
        {
83
121k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84
121k
            if (childGeom.flags & CUGeom::PRESENT)
85
79.7k
                deblockCU(cu, childGeom, dir, blockStrength);
86
121k
        }
87
30.3k
        return;
88
30.3k
    }
89
90
76.1k
    uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91
76.1k
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92
76.1k
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93
76.1k
    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94
95
76.1k
    uint32_t numParts = cuGeom.numPartitions;
96
2.38M
    for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97
2.31M
    {
98
2.31M
        uint32_t bsCheck = !(partIdx & (1 << dir));
99
100
2.31M
        if (bsCheck && blockStrength[partIdx])
101
319k
            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102
2.31M
    }
103
104
76.1k
    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105
76.1k
    uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106
76.1k
    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107
76.1k
    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108
        
109
262k
    for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110
186k
    {
111
186k
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112
186k
        if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113
98.4k
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114
186k
    }
115
76.1k
}
116
117
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118
2.19M
{
119
2.19M
    if (dir)
120
1.09M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121
1.09M
    else
122
1.09M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123
2.19M
}
124
125
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126
153k
{
127
153k
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128
897k
    for (uint32_t i = 0; i < numUnits; i++)
129
744k
    {
130
744k
        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131
744k
        blockStrength[bsidx] = value;
132
744k
    }
133
153k
}
134
135
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136
77.1k
{
137
77.1k
    uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138
77.1k
    if (cu->m_tuDepth[absPartIdx] > tuDepth)
139
256
    {
140
256
        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141
1.28k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142
1.02k
            setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143
256
        return;
144
256
    }
145
146
76.9k
    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147
76.9k
    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148
76.9k
}
149
150
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151
76.1k
{
152
76.1k
    const uint32_t hNumUnits = numUnits >> 1;
153
76.1k
    const uint32_t qNumUnits = numUnits >> 2;
154
155
76.1k
    switch (cu->m_partSize[absPartIdx])
156
76.1k
    {
157
0
    case SIZE_2NxN:
158
0
        if (EDGE_HOR == dir)
159
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160
0
        break;
161
0
    case SIZE_Nx2N:
162
0
        if (EDGE_VER == dir)
163
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164
0
        break;
165
0
    case SIZE_NxN:
166
0
        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167
0
        break;
168
0
    case SIZE_2NxnU:
169
0
        if (EDGE_HOR == dir)
170
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171
0
        break;
172
0
    case SIZE_nLx2N:
173
0
        if (EDGE_VER == dir)
174
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175
0
        break;
176
0
    case SIZE_2NxnD:
177
0
        if (EDGE_HOR == dir)
178
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179
0
        break;
180
0
    case SIZE_nRx2N:
181
0
        if (EDGE_VER == dir)
182
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183
0
        break;
184
185
76.1k
    case SIZE_2Nx2N:
186
76.1k
    default:
187
76.1k
        break;
188
76.1k
    }
189
76.1k
}
190
191
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192
319k
{
193
    // Calculate block index
194
319k
    uint32_t partP;
195
319k
    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196
197
    // Set BS for Intra MB : BS = 2
198
319k
    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199
319k
        return 2;
200
201
    // Set BS for not Intra MB : BS = 1 or 0
202
0
    if (blockStrength[partQ] > 1 &&
203
0
        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204
0
         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205
0
        return 1;
206
207
0
    static const MV zeroMv(0, 0);
208
0
    const Slice* const sliceQ = cuQ->m_slice;
209
0
    const Slice* const sliceP = cuP->m_slice;
210
0
    const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211
0
    const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212
0
    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213
0
    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214
0
    if (sliceQ->isInterP() && sliceP->isInterP())
215
0
    {
216
0
        return ((refP0 != refQ0) ||
217
0
                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218
0
    }
219
    // (sliceQ->isInterB() || sliceP->isInterB())
220
0
    const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221
0
    const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222
0
    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223
0
    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224
225
0
    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226
0
    {
227
0
        if (refP0 != refP1) // Different L0 & L1
228
0
        {
229
0
            if (refP0 == refQ0)
230
0
                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231
0
                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232
0
            else
233
0
                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234
0
                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235
0
        }
236
0
        else // Same L0 & L1
237
0
        {
238
0
            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239
0
                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240
0
                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241
0
                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242
0
        }
243
0
    }
244
        
245
    // for all different Ref_Idx
246
0
    return 1;
247
0
}
248
249
/* Absolute second difference of the three samples at -3, -2 and -1 times
 * 'offset' from src. */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t outer  = static_cast<int32_t>(src[-offset * 3]);
    const int32_t middle = src[-offset * 2];
    const int32_t inner  = src[-offset];

    return abs(outer - 2 * middle + inner);
}
253
254
/* Absolute second difference of the three samples at 0, +1 and +2 times
 * 'offset' from src. */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t inner  = static_cast<int32_t>(src[0]);
    const int32_t middle = src[offset];
    const int32_t outer  = src[offset * 2];

    return abs(inner - 2 * middle + outer);
}
258
259
/* Strong-filter decision for one line of samples across the edge at src.
 * Both conditions must hold: the summed end-to-edge differences on the two
 * sides are below beta/8, and the step across the edge is below
 * (5 * tc + 1) / 2. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t m0 = static_cast<int16_t>(src[-offset * 4]);
    const int16_t m3 = static_cast<int16_t>(src[-offset]);
    const int16_t m4 = static_cast<int16_t>(src[0]);
    const int16_t m7 = static_cast<int16_t>(src[offset * 3]);

    const int32_t strong = abs(m0 - m3) + abs(m7 - m4);
    if (strong >= (beta >> 3))
        return false;

    return abs(m3 - m4) < ((tc * 5 + 1) >> 1);
}
269
270
/* Deblocking for the luminance component with strong or weak filter
271
 * \param src     pointer to picture data
272
 * \param offset  offset value for picture data
273
 * \param tc      tc value
274
 * \param maskP   indicator to enable filtering on partP
275
 * \param maskQ   indicator to enable filtering on partQ
276
 * \param maskP1  decision weak filter/no filter for partP
277
 * \param maskQ1  decision weak filter/no filter for partQ */
278
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279
                                 int32_t maskP1, int32_t maskQ1)
280
5.58k
{
281
5.58k
    int32_t thrCut = tc * 10;
282
5.58k
    int32_t tc2 = tc >> 1;
283
5.58k
    maskP1 &= maskP;
284
5.58k
    maskQ1 &= maskQ;
285
286
27.9k
    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287
22.3k
    {
288
22.3k
        int16_t m4  = (int16_t)src[0];
289
22.3k
        int16_t m3  = (int16_t)src[-offset];
290
22.3k
        int16_t m5  = (int16_t)src[offset];
291
22.3k
        int16_t m2  = (int16_t)src[-offset * 2];
292
293
22.3k
        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294
295
22.3k
        if (abs(delta) < thrCut)
296
22.3k
        {
297
22.3k
            delta = x265_clip3(-tc, tc, delta);
298
299
22.3k
            src[-offset] = x265_clip(m3 + (delta & maskP));
300
22.3k
            src[0] = x265_clip(m4 - (delta & maskQ));
301
22.3k
            if (maskP1)
302
22.3k
            {
303
22.3k
                int16_t m1  = (int16_t)src[-offset * 3];
304
22.3k
                int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305
22.3k
                src[-offset * 2] = x265_clip(m2 + delta1);
306
22.3k
            }
307
22.3k
            if (maskQ1)
308
22.3k
            {
309
22.3k
                int16_t m6  = (int16_t)src[offset * 2];
310
22.3k
                int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311
22.3k
                src[offset] = x265_clip(m5 + delta2);
312
22.3k
            }
313
22.3k
        }
314
22.3k
    }
315
5.58k
}
316
317
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318
186k
{
319
186k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
320
186k
    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321
186k
    intptr_t stride = reconPic->m_stride;
322
186k
    const PPS* pps = cuQ->m_slice->m_pps;
323
324
186k
    intptr_t offset, srcStep;
325
326
186k
    int32_t maskP = -1;
327
186k
    int32_t maskQ = -1;
328
186k
    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329
186k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330
186k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331
332
186k
    if (dir == EDGE_VER)
333
93.0k
    {
334
93.0k
        offset = 1;
335
93.0k
        srcStep = stride;
336
93.0k
        src += (edge << LOG2_UNIT_SIZE);
337
93.0k
    }
338
93.0k
    else // (dir == EDGE_HOR)
339
93.0k
    {
340
93.0k
        offset = stride;
341
93.0k
        srcStep = 1;
342
93.0k
        src += (edge << LOG2_UNIT_SIZE) * stride;
343
93.0k
    }
344
345
186k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346
1.34M
    for (uint32_t idx = 0; idx < numUnits; idx++)
347
1.15M
    {
348
1.15M
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349
1.15M
        uint32_t bs = blockStrength[partQ];
350
351
1.15M
        if (!bs)
352
836k
            continue;
353
354
        // Derive neighboring PU index
355
319k
        uint32_t partP;
356
319k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357
358
319k
        if (bCheckNoFilter)
359
72.6k
        {
360
            // check if each of PUs is lossless coded
361
72.6k
            maskP = cuP->m_tqBypass[partP] - 1;
362
72.6k
            maskQ = cuQ->m_tqBypass[partQ] - 1;
363
72.6k
            if (!(maskP | maskQ))
364
72.6k
                continue;
365
72.6k
        }
366
367
246k
        int32_t qpQ = cuQ->m_qp[partQ];
368
246k
        int32_t qpP = cuP->m_qp[partP];
369
246k
        int32_t qp  = (qpP + qpQ + 1) >> 1;
370
371
246k
        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372
373
246k
        const int32_t bitdepthShift = X265_DEPTH - 8;
374
246k
        int32_t beta = s_betaTable[indexB] << bitdepthShift;
375
376
246k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377
246k
        int32_t dp0 = calcDP(src + unitOffset              , offset);
378
246k
        int32_t dq0 = calcDQ(src + unitOffset              , offset);
379
246k
        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380
246k
        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381
246k
        int32_t d0 = dp0 + dq0;
382
246k
        int32_t d3 = dp3 + dq3;
383
384
246k
        int32_t d =  d0 + d3;
385
386
246k
        if (d >= beta)
387
61.8k
            continue;
388
389
184k
        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390
184k
        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391
392
184k
        bool sw = (2 * d0 < (beta >> 2) &&
393
184k
                   2 * d3 < (beta >> 2) &&
394
184k
                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395
179k
                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396
397
184k
        if (sw)
398
179k
        {
399
179k
            int32_t tc2 = 2 * tc;
400
179k
            int32_t tcP = (tc2 & maskP);
401
179k
            int32_t tcQ = (tc2 & maskQ);
402
179k
            primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403
179k
        }
404
5.58k
        else
405
5.58k
        {
406
5.58k
            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407
5.58k
            int32_t dp = dp0 + dp3;
408
5.58k
            int32_t dq = dq0 + dq3;
409
5.58k
            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410
5.58k
            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411
412
5.58k
            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413
5.58k
        }
414
184k
    }
415
186k
}
416
417
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418
98.4k
{
419
98.4k
    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420
98.4k
    intptr_t offset, srcStep;
421
98.4k
    const PPS* pps = cuQ->m_slice->m_pps;
422
423
98.4k
    int32_t maskP = -1;
424
98.4k
    int32_t maskQ = -1;
425
98.4k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426
427
98.4k
    X265_CHECK(((dir == EDGE_VER)
428
98.4k
                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429
98.4k
                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430
98.4k
               "invalid edge\n");
431
432
98.4k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
433
98.4k
    intptr_t stride = reconPic->m_strideC;
434
98.4k
    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435
98.4k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436
437
98.4k
    if (dir == EDGE_VER)
438
49.0k
    {
439
49.0k
        chromaShift = cuQ->m_vChromaShift;
440
49.0k
        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441
49.0k
        offset     = 1;
442
49.0k
        srcStep    = stride;
443
49.0k
    }
444
49.4k
    else // (dir == EDGE_HOR)
445
49.4k
    {
446
49.4k
        chromaShift = cuQ->m_hChromaShift;
447
49.4k
        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448
49.4k
        offset     = stride;
449
49.4k
        srcStep    = 1;
450
49.4k
    }
451
452
98.4k
    pixel* srcChroma[2];
453
98.4k
    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454
98.4k
    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455
456
98.4k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457
392k
    for (uint32_t idx = 0; idx < numUnits; idx++)
458
294k
    {
459
294k
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460
294k
        uint32_t bs = blockStrength[partQ];
461
462
294k
        if (bs <= 1)
463
141k
            continue;
464
465
        // Derive neighboring PU index
466
153k
        uint32_t partP;
467
153k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468
469
153k
        if (bCheckNoFilter)
470
34.5k
        {
471
            // check if each of PUs is lossless coded
472
34.5k
            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473
34.5k
            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474
34.5k
            if (!(maskP | maskQ))
475
34.5k
                continue;
476
34.5k
        }
477
478
118k
        int32_t qpQ = cuQ->m_qp[partQ];
479
118k
        int32_t qpP = cuP->m_qp[partP];
480
118k
        int32_t qpA = (qpP + qpQ + 1) >> 1;
481
482
118k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483
356k
        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484
237k
        {
485
237k
            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486
237k
            if (qp >= 30)
487
120k
                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488
489
237k
            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490
237k
            const int32_t bitdepthShift = X265_DEPTH - 8;
491
237k
            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492
237k
            pixel* srcC = srcChroma[chromaIdx];
493
494
237k
            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495
237k
        }
496
118k
    }
497
98.4k
}
498
499
/* tc lookup, 54 entries (nine per row). The edge filters index it with a
 * value clipped to [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET] and shift the
 * result left by (X265_DEPTH - 8). */
const uint8_t Deblock::s_tcTable[54] =
{
     0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,
     1,  1,  1,  1,  1,  1,  1,  1,  1,
     2,  2,  2,  2,  3,  3,  3,  3,  4,
     4,  4,  5,  5,  6,  6,  7,  8,  9,
    10, 11, 13, 14, 16, 18, 20, 22, 24
};
504
505
/* beta lookup, 52 entries (thirteen per row). The luma edge filter indexes it
 * with a value clipped to [0, QP_MAX_SPEC] and shifts the result left by
 * (X265_DEPTH - 8). */
const uint8_t Deblock::s_betaTable[52] =
{
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
    40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
510