Coverage Report

Created: 2026-02-26 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/deblock.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
*         Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "deblock.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "slice.h"
30
#include "mv.h"
31
32
using namespace X265_NS;
33
34
162k
/* Edge grid spacing used by this file: deblockCU() steps through a CU in
 * increments of (DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE) units, derives its
 * chroma edge mask from it, and edgeFilterChroma() checks that the chroma edge
 * position is a multiple of it. */
#define DEBLOCK_SMALLEST_BLOCK  8
35
812k
/* Amount added to the QP when forming the s_tcTable index (multiplied by
 * (bs - 1) for luma, added once for chroma); it also extends the upper clip
 * bound of that index to QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET. */
#define DEFAULT_INTRA_TC_OFFSET 2
36
37
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38
28.7k
{
39
28.7k
    uint8_t blockStrength[MAX_NUM_PARTITIONS];
40
41
28.7k
    memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42
43
28.7k
    deblockCU(ctu, cuGeom, dir, blockStrength);
44
28.7k
}
45
46
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47
81.1k
{
48
81.1k
    if (dir == Deblock::EDGE_VER)
49
40.5k
    {
50
40.5k
        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51
35.2k
        {
52
35.2k
            uint32_t    tempPartIdx;
53
35.2k
            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54
35.2k
            return tempCU ? 2 : 0;
55
35.2k
        }
56
40.5k
    }
57
40.5k
    else
58
40.5k
    {
59
40.5k
        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60
35.8k
        {
61
35.8k
            uint32_t    tempPartIdx;
62
35.8k
            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63
35.8k
            return tempCU ? 2 : 0;
64
35.8k
        }
65
40.5k
    }
66
67
10.1k
    return 0;
68
81.1k
}
69
70
/* Deblocking filter process in CU-based (the same function as conventional's)
71
 * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
72
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73
112k
{
74
112k
    uint32_t absPartIdx = cuGeom.absPartIdx;
75
112k
    uint32_t depth = cuGeom.depth;
76
112k
    if (cu->m_predMode[absPartIdx] == MODE_NONE)
77
0
        return;
78
79
112k
    if (cu->m_cuDepth[absPartIdx] > depth)
80
31.4k
    {
81
157k
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82
125k
        {
83
125k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84
125k
            if (childGeom.flags & CUGeom::PRESENT)
85
83.8k
                deblockCU(cu, childGeom, dir, blockStrength);
86
125k
        }
87
31.4k
        return;
88
31.4k
    }
89
90
81.1k
    uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91
81.1k
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92
81.1k
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93
81.1k
    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94
95
81.1k
    uint32_t numParts = cuGeom.numPartitions;
96
2.49M
    for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97
2.41M
    {
98
2.41M
        uint32_t bsCheck = !(partIdx & (1 << dir));
99
100
2.41M
        if (bsCheck && blockStrength[partIdx])
101
336k
            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102
2.41M
    }
103
104
81.1k
    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105
81.1k
    uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106
81.1k
    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107
81.1k
    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108
        
109
278k
    for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110
197k
    {
111
197k
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112
197k
        if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113
103k
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114
197k
    }
115
81.1k
}
116
117
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118
2.30M
{
119
2.30M
    if (dir)
120
1.15M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121
1.15M
    else
122
1.15M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123
2.30M
}
124
125
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126
163k
{
127
163k
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128
953k
    for (uint32_t i = 0; i < numUnits; i++)
129
790k
    {
130
790k
        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131
790k
        blockStrength[bsidx] = value;
132
790k
    }
133
163k
}
134
135
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136
82.6k
{
137
82.6k
    uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138
82.6k
    if (cu->m_tuDepth[absPartIdx] > tuDepth)
139
366
    {
140
366
        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141
1.83k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142
1.46k
            setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143
366
        return;
144
366
    }
145
146
82.2k
    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147
82.2k
    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148
82.2k
}
149
150
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151
81.1k
{
152
81.1k
    const uint32_t hNumUnits = numUnits >> 1;
153
81.1k
    const uint32_t qNumUnits = numUnits >> 2;
154
155
81.1k
    switch (cu->m_partSize[absPartIdx])
156
81.1k
    {
157
0
    case SIZE_2NxN:
158
0
        if (EDGE_HOR == dir)
159
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160
0
        break;
161
0
    case SIZE_Nx2N:
162
0
        if (EDGE_VER == dir)
163
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164
0
        break;
165
0
    case SIZE_NxN:
166
0
        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167
0
        break;
168
0
    case SIZE_2NxnU:
169
0
        if (EDGE_HOR == dir)
170
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171
0
        break;
172
0
    case SIZE_nLx2N:
173
0
        if (EDGE_VER == dir)
174
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175
0
        break;
176
0
    case SIZE_2NxnD:
177
0
        if (EDGE_HOR == dir)
178
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179
0
        break;
180
0
    case SIZE_nRx2N:
181
0
        if (EDGE_VER == dir)
182
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183
0
        break;
184
185
81.1k
    case SIZE_2Nx2N:
186
81.1k
    default:
187
81.1k
        break;
188
81.1k
    }
189
81.1k
}
190
191
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192
336k
{
193
    // Calculate block index
194
336k
    uint32_t partP;
195
336k
    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196
197
    // Set BS for Intra MB : BS = 2
198
336k
    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199
336k
        return 2;
200
201
    // Set BS for not Intra MB : BS = 1 or 0
202
0
    if (blockStrength[partQ] > 1 &&
203
0
        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204
0
         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205
0
        return 1;
206
207
0
    static const MV zeroMv(0, 0);
208
0
    const Slice* const sliceQ = cuQ->m_slice;
209
0
    const Slice* const sliceP = cuP->m_slice;
210
0
    const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211
0
    const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212
0
    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213
0
    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214
0
    if (sliceQ->isInterP() && sliceP->isInterP())
215
0
    {
216
0
        return ((refP0 != refQ0) ||
217
0
                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218
0
    }
219
    // (sliceQ->isInterB() || sliceP->isInterB())
220
0
    const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221
0
    const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222
0
    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223
0
    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224
225
0
    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226
0
    {
227
0
        if (refP0 != refP1) // Different L0 & L1
228
0
        {
229
0
            if (refP0 == refQ0)
230
0
                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231
0
                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232
0
            else
233
0
                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234
0
                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235
0
        }
236
0
        else // Same L0 & L1
237
0
        {
238
0
            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239
0
                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240
0
                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241
0
                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242
0
        }
243
0
    }
244
        
245
    // for all different Ref_Idx
246
0
    return 1;
247
0
}
248
249
/* Absolute second difference of the three samples before src along the
 * filtering axis: |src[-3*offset] - 2*src[-2*offset] + src[-offset]|. */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t far  = static_cast<int32_t>(src[-offset * 3]);
    const int32_t mid  = src[-offset * 2];
    const int32_t near = src[-offset];

    return abs(far - 2 * mid + near);
}
253
254
/* Absolute second difference of the three samples starting at src along the
 * filtering axis: |src[0] - 2*src[offset] + src[2*offset]|. */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t near = static_cast<int32_t>(src[0]);
    const int32_t mid  = src[offset];
    const int32_t far  = src[offset * 2];

    return abs(near - 2 * mid + far);
}
258
259
/* Per-line test used when choosing the strong luma filter.
 * Passes when the summed outer-to-inner sample differences on both sides of
 * the edge are below beta >> 3 and the step across the edge is below
 * (tc * 5 + 1) >> 1.
 * \param offset  distance between neighbouring samples across the edge
 * \param beta    beta threshold
 * \param tc      tc threshold
 * \param src     first sample on the Q side of the edge */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t q0 = static_cast<int16_t>(src[0]);
    const int16_t p0 = static_cast<int16_t>(src[-offset]);
    const int16_t q3 = static_cast<int16_t>(src[offset * 3]);
    const int16_t p3 = static_cast<int16_t>(src[-offset * 4]);

    const int32_t sideVariation = abs(p3 - p0) + abs(q3 - q0);
    if (sideVariation >= (beta >> 3))
        return false;

    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
}
269
270
/* Deblocking for the luminance component with strong or weak filter
271
 * \param src     pointer to picture data
272
 * \param offset  offset value for picture data
273
 * \param tc      tc value
274
 * \param maskP   indicator to enable filtering on partP
275
 * \param maskQ   indicator to enable filtering on partQ
276
 * \param maskP1  decision weak filter/no filter for partP
277
 * \param maskQ1  decision weak filter/no filter for partQ */
278
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279
                                 int32_t maskP1, int32_t maskQ1)
280
9.97k
{
281
9.97k
    int32_t thrCut = tc * 10;
282
9.97k
    int32_t tc2 = tc >> 1;
283
9.97k
    maskP1 &= maskP;
284
9.97k
    maskQ1 &= maskQ;
285
286
49.8k
    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287
39.8k
    {
288
39.8k
        int16_t m4  = (int16_t)src[0];
289
39.8k
        int16_t m3  = (int16_t)src[-offset];
290
39.8k
        int16_t m5  = (int16_t)src[offset];
291
39.8k
        int16_t m2  = (int16_t)src[-offset * 2];
292
293
39.8k
        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294
295
39.8k
        if (abs(delta) < thrCut)
296
39.8k
        {
297
39.8k
            delta = x265_clip3(-tc, tc, delta);
298
299
39.8k
            src[-offset] = x265_clip(m3 + (delta & maskP));
300
39.8k
            src[0] = x265_clip(m4 - (delta & maskQ));
301
39.8k
            if (maskP1)
302
39.8k
            {
303
39.8k
                int16_t m1  = (int16_t)src[-offset * 3];
304
39.8k
                int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305
39.8k
                src[-offset * 2] = x265_clip(m2 + delta1);
306
39.8k
            }
307
39.8k
            if (maskQ1)
308
39.8k
            {
309
39.8k
                int16_t m6  = (int16_t)src[offset * 2];
310
39.8k
                int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311
39.8k
                src[offset] = x265_clip(m5 + delta2);
312
39.8k
            }
313
39.8k
        }
314
39.8k
    }
315
9.97k
}
316
317
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318
197k
{
319
197k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
320
197k
    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321
197k
    intptr_t stride = reconPic->m_stride;
322
197k
    const PPS* pps = cuQ->m_slice->m_pps;
323
324
197k
    intptr_t offset, srcStep;
325
326
197k
    int32_t maskP = -1;
327
197k
    int32_t maskQ = -1;
328
197k
    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329
197k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330
197k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331
332
197k
    if (dir == EDGE_VER)
333
98.6k
    {
334
98.6k
        offset = 1;
335
98.6k
        srcStep = stride;
336
98.6k
        src += (edge << LOG2_UNIT_SIZE);
337
98.6k
    }
338
98.6k
    else // (dir == EDGE_HOR)
339
98.6k
    {
340
98.6k
        offset = stride;
341
98.6k
        srcStep = 1;
342
98.6k
        src += (edge << LOG2_UNIT_SIZE) * stride;
343
98.6k
    }
344
345
197k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346
1.40M
    for (uint32_t idx = 0; idx < numUnits; idx++)
347
1.20M
    {
348
1.20M
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349
1.20M
        uint32_t bs = blockStrength[partQ];
350
351
1.20M
        if (!bs)
352
871k
            continue;
353
354
        // Derive neighboring PU index
355
336k
        uint32_t partP;
356
336k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357
358
336k
        if (bCheckNoFilter)
359
99.2k
        {
360
            // check if each of PUs is lossless coded
361
99.2k
            maskP = cuP->m_tqBypass[partP] - 1;
362
99.2k
            maskQ = cuQ->m_tqBypass[partQ] - 1;
363
99.2k
            if (!(maskP | maskQ))
364
99.2k
                continue;
365
99.2k
        }
366
367
237k
        int32_t qpQ = cuQ->m_qp[partQ];
368
237k
        int32_t qpP = cuP->m_qp[partP];
369
237k
        int32_t qp  = (qpP + qpQ + 1) >> 1;
370
371
237k
        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372
373
237k
        const int32_t bitdepthShift = X265_DEPTH - 8;
374
237k
        int32_t beta = s_betaTable[indexB] << bitdepthShift;
375
376
237k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377
237k
        int32_t dp0 = calcDP(src + unitOffset              , offset);
378
237k
        int32_t dq0 = calcDQ(src + unitOffset              , offset);
379
237k
        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380
237k
        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381
237k
        int32_t d0 = dp0 + dq0;
382
237k
        int32_t d3 = dp3 + dq3;
383
384
237k
        int32_t d =  d0 + d3;
385
386
237k
        if (d >= beta)
387
59.9k
            continue;
388
389
177k
        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390
177k
        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391
392
177k
        bool sw = (2 * d0 < (beta >> 2) &&
393
177k
                   2 * d3 < (beta >> 2) &&
394
177k
                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395
167k
                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396
397
177k
        if (sw)
398
167k
        {
399
167k
            int32_t tc2 = 2 * tc;
400
167k
            int32_t tcP = (tc2 & maskP);
401
167k
            int32_t tcQ = (tc2 & maskQ);
402
167k
            primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403
167k
        }
404
9.97k
        else
405
9.97k
        {
406
9.97k
            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407
9.97k
            int32_t dp = dp0 + dp3;
408
9.97k
            int32_t dq = dq0 + dq3;
409
9.97k
            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410
9.97k
            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411
412
9.97k
            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413
9.97k
        }
414
177k
    }
415
197k
}
416
417
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418
103k
{
419
103k
    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420
103k
    intptr_t offset, srcStep;
421
103k
    const PPS* pps = cuQ->m_slice->m_pps;
422
423
103k
    int32_t maskP = -1;
424
103k
    int32_t maskQ = -1;
425
103k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426
427
103k
    X265_CHECK(((dir == EDGE_VER)
428
103k
                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429
103k
                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430
103k
               "invalid edge\n");
431
432
103k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
433
103k
    intptr_t stride = reconPic->m_strideC;
434
103k
    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435
103k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436
437
103k
    if (dir == EDGE_VER)
438
52.0k
    {
439
52.0k
        chromaShift = cuQ->m_vChromaShift;
440
52.0k
        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441
52.0k
        offset     = 1;
442
52.0k
        srcStep    = stride;
443
52.0k
    }
444
51.8k
    else // (dir == EDGE_HOR)
445
51.8k
    {
446
51.8k
        chromaShift = cuQ->m_hChromaShift;
447
51.8k
        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448
51.8k
        offset     = stride;
449
51.8k
        srcStep    = 1;
450
51.8k
    }
451
452
103k
    pixel* srcChroma[2];
453
103k
    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454
103k
    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455
456
103k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457
411k
    for (uint32_t idx = 0; idx < numUnits; idx++)
458
307k
    {
459
307k
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460
307k
        uint32_t bs = blockStrength[partQ];
461
462
307k
        if (bs <= 1)
463
145k
            continue;
464
465
        // Derive neighboring PU index
466
161k
        uint32_t partP;
467
161k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468
469
161k
        if (bCheckNoFilter)
470
47.3k
        {
471
            // check if each of PUs is lossless coded
472
47.3k
            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473
47.3k
            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474
47.3k
            if (!(maskP | maskQ))
475
47.3k
                continue;
476
47.3k
        }
477
478
114k
        int32_t qpQ = cuQ->m_qp[partQ];
479
114k
        int32_t qpP = cuP->m_qp[partP];
480
114k
        int32_t qpA = (qpP + qpQ + 1) >> 1;
481
482
114k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483
343k
        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484
229k
        {
485
229k
            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486
229k
            if (qp >= 30)
487
98.5k
                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488
489
229k
            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490
229k
            const int32_t bitdepthShift = X265_DEPTH - 8;
491
229k
            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492
229k
            pixel* srcC = srcChroma[chromaIdx];
493
494
229k
            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495
229k
        }
496
114k
    }
497
103k
}
498
499
/* tc lookup table.  Indexed in edgeFilterLuma() / edgeFilterChroma() with a
 * value clipped to [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET]; the entry is
 * then shifted left by (X265_DEPTH - 8).  Three rows of 18 entries. */
const uint8_t Deblock::s_tcTable[54] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,
    4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24
};
504
505
/* beta lookup table.  Indexed in edgeFilterLuma() with a value clipped to
 * [0, QP_MAX_SPEC]; the entry is then shifted left by (X265_DEPTH - 8).
 * Four rows of 13 entries. */
const uint8_t Deblock::s_betaTable[52] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
    40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
510