Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/deblock.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
*         Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "deblock.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "slice.h"
30
#include "mv.h"
31
32
using namespace X265_NS;
33
34
162k
// Spacing of the deblocking edge grid; shifted right by LOG2_UNIT_SIZE where it is
// used to step from one filtered edge to the next.
#define DEBLOCK_SMALLEST_BLOCK  8
35
855k
// Added to the tc-table index: scaled by (bs - 1) for luma, applied as-is for chroma.
// It also extends the upper clamp of that index beyond QP_MAX_SPEC.
#define DEFAULT_INTRA_TC_OFFSET 2
36
37
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38
27.8k
{
39
27.8k
    uint8_t blockStrength[MAX_NUM_PARTITIONS];
40
41
27.8k
    memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42
43
27.8k
    deblockCU(ctu, cuGeom, dir, blockStrength);
44
27.8k
}
45
46
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47
81.1k
{
48
81.1k
    if (dir == Deblock::EDGE_VER)
49
40.5k
    {
50
40.5k
        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51
35.4k
        {
52
35.4k
            uint32_t    tempPartIdx;
53
35.4k
            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54
35.4k
            return tempCU ? 2 : 0;
55
35.4k
        }
56
40.5k
    }
57
40.5k
    else
58
40.5k
    {
59
40.5k
        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60
36.1k
        {
61
36.1k
            uint32_t    tempPartIdx;
62
36.1k
            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63
36.1k
            return tempCU ? 2 : 0;
64
36.1k
        }
65
40.5k
    }
66
67
9.53k
    return 0;
68
81.1k
}
69
70
/* Deblocking filter process in CU-based (the same function as conventional's)
71
 * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
72
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73
113k
{
74
113k
    uint32_t absPartIdx = cuGeom.absPartIdx;
75
113k
    uint32_t depth = cuGeom.depth;
76
113k
    if (cu->m_predMode[absPartIdx] == MODE_NONE)
77
0
        return;
78
79
113k
    if (cu->m_cuDepth[absPartIdx] > depth)
80
32.5k
    {
81
162k
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82
130k
        {
83
130k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84
130k
            if (childGeom.flags & CUGeom::PRESENT)
85
85.8k
                deblockCU(cu, childGeom, dir, blockStrength);
86
130k
        }
87
32.5k
        return;
88
32.5k
    }
89
90
81.1k
    uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91
81.1k
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92
81.1k
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93
81.1k
    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94
95
81.1k
    uint32_t numParts = cuGeom.numPartitions;
96
2.52M
    for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97
2.44M
    {
98
2.44M
        uint32_t bsCheck = !(partIdx & (1 << dir));
99
100
2.44M
        if (bsCheck && blockStrength[partIdx])
101
338k
            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102
2.44M
    }
103
104
81.1k
    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105
81.1k
    uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106
81.1k
    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107
81.1k
    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108
        
109
278k
    for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110
197k
    {
111
197k
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112
197k
        if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113
104k
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114
197k
    }
115
81.1k
}
116
117
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118
2.32M
{
119
2.32M
    if (dir)
120
1.16M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121
1.16M
    else
122
1.16M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123
2.32M
}
124
125
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126
163k
{
127
163k
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128
952k
    for (uint32_t i = 0; i < numUnits; i++)
129
789k
    {
130
789k
        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131
789k
        blockStrength[bsidx] = value;
132
789k
    }
133
163k
}
134
135
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136
82.3k
{
137
82.3k
    uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138
82.3k
    if (cu->m_tuDepth[absPartIdx] > tuDepth)
139
302
    {
140
302
        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141
1.51k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142
1.20k
            setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143
302
        return;
144
302
    }
145
146
82.0k
    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147
82.0k
    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148
82.0k
}
149
150
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151
81.1k
{
152
81.1k
    const uint32_t hNumUnits = numUnits >> 1;
153
81.1k
    const uint32_t qNumUnits = numUnits >> 2;
154
155
81.1k
    switch (cu->m_partSize[absPartIdx])
156
81.1k
    {
157
0
    case SIZE_2NxN:
158
0
        if (EDGE_HOR == dir)
159
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160
0
        break;
161
0
    case SIZE_Nx2N:
162
0
        if (EDGE_VER == dir)
163
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164
0
        break;
165
0
    case SIZE_NxN:
166
0
        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167
0
        break;
168
0
    case SIZE_2NxnU:
169
0
        if (EDGE_HOR == dir)
170
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171
0
        break;
172
0
    case SIZE_nLx2N:
173
0
        if (EDGE_VER == dir)
174
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175
0
        break;
176
0
    case SIZE_2NxnD:
177
0
        if (EDGE_HOR == dir)
178
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179
0
        break;
180
0
    case SIZE_nRx2N:
181
0
        if (EDGE_VER == dir)
182
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183
0
        break;
184
185
81.1k
    case SIZE_2Nx2N:
186
81.1k
    default:
187
81.1k
        break;
188
81.1k
    }
189
81.1k
}
190
191
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192
338k
{
193
    // Calculate block index
194
338k
    uint32_t partP;
195
338k
    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196
197
    // Set BS for Intra MB : BS = 2
198
338k
    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199
338k
        return 2;
200
201
    // Set BS for not Intra MB : BS = 1 or 0
202
0
    if (blockStrength[partQ] > 1 &&
203
0
        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204
0
         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205
0
        return 1;
206
207
0
    static const MV zeroMv(0, 0);
208
0
    const Slice* const sliceQ = cuQ->m_slice;
209
0
    const Slice* const sliceP = cuP->m_slice;
210
0
    const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211
0
    const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212
0
    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213
0
    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214
0
    if (sliceQ->isInterP() && sliceP->isInterP())
215
0
    {
216
0
        return ((refP0 != refQ0) ||
217
0
                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218
0
    }
219
    // (sliceQ->isInterB() || sliceP->isInterB())
220
0
    const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221
0
    const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222
0
    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223
0
    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224
225
0
    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226
0
    {
227
0
        if (refP0 != refP1) // Different L0 & L1
228
0
        {
229
0
            if (refP0 == refQ0)
230
0
                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231
0
                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232
0
            else
233
0
                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234
0
                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235
0
        }
236
0
        else // Same L0 & L1
237
0
        {
238
0
            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239
0
                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240
0
                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241
0
                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242
0
        }
243
0
    }
244
        
245
    // for all different Ref_Idx
246
0
    return 1;
247
0
}
248
249
/* Absolute second difference of the three samples immediately before src along
 * `offset`: |src[-3o] - 2 * src[-2o] + src[-o]|. */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t p2 = src[-offset * 3];
    const int32_t p1 = src[-offset * 2];
    const int32_t p0 = src[-offset];

    return abs(p2 - 2 * p1 + p0);
}
253
254
/* Absolute second difference of the three samples starting at src along `offset`:
 * |src[0] - 2 * src[o] + src[2o]|. */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t q0 = src[0];
    const int32_t q1 = src[offset];
    const int32_t q2 = src[offset * 2];

    return abs(q0 - 2 * q1 + q2);
}
258
259
/* Strong-filter test for one line of samples crossing the edge at src.
 * Both conditions must hold: the outer-to-inner sample differences on the two sides
 * sum to less than beta / 8, and the step across the edge is below (5 * tc + 1) / 2. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t q0 = static_cast<int16_t>(src[0]);
    const int16_t p0 = static_cast<int16_t>(src[-offset]);
    const int16_t q3 = static_cast<int16_t>(src[offset * 3]);
    const int16_t p3 = static_cast<int16_t>(src[-offset * 4]);

    const int32_t flatness = abs(p3 - p0) + abs(q3 - q0);
    if (flatness >= (beta >> 3))
        return false;

    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
}
269
270
/* Deblocking for the luminance component with strong or weak filter
271
 * \param src     pointer to picture data
272
 * \param offset  offset value for picture data
273
 * \param tc      tc value
274
 * \param maskP   indicator to enable filtering on partP
275
 * \param maskQ   indicator to enable filtering on partQ
276
 * \param maskP1  decision weak filter/no filter for partP
277
 * \param maskQ1  decision weak filter/no filter for partQ */
278
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279
                                 int32_t maskP1, int32_t maskQ1)
280
7.84k
{
281
7.84k
    int32_t thrCut = tc * 10;
282
7.84k
    int32_t tc2 = tc >> 1;
283
7.84k
    maskP1 &= maskP;
284
7.84k
    maskQ1 &= maskQ;
285
286
39.2k
    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287
31.3k
    {
288
31.3k
        int16_t m4  = (int16_t)src[0];
289
31.3k
        int16_t m3  = (int16_t)src[-offset];
290
31.3k
        int16_t m5  = (int16_t)src[offset];
291
31.3k
        int16_t m2  = (int16_t)src[-offset * 2];
292
293
31.3k
        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294
295
31.3k
        if (abs(delta) < thrCut)
296
31.3k
        {
297
31.3k
            delta = x265_clip3(-tc, tc, delta);
298
299
31.3k
            src[-offset] = x265_clip(m3 + (delta & maskP));
300
31.3k
            src[0] = x265_clip(m4 - (delta & maskQ));
301
31.3k
            if (maskP1)
302
31.3k
            {
303
31.3k
                int16_t m1  = (int16_t)src[-offset * 3];
304
31.3k
                int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305
31.3k
                src[-offset * 2] = x265_clip(m2 + delta1);
306
31.3k
            }
307
31.3k
            if (maskQ1)
308
31.3k
            {
309
31.3k
                int16_t m6  = (int16_t)src[offset * 2];
310
31.3k
                int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311
31.3k
                src[offset] = x265_clip(m5 + delta2);
312
31.3k
            }
313
31.3k
        }
314
31.3k
    }
315
7.84k
}
316
317
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318
197k
{
319
197k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
320
197k
    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321
197k
    intptr_t stride = reconPic->m_stride;
322
197k
    const PPS* pps = cuQ->m_slice->m_pps;
323
324
197k
    intptr_t offset, srcStep;
325
326
197k
    int32_t maskP = -1;
327
197k
    int32_t maskQ = -1;
328
197k
    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329
197k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330
197k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331
332
197k
    if (dir == EDGE_VER)
333
98.5k
    {
334
98.5k
        offset = 1;
335
98.5k
        srcStep = stride;
336
98.5k
        src += (edge << LOG2_UNIT_SIZE);
337
98.5k
    }
338
98.5k
    else // (dir == EDGE_HOR)
339
98.5k
    {
340
98.5k
        offset = stride;
341
98.5k
        srcStep = 1;
342
98.5k
        src += (edge << LOG2_UNIT_SIZE) * stride;
343
98.5k
    }
344
345
197k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346
1.41M
    for (uint32_t idx = 0; idx < numUnits; idx++)
347
1.22M
    {
348
1.22M
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349
1.22M
        uint32_t bs = blockStrength[partQ];
350
351
1.22M
        if (!bs)
352
882k
            continue;
353
354
        // Derive neighboring PU index
355
338k
        uint32_t partP;
356
338k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357
358
338k
        if (bCheckNoFilter)
359
87.9k
        {
360
            // check if each of PUs is lossless coded
361
87.9k
            maskP = cuP->m_tqBypass[partP] - 1;
362
87.9k
            maskQ = cuQ->m_tqBypass[partQ] - 1;
363
87.9k
            if (!(maskP | maskQ))
364
87.9k
                continue;
365
87.9k
        }
366
367
250k
        int32_t qpQ = cuQ->m_qp[partQ];
368
250k
        int32_t qpP = cuP->m_qp[partP];
369
250k
        int32_t qp  = (qpP + qpQ + 1) >> 1;
370
371
250k
        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372
373
250k
        const int32_t bitdepthShift = X265_DEPTH - 8;
374
250k
        int32_t beta = s_betaTable[indexB] << bitdepthShift;
375
376
250k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377
250k
        int32_t dp0 = calcDP(src + unitOffset              , offset);
378
250k
        int32_t dq0 = calcDQ(src + unitOffset              , offset);
379
250k
        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380
250k
        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381
250k
        int32_t d0 = dp0 + dq0;
382
250k
        int32_t d3 = dp3 + dq3;
383
384
250k
        int32_t d =  d0 + d3;
385
386
250k
        if (d >= beta)
387
63.7k
            continue;
388
389
186k
        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390
186k
        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391
392
186k
        bool sw = (2 * d0 < (beta >> 2) &&
393
186k
                   2 * d3 < (beta >> 2) &&
394
186k
                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395
179k
                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396
397
186k
        if (sw)
398
179k
        {
399
179k
            int32_t tc2 = 2 * tc;
400
179k
            int32_t tcP = (tc2 & maskP);
401
179k
            int32_t tcQ = (tc2 & maskQ);
402
179k
            primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403
179k
        }
404
7.84k
        else
405
7.84k
        {
406
7.84k
            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407
7.84k
            int32_t dp = dp0 + dp3;
408
7.84k
            int32_t dq = dq0 + dq3;
409
7.84k
            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410
7.84k
            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411
412
7.84k
            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413
7.84k
        }
414
186k
    }
415
197k
}
416
417
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418
104k
{
419
104k
    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420
104k
    intptr_t offset, srcStep;
421
104k
    const PPS* pps = cuQ->m_slice->m_pps;
422
423
104k
    int32_t maskP = -1;
424
104k
    int32_t maskQ = -1;
425
104k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426
427
104k
    X265_CHECK(((dir == EDGE_VER)
428
104k
                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429
104k
                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430
104k
               "invalid edge\n");
431
432
104k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
433
104k
    intptr_t stride = reconPic->m_strideC;
434
104k
    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435
104k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436
437
104k
    if (dir == EDGE_VER)
438
51.9k
    {
439
51.9k
        chromaShift = cuQ->m_vChromaShift;
440
51.9k
        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441
51.9k
        offset     = 1;
442
51.9k
        srcStep    = stride;
443
51.9k
    }
444
52.5k
    else // (dir == EDGE_HOR)
445
52.5k
    {
446
52.5k
        chromaShift = cuQ->m_hChromaShift;
447
52.5k
        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448
52.5k
        offset     = stride;
449
52.5k
        srcStep    = 1;
450
52.5k
    }
451
452
104k
    pixel* srcChroma[2];
453
104k
    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454
104k
    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455
456
104k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457
415k
    for (uint32_t idx = 0; idx < numUnits; idx++)
458
311k
    {
459
311k
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460
311k
        uint32_t bs = blockStrength[partQ];
461
462
311k
        if (bs <= 1)
463
148k
            continue;
464
465
        // Derive neighboring PU index
466
162k
        uint32_t partP;
467
162k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468
469
162k
        if (bCheckNoFilter)
470
41.8k
        {
471
            // check if each of PUs is lossless coded
472
41.8k
            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473
41.8k
            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474
41.8k
            if (!(maskP | maskQ))
475
41.8k
                continue;
476
41.8k
        }
477
478
120k
        int32_t qpQ = cuQ->m_qp[partQ];
479
120k
        int32_t qpP = cuP->m_qp[partP];
480
120k
        int32_t qpA = (qpP + qpQ + 1) >> 1;
481
482
120k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483
361k
        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484
241k
        {
485
241k
            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486
241k
            if (qp >= 30)
487
111k
                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488
489
241k
            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490
241k
            const int32_t bitdepthShift = X265_DEPTH - 8;
491
241k
            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492
241k
            pixel* srcC = srcChroma[chromaIdx];
493
494
241k
            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495
241k
        }
496
120k
    }
497
104k
}
498
499
/* tc lookup table, indexed by the clamped value 0 .. QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET
 * computed in the edge filters. Values are for 8-bit samples; users shift them left by
 * X265_DEPTH - 8. */
const uint8_t Deblock::s_tcTable[54] =
{
    /*  0 ..  9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10 .. 19 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
    /* 20 .. 29 */ 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
    /* 30 .. 39 */ 2, 3, 3, 3, 3, 4, 4, 4, 5, 5,
    /* 40 .. 49 */ 6, 6, 7, 8, 9, 10, 11, 13, 14, 16,
    /* 50 .. 53 */ 18, 20, 22, 24
};
504
505
/* beta lookup table, indexed by the clamped value 0 .. QP_MAX_SPEC computed in the luma
 * edge filter. Values are for 8-bit samples; the user shifts them left by X265_DEPTH - 8. */
const uint8_t Deblock::s_betaTable[52] =
{
    /*  0 ..  9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10 .. 19 */ 0, 0, 0, 0, 0, 0, 6, 7, 8, 9,
    /* 20 .. 29 */ 10, 11, 12, 13, 14, 15, 16, 17, 18, 20,
    /* 30 .. 39 */ 22, 24, 26, 28, 30, 32, 34, 36, 38, 40,
    /* 40 .. 49 */ 42, 44, 46, 48, 50, 52, 54, 56, 58, 60,
    /* 50 .. 51 */ 62, 64
};
510