Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/deblock.cpp
Line
Count
Source
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
*         Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "deblock.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "slice.h"
30
#include "mv.h"
31
32
using namespace X265_NS;
33
34
159k
/* Edge spacing constant, in samples.  deblockCU() derives its edge step and
 * chroma edge mask from it, and edgeFilterChroma() checks that the chroma
 * edge position is a multiple of it. */
#define DEBLOCK_SMALLEST_BLOCK  8
35
818k
/* Offset added to the QP when indexing s_tcTable: the luma filter adds it
 * scaled by (bs - 1), the chroma filter adds it once.  It also extends the
 * upper clip bound of the table index beyond QP_MAX_SPEC. */
#define DEFAULT_INTRA_TC_OFFSET 2
36
37
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38
27.4k
{
39
27.4k
    uint8_t blockStrength[MAX_NUM_PARTITIONS];
40
41
27.4k
    memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42
43
27.4k
    deblockCU(ctu, cuGeom, dir, blockStrength);
44
27.4k
}
45
46
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47
79.7k
{
48
79.7k
    if (dir == Deblock::EDGE_VER)
49
39.8k
    {
50
39.8k
        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51
34.8k
        {
52
34.8k
            uint32_t    tempPartIdx;
53
34.8k
            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54
34.8k
            return tempCU ? 2 : 0;
55
34.8k
        }
56
39.8k
    }
57
39.8k
    else
58
39.8k
    {
59
39.8k
        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60
35.4k
        {
61
35.4k
            uint32_t    tempPartIdx;
62
35.4k
            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63
35.4k
            return tempCU ? 2 : 0;
64
35.4k
        }
65
39.8k
    }
66
67
9.50k
    return 0;
68
79.7k
}
69
70
/* Deblocking filter process in CU-based (the same function as conventional's)
71
 * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
72
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73
111k
{
74
111k
    uint32_t absPartIdx = cuGeom.absPartIdx;
75
111k
    uint32_t depth = cuGeom.depth;
76
111k
    if (cu->m_predMode[absPartIdx] == MODE_NONE)
77
0
        return;
78
79
111k
    if (cu->m_cuDepth[absPartIdx] > depth)
80
31.8k
    {
81
159k
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82
127k
        {
83
127k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84
127k
            if (childGeom.flags & CUGeom::PRESENT)
85
84.2k
                deblockCU(cu, childGeom, dir, blockStrength);
86
127k
        }
87
31.8k
        return;
88
31.8k
    }
89
90
79.7k
    uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91
79.7k
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92
79.7k
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93
79.7k
    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94
95
79.7k
    uint32_t numParts = cuGeom.numPartitions;
96
2.46M
    for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97
2.38M
    {
98
2.38M
        uint32_t bsCheck = !(partIdx & (1 << dir));
99
100
2.38M
        if (bsCheck && blockStrength[partIdx])
101
332k
            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102
2.38M
    }
103
104
79.7k
    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105
79.7k
    uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106
79.7k
    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107
79.7k
    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108
        
109
273k
    for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110
193k
    {
111
193k
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112
193k
        if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113
102k
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114
193k
    }
115
79.7k
}
116
117
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118
2.27M
{
119
2.27M
    if (dir)
120
1.13M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121
1.13M
    else
122
1.13M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123
2.27M
}
124
125
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126
160k
{
127
160k
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128
935k
    for (uint32_t i = 0; i < numUnits; i++)
129
775k
    {
130
775k
        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131
775k
        blockStrength[bsidx] = value;
132
775k
    }
133
160k
}
134
135
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136
81.1k
{
137
81.1k
    uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138
81.1k
    if (cu->m_tuDepth[absPartIdx] > tuDepth)
139
330
    {
140
330
        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141
1.65k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142
1.32k
            setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143
330
        return;
144
330
    }
145
146
80.7k
    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147
80.7k
    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148
80.7k
}
149
150
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151
79.7k
{
152
79.7k
    const uint32_t hNumUnits = numUnits >> 1;
153
79.7k
    const uint32_t qNumUnits = numUnits >> 2;
154
155
79.7k
    switch (cu->m_partSize[absPartIdx])
156
79.7k
    {
157
0
    case SIZE_2NxN:
158
0
        if (EDGE_HOR == dir)
159
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160
0
        break;
161
0
    case SIZE_Nx2N:
162
0
        if (EDGE_VER == dir)
163
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164
0
        break;
165
0
    case SIZE_NxN:
166
0
        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167
0
        break;
168
0
    case SIZE_2NxnU:
169
0
        if (EDGE_HOR == dir)
170
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171
0
        break;
172
0
    case SIZE_nLx2N:
173
0
        if (EDGE_VER == dir)
174
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175
0
        break;
176
0
    case SIZE_2NxnD:
177
0
        if (EDGE_HOR == dir)
178
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179
0
        break;
180
0
    case SIZE_nRx2N:
181
0
        if (EDGE_VER == dir)
182
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183
0
        break;
184
185
79.7k
    case SIZE_2Nx2N:
186
79.7k
    default:
187
79.7k
        break;
188
79.7k
    }
189
79.7k
}
190
191
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192
332k
{
193
    // Calculate block index
194
332k
    uint32_t partP;
195
332k
    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196
197
    // Set BS for Intra MB : BS = 2
198
332k
    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199
332k
        return 2;
200
201
    // Set BS for not Intra MB : BS = 1 or 0
202
0
    if (blockStrength[partQ] > 1 &&
203
0
        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204
0
         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205
0
        return 1;
206
207
0
    static const MV zeroMv(0, 0);
208
0
    const Slice* const sliceQ = cuQ->m_slice;
209
0
    const Slice* const sliceP = cuP->m_slice;
210
0
    const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211
0
    const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212
0
    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213
0
    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214
0
    if (sliceQ->isInterP() && sliceP->isInterP())
215
0
    {
216
0
        return ((refP0 != refQ0) ||
217
0
                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218
0
    }
219
    // (sliceQ->isInterB() || sliceP->isInterB())
220
0
    const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221
0
    const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222
0
    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223
0
    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224
225
0
    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226
0
    {
227
0
        if (refP0 != refP1) // Different L0 & L1
228
0
        {
229
0
            if (refP0 == refQ0)
230
0
                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231
0
                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232
0
            else
233
0
                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234
0
                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235
0
        }
236
0
        else // Same L0 & L1
237
0
        {
238
0
            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239
0
                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240
0
                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241
0
                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242
0
        }
243
0
    }
244
        
245
    // for all different Ref_Idx
246
0
    return 1;
247
0
}
248
249
/* Absolute second difference of the three samples on the P side of the edge:
 * |src[-3*offset] - 2*src[-2*offset] + src[-offset]|. */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t p2 = src[-offset * 3];
    const int32_t p1 = src[-offset * 2];
    const int32_t p0 = src[-offset];

    return abs(p2 - 2 * p1 + p0);
}
253
254
/* Absolute second difference of the three samples on the Q side of the edge:
 * |src[0] - 2*src[offset] + src[2*offset]|. */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t q0 = src[0];
    const int32_t q1 = src[offset];
    const int32_t q2 = src[offset * 2];

    return abs(q0 - 2 * q1 + q2);
}
258
259
/* Strong-filter decision for one line across the edge.
 * Requires both that the outer-to-inner sample differences on the two sides
 * sum to less than beta/8 and that the step across the edge is smaller than
 * (5*tc + 1)/2. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t p3 = static_cast<int16_t>(src[-offset * 4]);
    const int16_t p0 = static_cast<int16_t>(src[-offset]);
    const int16_t q0 = static_cast<int16_t>(src[0]);
    const int16_t q3 = static_cast<int16_t>(src[offset * 3]);

    const int32_t sideActivity = abs(p3 - p0) + abs(q3 - q0);
    if (sideActivity >= (beta >> 3))
        return false;

    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
}
269
270
/* Deblocking for the luminance component with strong or weak filter
271
 * \param src     pointer to picture data
272
 * \param offset  offset value for picture data
273
 * \param tc      tc value
274
 * \param maskP   indicator to enable filtering on partP
275
 * \param maskQ   indicator to enable filtering on partQ
276
 * \param maskP1  decision weak filter/no filter for partP
277
 * \param maskQ1  decision weak filter/no filter for partQ */
278
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279
                                 int32_t maskP1, int32_t maskQ1)
280
6.96k
{
281
6.96k
    int32_t thrCut = tc * 10;
282
6.96k
    int32_t tc2 = tc >> 1;
283
6.96k
    maskP1 &= maskP;
284
6.96k
    maskQ1 &= maskQ;
285
286
34.8k
    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287
27.8k
    {
288
27.8k
        int16_t m4  = (int16_t)src[0];
289
27.8k
        int16_t m3  = (int16_t)src[-offset];
290
27.8k
        int16_t m5  = (int16_t)src[offset];
291
27.8k
        int16_t m2  = (int16_t)src[-offset * 2];
292
293
27.8k
        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294
295
27.8k
        if (abs(delta) < thrCut)
296
27.8k
        {
297
27.8k
            delta = x265_clip3(-tc, tc, delta);
298
299
27.8k
            src[-offset] = x265_clip(m3 + (delta & maskP));
300
27.8k
            src[0] = x265_clip(m4 - (delta & maskQ));
301
27.8k
            if (maskP1)
302
27.8k
            {
303
27.8k
                int16_t m1  = (int16_t)src[-offset * 3];
304
27.8k
                int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305
27.8k
                src[-offset * 2] = x265_clip(m2 + delta1);
306
27.8k
            }
307
27.8k
            if (maskQ1)
308
27.8k
            {
309
27.8k
                int16_t m6  = (int16_t)src[offset * 2];
310
27.8k
                int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311
27.8k
                src[offset] = x265_clip(m5 + delta2);
312
27.8k
            }
313
27.8k
        }
314
27.8k
    }
315
6.96k
}
316
317
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318
193k
{
319
193k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
320
193k
    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321
193k
    intptr_t stride = reconPic->m_stride;
322
193k
    const PPS* pps = cuQ->m_slice->m_pps;
323
324
193k
    intptr_t offset, srcStep;
325
326
193k
    int32_t maskP = -1;
327
193k
    int32_t maskQ = -1;
328
193k
    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329
193k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330
193k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331
332
193k
    if (dir == EDGE_VER)
333
96.7k
    {
334
96.7k
        offset = 1;
335
96.7k
        srcStep = stride;
336
96.7k
        src += (edge << LOG2_UNIT_SIZE);
337
96.7k
    }
338
96.7k
    else // (dir == EDGE_HOR)
339
96.7k
    {
340
96.7k
        offset = stride;
341
96.7k
        srcStep = 1;
342
96.7k
        src += (edge << LOG2_UNIT_SIZE) * stride;
343
96.7k
    }
344
345
193k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346
1.38M
    for (uint32_t idx = 0; idx < numUnits; idx++)
347
1.19M
    {
348
1.19M
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349
1.19M
        uint32_t bs = blockStrength[partQ];
350
351
1.19M
        if (!bs)
352
861k
            continue;
353
354
        // Derive neighboring PU index
355
332k
        uint32_t partP;
356
332k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357
358
332k
        if (bCheckNoFilter)
359
96.5k
        {
360
            // check if each of PUs is lossless coded
361
96.5k
            maskP = cuP->m_tqBypass[partP] - 1;
362
96.5k
            maskQ = cuQ->m_tqBypass[partQ] - 1;
363
96.5k
            if (!(maskP | maskQ))
364
96.5k
                continue;
365
96.5k
        }
366
367
235k
        int32_t qpQ = cuQ->m_qp[partQ];
368
235k
        int32_t qpP = cuP->m_qp[partP];
369
235k
        int32_t qp  = (qpP + qpQ + 1) >> 1;
370
371
235k
        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372
373
235k
        const int32_t bitdepthShift = X265_DEPTH - 8;
374
235k
        int32_t beta = s_betaTable[indexB] << bitdepthShift;
375
376
235k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377
235k
        int32_t dp0 = calcDP(src + unitOffset              , offset);
378
235k
        int32_t dq0 = calcDQ(src + unitOffset              , offset);
379
235k
        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380
235k
        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381
235k
        int32_t d0 = dp0 + dq0;
382
235k
        int32_t d3 = dp3 + dq3;
383
384
235k
        int32_t d =  d0 + d3;
385
386
235k
        if (d >= beta)
387
53.3k
            continue;
388
389
182k
        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390
182k
        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391
392
182k
        bool sw = (2 * d0 < (beta >> 2) &&
393
182k
                   2 * d3 < (beta >> 2) &&
394
182k
                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395
175k
                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396
397
182k
        if (sw)
398
175k
        {
399
175k
            int32_t tc2 = 2 * tc;
400
175k
            int32_t tcP = (tc2 & maskP);
401
175k
            int32_t tcQ = (tc2 & maskQ);
402
175k
            primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403
175k
        }
404
6.96k
        else
405
6.96k
        {
406
6.96k
            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407
6.96k
            int32_t dp = dp0 + dp3;
408
6.96k
            int32_t dq = dq0 + dq3;
409
6.96k
            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410
6.96k
            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411
412
6.96k
            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413
6.96k
        }
414
182k
    }
415
193k
}
416
417
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418
102k
{
419
102k
    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420
102k
    intptr_t offset, srcStep;
421
102k
    const PPS* pps = cuQ->m_slice->m_pps;
422
423
102k
    int32_t maskP = -1;
424
102k
    int32_t maskQ = -1;
425
102k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426
427
102k
    X265_CHECK(((dir == EDGE_VER)
428
102k
                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429
102k
                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430
102k
               "invalid edge\n");
431
432
102k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic[0];
433
102k
    intptr_t stride = reconPic->m_strideC;
434
102k
    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435
102k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436
437
102k
    if (dir == EDGE_VER)
438
51.2k
    {
439
51.2k
        chromaShift = cuQ->m_vChromaShift;
440
51.2k
        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441
51.2k
        offset     = 1;
442
51.2k
        srcStep    = stride;
443
51.2k
    }
444
51.1k
    else // (dir == EDGE_HOR)
445
51.1k
    {
446
51.1k
        chromaShift = cuQ->m_hChromaShift;
447
51.1k
        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448
51.1k
        offset     = stride;
449
51.1k
        srcStep    = 1;
450
51.1k
    }
451
452
102k
    pixel* srcChroma[2];
453
102k
    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454
102k
    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455
456
102k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457
406k
    for (uint32_t idx = 0; idx < numUnits; idx++)
458
304k
    {
459
304k
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460
304k
        uint32_t bs = blockStrength[partQ];
461
462
304k
        if (bs <= 1)
463
144k
            continue;
464
465
        // Derive neighboring PU index
466
159k
        uint32_t partP;
467
159k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468
469
159k
        if (bCheckNoFilter)
470
45.8k
        {
471
            // check if each of PUs is lossless coded
472
45.8k
            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473
45.8k
            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474
45.8k
            if (!(maskP | maskQ))
475
45.8k
                continue;
476
45.8k
        }
477
478
113k
        int32_t qpQ = cuQ->m_qp[partQ];
479
113k
        int32_t qpP = cuP->m_qp[partP];
480
113k
        int32_t qpA = (qpP + qpQ + 1) >> 1;
481
482
113k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483
340k
        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484
227k
        {
485
227k
            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486
227k
            if (qp >= 30)
487
119k
                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488
489
227k
            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490
227k
            const int32_t bitdepthShift = X265_DEPTH - 8;
491
227k
            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492
227k
            pixel* srcC = srcChroma[chromaIdx];
493
494
227k
            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495
227k
        }
496
113k
    }
497
102k
}
498
499
/* tc lookup table.  The filters index it with the clipped value
 * qp + DEFAULT_INTRA_TC_OFFSET-based offset + tcOffset, in the range
 * [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET], and scale the entry by the
 * bit-depth shift. */
const uint8_t Deblock::s_tcTable[54] =
{
    /*  0.. 8 */  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  9..17 */  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 18..26 */  1,  1,  1,  1,  1,  1,  1,  1,  1,
    /* 27..35 */  2,  2,  2,  2,  3,  3,  3,  3,  4,
    /* 36..44 */  4,  4,  5,  5,  6,  6,  7,  8,  9,
    /* 45..53 */ 10, 11, 13, 14, 16, 18, 20, 22, 24
};
504
505
/* beta lookup table.  The luma filter indexes it with qp + betaOffset clipped
 * to [0, QP_MAX_SPEC] and scales the entry by the bit-depth shift. */
const uint8_t Deblock::s_betaTable[52] =
{
    /*  0..12 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 13..25 */  0,  0,  0,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    /* 26..38 */ 16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
    /* 39..51 */ 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
510