Coverage Report

Created: 2022-08-24 06:17

/src/x265/source/common/deblock.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
*         Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "deblock.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "slice.h"
30
#include "mv.h"
31
32
using namespace X265_NS;
33
34
172k
// Spacing of the deblocking edge grid; deblockCU() shifts it right by
// LOG2_UNIT_SIZE to obtain the edge step and the chroma edge mask.
#define DEBLOCK_SMALLEST_BLOCK  8
35
903k
// Amount added to the QP when indexing s_tcTable: scaled by (bs - 1) for luma,
// added unconditionally for chroma. It also extends the upper clip bound of
// that index (QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET).
#define DEFAULT_INTRA_TC_OFFSET 2
36
37
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38
27.9k
{
39
27.9k
    uint8_t blockStrength[MAX_NUM_PARTITIONS];
40
41
27.9k
    memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42
43
27.9k
    deblockCU(ctu, cuGeom, dir, blockStrength);
44
27.9k
}
45
46
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47
86.3k
{
48
86.3k
    if (dir == Deblock::EDGE_VER)
49
43.1k
    {
50
43.1k
        if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51
37.9k
        {
52
37.9k
            uint32_t    tempPartIdx;
53
37.9k
            const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54
37.9k
            return tempCU ? 2 : 0;
55
37.9k
        }
56
43.1k
    }
57
43.1k
    else
58
43.1k
    {
59
43.1k
        if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60
38.1k
        {
61
38.1k
            uint32_t    tempPartIdx;
62
38.1k
            const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63
38.1k
            return tempCU ? 2 : 0;
64
38.1k
        }
65
43.1k
    }
66
67
10.2k
    return 0;
68
86.3k
}
69
70
/* Deblocking filter process in CU-based (the same function as conventional's)
71
 * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
72
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73
120k
{
74
120k
    uint32_t absPartIdx = cuGeom.absPartIdx;
75
120k
    uint32_t depth = cuGeom.depth;
76
120k
    if (cu->m_predMode[absPartIdx] == MODE_NONE)
77
0
        return;
78
79
120k
    if (cu->m_cuDepth[absPartIdx] > depth)
80
34.6k
    {
81
173k
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82
138k
        {
83
138k
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84
138k
            if (childGeom.flags & CUGeom::PRESENT)
85
92.9k
                deblockCU(cu, childGeom, dir, blockStrength);
86
138k
        }
87
34.6k
        return;
88
34.6k
    }
89
90
86.3k
    uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91
86.3k
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92
86.3k
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93
86.3k
    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94
95
86.3k
    uint32_t numParts = cuGeom.numPartitions;
96
2.79M
    for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97
2.70M
    {
98
2.70M
        uint32_t bsCheck = !(partIdx & (1 << dir));
99
100
2.70M
        if (bsCheck && blockStrength[partIdx])
101
368k
            blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102
2.70M
    }
103
104
86.3k
    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105
86.3k
    uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106
86.3k
    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107
86.3k
    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108
        
109
300k
    for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110
214k
    {
111
214k
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112
214k
        if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113
113k
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114
214k
    }
115
86.3k
}
116
117
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118
2.55M
{
119
2.55M
    if (dir)
120
1.27M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121
1.27M
    else
122
1.27M
        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123
2.55M
}
124
125
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126
173k
{
127
173k
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128
1.03M
    for (uint32_t i = 0; i < numUnits; i++)
129
858k
    {
130
858k
        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131
858k
        blockStrength[bsidx] = value;
132
858k
    }
133
173k
}
134
135
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136
87.9k
{
137
87.9k
    uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138
87.9k
    if (cu->m_tuDepth[absPartIdx] > tuDepth)
139
390
    {
140
390
        uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141
1.95k
        for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142
1.56k
            setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143
390
        return;
144
390
    }
145
146
87.5k
    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147
87.5k
    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148
87.5k
}
149
150
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151
86.3k
{
152
86.3k
    const uint32_t hNumUnits = numUnits >> 1;
153
86.3k
    const uint32_t qNumUnits = numUnits >> 2;
154
155
86.3k
    switch (cu->m_partSize[absPartIdx])
156
86.3k
    {
157
0
    case SIZE_2NxN:
158
0
        if (EDGE_HOR == dir)
159
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160
0
        break;
161
0
    case SIZE_Nx2N:
162
0
        if (EDGE_VER == dir)
163
0
            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164
0
        break;
165
0
    case SIZE_NxN:
166
0
        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167
0
        break;
168
0
    case SIZE_2NxnU:
169
0
        if (EDGE_HOR == dir)
170
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171
0
        break;
172
0
    case SIZE_nLx2N:
173
0
        if (EDGE_VER == dir)
174
0
            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175
0
        break;
176
0
    case SIZE_2NxnD:
177
0
        if (EDGE_HOR == dir)
178
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179
0
        break;
180
0
    case SIZE_nRx2N:
181
0
        if (EDGE_VER == dir)
182
0
            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183
0
        break;
184
185
86.3k
    case SIZE_2Nx2N:
186
86.3k
    default:
187
86.3k
        break;
188
86.3k
    }
189
86.3k
}
190
191
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192
368k
{
193
    // Calculate block index
194
368k
    uint32_t partP;
195
368k
    const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196
197
    // Set BS for Intra MB : BS = 2
198
368k
    if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199
368k
        return 2;
200
201
    // Set BS for not Intra MB : BS = 1 or 0
202
0
    if (blockStrength[partQ] > 1 &&
203
0
        (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204
0
         cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205
0
        return 1;
206
207
0
    static const MV zeroMv(0, 0);
208
0
    const Slice* const sliceQ = cuQ->m_slice;
209
0
    const Slice* const sliceP = cuP->m_slice;
210
0
    const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211
0
    const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212
0
    const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213
0
    const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214
0
    if (sliceQ->isInterP() && sliceP->isInterP())
215
0
    {
216
0
        return ((refP0 != refQ0) ||
217
0
                (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218
0
    }
219
    // (sliceQ->isInterB() || sliceP->isInterB())
220
0
    const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221
0
    const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222
0
    const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223
0
    const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224
225
0
    if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226
0
    {
227
0
        if (refP0 != refP1) // Different L0 & L1
228
0
        {
229
0
            if (refP0 == refQ0)
230
0
                return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231
0
                        (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232
0
            else
233
0
                return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234
0
                        (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235
0
        }
236
0
        else // Same L0 & L1
237
0
        {
238
0
            return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239
0
                     (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240
0
                    ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241
0
                     (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242
0
        }
243
0
    }
244
        
245
    // for all different Ref_Idx
246
0
    return 1;
247
0
}
248
249
/* Absolute second difference of the three samples immediately before src
 * (at -3, -2 and -1 times offset). */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t far = static_cast<int32_t>(src[-offset * 3]);
    const int32_t mid = src[-offset * 2];
    const int32_t near = src[-offset];

    return abs(far - 2 * mid + near);
}
253
254
/* Absolute second difference of the three samples starting at src
 * (at 0, +1 and +2 times offset). */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t near = static_cast<int32_t>(src[0]);
    const int32_t mid = src[offset];
    const int32_t far = src[offset * 2];

    return abs(near - 2 * mid + far);
}
258
259
/* Strong-filter decision for one line of samples across the edge at src.
 * Requires both sides to be flat relative to beta and the step across the
 * edge to be small relative to tc. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t q0 = (int16_t)src[0];
    const int16_t p0 = (int16_t)src[-offset];
    const int16_t q3 = (int16_t)src[offset * 3];
    const int16_t p3 = (int16_t)src[-offset * 4];

    // Variation between the nearest and the farthest sample on each side
    const int32_t sideVariation = abs(p3 - p0) + abs(q3 - q0);
    if (!(sideVariation < (beta >> 3)))
        return false;

    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
}
269
270
/* Deblocking for the luminance component with strong or weak filter
271
 * \param src     pointer to picture data
272
 * \param offset  offset value for picture data
273
 * \param tc      tc value
274
 * \param maskP   indicator to enable filtering on partP
275
 * \param maskQ   indicator to enable filtering on partQ
276
 * \param maskP1  decision weak filter/no filter for partP
277
 * \param maskQ1  decision weak filter/no filter for partQ */
278
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279
                                 int32_t maskP1, int32_t maskQ1)
280
6.41k
{
281
6.41k
    int32_t thrCut = tc * 10;
282
6.41k
    int32_t tc2 = tc >> 1;
283
6.41k
    maskP1 &= maskP;
284
6.41k
    maskQ1 &= maskQ;
285
286
32.0k
    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287
25.6k
    {
288
25.6k
        int16_t m4  = (int16_t)src[0];
289
25.6k
        int16_t m3  = (int16_t)src[-offset];
290
25.6k
        int16_t m5  = (int16_t)src[offset];
291
25.6k
        int16_t m2  = (int16_t)src[-offset * 2];
292
293
25.6k
        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294
295
25.6k
        if (abs(delta) < thrCut)
296
25.6k
        {
297
25.6k
            delta = x265_clip3(-tc, tc, delta);
298
299
25.6k
            src[-offset] = x265_clip(m3 + (delta & maskP));
300
25.6k
            src[0] = x265_clip(m4 - (delta & maskQ));
301
25.6k
            if (maskP1)
302
25.6k
            {
303
25.6k
                int16_t m1  = (int16_t)src[-offset * 3];
304
25.6k
                int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305
25.6k
                src[-offset * 2] = x265_clip(m2 + delta1);
306
25.6k
            }
307
25.6k
            if (maskQ1)
308
25.6k
            {
309
25.6k
                int16_t m6  = (int16_t)src[offset * 2];
310
25.6k
                int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311
25.6k
                src[offset] = x265_clip(m5 + delta2);
312
25.6k
            }
313
25.6k
        }
314
25.6k
    }
315
6.41k
}
316
317
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318
214k
{
319
214k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
320
214k
    pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321
214k
    intptr_t stride = reconPic->m_stride;
322
214k
    const PPS* pps = cuQ->m_slice->m_pps;
323
324
214k
    intptr_t offset, srcStep;
325
326
214k
    int32_t maskP = -1;
327
214k
    int32_t maskQ = -1;
328
214k
    int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329
214k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330
214k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331
332
214k
    if (dir == EDGE_VER)
333
107k
    {
334
107k
        offset = 1;
335
107k
        srcStep = stride;
336
107k
        src += (edge << LOG2_UNIT_SIZE);
337
107k
    }
338
107k
    else // (dir == EDGE_HOR)
339
107k
    {
340
107k
        offset = stride;
341
107k
        srcStep = 1;
342
107k
        src += (edge << LOG2_UNIT_SIZE) * stride;
343
107k
    }
344
345
214k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346
1.56M
    for (uint32_t idx = 0; idx < numUnits; idx++)
347
1.35M
    {
348
1.35M
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349
1.35M
        uint32_t bs = blockStrength[partQ];
350
351
1.35M
        if (!bs)
352
985k
            continue;
353
354
        // Derive neighboring PU index
355
368k
        uint32_t partP;
356
368k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357
358
368k
        if (bCheckNoFilter)
359
113k
        {
360
            // check if each of PUs is lossless coded
361
113k
            maskP = cuP->m_tqBypass[partP] - 1;
362
113k
            maskQ = cuQ->m_tqBypass[partQ] - 1;
363
113k
            if (!(maskP | maskQ))
364
113k
                continue;
365
113k
        }
366
367
254k
        int32_t qpQ = cuQ->m_qp[partQ];
368
254k
        int32_t qpP = cuP->m_qp[partP];
369
254k
        int32_t qp  = (qpP + qpQ + 1) >> 1;
370
371
254k
        int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372
373
254k
        const int32_t bitdepthShift = X265_DEPTH - 8;
374
254k
        int32_t beta = s_betaTable[indexB] << bitdepthShift;
375
376
254k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377
254k
        int32_t dp0 = calcDP(src + unitOffset              , offset);
378
254k
        int32_t dq0 = calcDQ(src + unitOffset              , offset);
379
254k
        int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380
254k
        int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381
254k
        int32_t d0 = dp0 + dq0;
382
254k
        int32_t d3 = dp3 + dq3;
383
384
254k
        int32_t d =  d0 + d3;
385
386
254k
        if (d >= beta)
387
48.8k
            continue;
388
389
205k
        int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390
205k
        int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391
392
205k
        bool sw = (2 * d0 < (beta >> 2) &&
393
205k
                   2 * d3 < (beta >> 2) &&
394
205k
                   useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395
205k
                   useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396
397
205k
        if (sw)
398
199k
        {
399
199k
            int32_t tc2 = 2 * tc;
400
199k
            int32_t tcP = (tc2 & maskP);
401
199k
            int32_t tcQ = (tc2 & maskQ);
402
199k
            primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403
199k
        }
404
6.41k
        else
405
6.41k
        {
406
6.41k
            int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407
6.41k
            int32_t dp = dp0 + dp3;
408
6.41k
            int32_t dq = dq0 + dq3;
409
6.41k
            int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410
6.41k
            int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411
412
6.41k
            pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413
6.41k
        }
414
205k
    }
415
214k
}
416
417
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418
113k
{
419
113k
    int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420
113k
    intptr_t offset, srcStep;
421
113k
    const PPS* pps = cuQ->m_slice->m_pps;
422
423
113k
    int32_t maskP = -1;
424
113k
    int32_t maskQ = -1;
425
113k
    int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426
427
113k
    X265_CHECK(((dir == EDGE_VER)
428
113k
                ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429
113k
                : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430
113k
               "invalid edge\n");
431
432
113k
    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
433
113k
    intptr_t stride = reconPic->m_strideC;
434
113k
    intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435
113k
    bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436
437
113k
    if (dir == EDGE_VER)
438
56.5k
    {
439
56.5k
        chromaShift = cuQ->m_vChromaShift;
440
56.5k
        srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441
56.5k
        offset     = 1;
442
56.5k
        srcStep    = stride;
443
56.5k
    }
444
56.6k
    else // (dir == EDGE_HOR)
445
56.6k
    {
446
56.6k
        chromaShift = cuQ->m_hChromaShift;
447
56.6k
        srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448
56.6k
        offset     = stride;
449
56.6k
        srcStep    = 1;
450
56.6k
    }
451
452
113k
    pixel* srcChroma[2];
453
113k
    srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454
113k
    srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455
456
113k
    uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457
457k
    for (uint32_t idx = 0; idx < numUnits; idx++)
458
344k
    {
459
344k
        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460
344k
        uint32_t bs = blockStrength[partQ];
461
462
344k
        if (bs <= 1)
463
167k
            continue;
464
465
        // Derive neighboring PU index
466
176k
        uint32_t partP;
467
176k
        const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468
469
176k
        if (bCheckNoFilter)
470
54.1k
        {
471
            // check if each of PUs is lossless coded
472
54.1k
            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473
54.1k
            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474
54.1k
            if (!(maskP | maskQ))
475
54.1k
                continue;
476
54.1k
        }
477
478
122k
        int32_t qpQ = cuQ->m_qp[partQ];
479
122k
        int32_t qpP = cuP->m_qp[partP];
480
122k
        int32_t qpA = (qpP + qpQ + 1) >> 1;
481
482
122k
        intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483
368k
        for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484
245k
        {
485
245k
            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486
245k
            if (qp >= 30)
487
135k
                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488
489
245k
            int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490
245k
            const int32_t bitdepthShift = X265_DEPTH - 8;
491
245k
            int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492
245k
            pixel* srcC = srcChroma[chromaIdx];
493
494
245k
            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495
245k
        }
496
122k
    }
497
113k
}
498
499
/* tc threshold lookup, 54 entries. The edge filters index it with a value
 * clipped to [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET] and scale the result
 * by the bit depth. */
const uint8_t Deblock::s_tcTable[54] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
    5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24
};
504
505
/* beta threshold lookup, 52 entries. edgeFilterLuma indexes it with a value
 * clipped to [0, QP_MAX_SPEC] and scales the result by the bit depth. */
const uint8_t Deblock::s_betaTable[52] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
    20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,
    44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};
510