/src/x265/source/common/deblock.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Author: Gopu Govindaswamy <gopu@multicorewareinc.com> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #include "common.h" |
26 | | #include "deblock.h" |
27 | | #include "framedata.h" |
28 | | #include "picyuv.h" |
29 | | #include "slice.h" |
30 | | #include "mv.h" |
31 | | |
32 | | using namespace X265_NS; |
33 | | |
/* Used by deblockCU(): shifted right by LOG2_UNIT_SIZE it gives the step between
 * filtered edges, and (after the chroma shift) the mask selecting chroma edges. */
#define DEBLOCK_SMALLEST_BLOCK 8
/* Added to the tc table index: scaled by (bs - 1) for luma and applied as-is for
 * chroma; also extends the upper clip bound of that index beyond QP_MAX_SPEC. */
#define DEFAULT_INTRA_TC_OFFSET 2
36 | | |
37 | | void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir) |
38 | 0 | { |
39 | 0 | uint8_t blockStrength[MAX_NUM_PARTITIONS]; |
40 | |
|
41 | 0 | memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions); |
42 | |
|
43 | 0 | deblockCU(ctu, cuGeom, dir, blockStrength); |
44 | 0 | } |
45 | | |
46 | | static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir) |
47 | 0 | { |
48 | 0 | if (dir == Deblock::EDGE_VER) |
49 | 0 | { |
50 | 0 | if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0) |
51 | 0 | { |
52 | 0 | uint32_t tempPartIdx; |
53 | 0 | const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx); |
54 | 0 | return tempCU ? 2 : 0; |
55 | 0 | } |
56 | 0 | } |
57 | 0 | else |
58 | 0 | { |
59 | 0 | if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0) |
60 | 0 | { |
61 | 0 | uint32_t tempPartIdx; |
62 | 0 | const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx); |
63 | 0 | return tempCU ? 2 : 0; |
64 | 0 | } |
65 | 0 | } |
66 | | |
67 | 0 | return 0; |
68 | 0 | } |
69 | | |
70 | | /* Deblocking filter process in CU-based (the same function as conventional's) |
71 | | * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ |
72 | | void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[]) |
73 | 0 | { |
74 | 0 | uint32_t absPartIdx = cuGeom.absPartIdx; |
75 | 0 | uint32_t depth = cuGeom.depth; |
76 | 0 | if (cu->m_predMode[absPartIdx] == MODE_NONE) |
77 | 0 | return; |
78 | | |
79 | 0 | if (cu->m_cuDepth[absPartIdx] > depth) |
80 | 0 | { |
81 | 0 | for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) |
82 | 0 | { |
83 | 0 | const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx); |
84 | 0 | if (childGeom.flags & CUGeom::PRESENT) |
85 | 0 | deblockCU(cu, childGeom, dir, blockStrength); |
86 | 0 | } |
87 | 0 | return; |
88 | 0 | } |
89 | | |
90 | 0 | uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE); |
91 | 0 | setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits); |
92 | 0 | setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength); |
93 | 0 | setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits); |
94 | |
|
95 | 0 | uint32_t numParts = cuGeom.numPartitions; |
96 | 0 | for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++) |
97 | 0 | { |
98 | 0 | uint32_t bsCheck = !(partIdx & (1 << dir)); |
99 | |
|
100 | 0 | if (bsCheck && blockStrength[partIdx]) |
101 | 0 | blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength); |
102 | 0 | } |
103 | |
|
104 | 0 | const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE; |
105 | 0 | uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift; |
106 | 0 | uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1; |
107 | 0 | uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE; |
108 | | |
109 | 0 | for (uint32_t e = 0; e < numUnits; e += partIdxIncr) |
110 | 0 | { |
111 | 0 | edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength); |
112 | 0 | if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400) |
113 | 0 | edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength); |
114 | 0 | } |
115 | 0 | } |
116 | | |
117 | | static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) |
118 | 0 | { |
119 | 0 | if (dir) |
120 | 0 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx]; |
121 | 0 | else |
122 | 0 | return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx]; |
123 | 0 | } |
124 | | |
125 | | void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits) |
126 | 0 | { |
127 | 0 | X265_CHECK(numUnits > 0, "numUnits edge filter check\n"); |
128 | 0 | for (uint32_t i = 0; i < numUnits; i++) |
129 | 0 | { |
130 | 0 | const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i); |
131 | 0 | blockStrength[bsidx] = value; |
132 | 0 | } |
133 | 0 | } |
134 | | |
135 | | void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[]) |
136 | 0 | { |
137 | 0 | uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth; |
138 | 0 | if (cu->m_tuDepth[absPartIdx] > tuDepth) |
139 | 0 | { |
140 | 0 | uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2; |
141 | 0 | for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts) |
142 | 0 | setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength); |
143 | 0 | return; |
144 | 0 | } |
145 | | |
146 | 0 | uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE); |
147 | 0 | setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits); |
148 | 0 | } |
149 | | |
150 | | void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits) |
151 | 0 | { |
152 | 0 | const uint32_t hNumUnits = numUnits >> 1; |
153 | 0 | const uint32_t qNumUnits = numUnits >> 2; |
154 | |
|
155 | 0 | switch (cu->m_partSize[absPartIdx]) |
156 | 0 | { |
157 | 0 | case SIZE_2NxN: |
158 | 0 | if (EDGE_HOR == dir) |
159 | 0 | setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
160 | 0 | break; |
161 | 0 | case SIZE_Nx2N: |
162 | 0 | if (EDGE_VER == dir) |
163 | 0 | setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
164 | 0 | break; |
165 | 0 | case SIZE_NxN: |
166 | 0 | setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits); |
167 | 0 | break; |
168 | 0 | case SIZE_2NxnU: |
169 | 0 | if (EDGE_HOR == dir) |
170 | 0 | setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); |
171 | 0 | break; |
172 | 0 | case SIZE_nLx2N: |
173 | 0 | if (EDGE_VER == dir) |
174 | 0 | setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits); |
175 | 0 | break; |
176 | 0 | case SIZE_2NxnD: |
177 | 0 | if (EDGE_HOR == dir) |
178 | 0 | setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); |
179 | 0 | break; |
180 | 0 | case SIZE_nRx2N: |
181 | 0 | if (EDGE_VER == dir) |
182 | 0 | setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits); |
183 | 0 | break; |
184 | | |
185 | 0 | case SIZE_2Nx2N: |
186 | 0 | default: |
187 | 0 | break; |
188 | 0 | } |
189 | 0 | } |
190 | | |
191 | | uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]) |
192 | 0 | { |
193 | | // Calculate block index |
194 | 0 | uint32_t partP; |
195 | 0 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); |
196 | | |
197 | | // Set BS for Intra MB : BS = 2 |
198 | 0 | if (cuP->isIntra(partP) || cuQ->isIntra(partQ)) |
199 | 0 | return 2; |
200 | | |
201 | | // Set BS for not Intra MB : BS = 1 or 0 |
202 | 0 | if (blockStrength[partQ] > 1 && |
203 | 0 | (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) || |
204 | 0 | cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP]))) |
205 | 0 | return 1; |
206 | | |
207 | 0 | static const MV zeroMv(0, 0); |
208 | 0 | const Slice* const sliceQ = cuQ->m_slice; |
209 | 0 | const Slice* const sliceP = cuP->m_slice; |
210 | 0 | const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL; |
211 | 0 | const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL; |
212 | 0 | const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv; |
213 | 0 | const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv; |
214 | 0 | if (sliceQ->isInterP() && sliceP->isInterP()) |
215 | 0 | { |
216 | 0 | return ((refP0 != refQ0) || |
217 | 0 | (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0; |
218 | 0 | } |
219 | | // (sliceQ->isInterB() || sliceP->isInterB()) |
220 | 0 | const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL; |
221 | 0 | const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL; |
222 | 0 | const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv; |
223 | 0 | const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv; |
224 | |
|
225 | 0 | if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0))) |
226 | 0 | { |
227 | 0 | if (refP0 != refP1) // Different L0 & L1 |
228 | 0 | { |
229 | 0 | if (refP0 == refQ0) |
230 | 0 | return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || |
231 | 0 | (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0; |
232 | 0 | else |
233 | 0 | return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || |
234 | 0 | (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0; |
235 | 0 | } |
236 | 0 | else // Same L0 & L1 |
237 | 0 | { |
238 | 0 | return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) || |
239 | 0 | (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) && |
240 | 0 | ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) || |
241 | 0 | (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0; |
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | | // for all different Ref_Idx |
246 | 0 | return 1; |
247 | 0 | } |
248 | | |
249 | | static inline int32_t calcDP(pixel* src, intptr_t offset) |
250 | 0 | { |
251 | 0 | return abs(static_cast<int32_t>(src[-offset * 3]) - 2 * src[-offset * 2] + src[-offset]); |
252 | 0 | } |
253 | | |
254 | | static inline int32_t calcDQ(pixel* src, intptr_t offset) |
255 | 0 | { |
256 | 0 | return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]); |
257 | 0 | } |
258 | | |
259 | | static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src) |
260 | 0 | { |
261 | 0 | int16_t m4 = (int16_t)src[0]; |
262 | 0 | int16_t m3 = (int16_t)src[-offset]; |
263 | 0 | int16_t m7 = (int16_t)src[offset * 3]; |
264 | 0 | int16_t m0 = (int16_t)src[-offset * 4]; |
265 | 0 | int32_t strong = abs(m0 - m3) + abs(m7 - m4); |
266 | |
|
267 | 0 | return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1)); |
268 | 0 | } |
269 | | |
270 | | /* Deblocking for the luminance component with strong or weak filter |
271 | | * \param src pointer to picture data |
272 | | * \param offset offset value for picture data |
273 | | * \param tc tc value |
274 | | * \param maskP indicator to enable filtering on partP |
275 | | * \param maskQ indicator to enable filtering on partQ |
276 | | * \param maskP1 decision weak filter/no filter for partP |
277 | | * \param maskQ1 decision weak filter/no filter for partQ */ |
278 | | static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ, |
279 | | int32_t maskP1, int32_t maskQ1) |
280 | 0 | { |
281 | 0 | int32_t thrCut = tc * 10; |
282 | 0 | int32_t tc2 = tc >> 1; |
283 | 0 | maskP1 &= maskP; |
284 | 0 | maskQ1 &= maskQ; |
285 | |
|
286 | 0 | for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep) |
287 | 0 | { |
288 | 0 | int16_t m4 = (int16_t)src[0]; |
289 | 0 | int16_t m3 = (int16_t)src[-offset]; |
290 | 0 | int16_t m5 = (int16_t)src[offset]; |
291 | 0 | int16_t m2 = (int16_t)src[-offset * 2]; |
292 | |
|
293 | 0 | int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4; |
294 | |
|
295 | 0 | if (abs(delta) < thrCut) |
296 | 0 | { |
297 | 0 | delta = x265_clip3(-tc, tc, delta); |
298 | |
|
299 | 0 | src[-offset] = x265_clip(m3 + (delta & maskP)); |
300 | 0 | src[0] = x265_clip(m4 - (delta & maskQ)); |
301 | 0 | if (maskP1) |
302 | 0 | { |
303 | 0 | int16_t m1 = (int16_t)src[-offset * 3]; |
304 | 0 | int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1)); |
305 | 0 | src[-offset * 2] = x265_clip(m2 + delta1); |
306 | 0 | } |
307 | 0 | if (maskQ1) |
308 | 0 | { |
309 | 0 | int16_t m6 = (int16_t)src[offset * 2]; |
310 | 0 | int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1)); |
311 | 0 | src[offset] = x265_clip(m5 + delta2); |
312 | 0 | } |
313 | 0 | } |
314 | 0 | } |
315 | 0 | } |
316 | | |
317 | | void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) |
318 | 0 | { |
319 | 0 | PicYuv* reconPic = cuQ->m_encData->m_reconPic; |
320 | 0 | pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx); |
321 | 0 | intptr_t stride = reconPic->m_stride; |
322 | 0 | const PPS* pps = cuQ->m_slice->m_pps; |
323 | |
|
324 | 0 | intptr_t offset, srcStep; |
325 | |
|
326 | 0 | int32_t maskP = -1; |
327 | 0 | int32_t maskQ = -1; |
328 | 0 | int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1; |
329 | 0 | int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; |
330 | 0 | bool bCheckNoFilter = pps->bTransquantBypassEnabled; |
331 | |
|
332 | 0 | if (dir == EDGE_VER) |
333 | 0 | { |
334 | 0 | offset = 1; |
335 | 0 | srcStep = stride; |
336 | 0 | src += (edge << LOG2_UNIT_SIZE); |
337 | 0 | } |
338 | 0 | else // (dir == EDGE_HOR) |
339 | 0 | { |
340 | 0 | offset = stride; |
341 | 0 | srcStep = 1; |
342 | 0 | src += (edge << LOG2_UNIT_SIZE) * stride; |
343 | 0 | } |
344 | |
|
345 | 0 | uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth; |
346 | 0 | for (uint32_t idx = 0; idx < numUnits; idx++) |
347 | 0 | { |
348 | 0 | uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx); |
349 | 0 | uint32_t bs = blockStrength[partQ]; |
350 | |
|
351 | 0 | if (!bs) |
352 | 0 | continue; |
353 | | |
354 | | // Derive neighboring PU index |
355 | 0 | uint32_t partP; |
356 | 0 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); |
357 | |
|
358 | 0 | if (bCheckNoFilter) |
359 | 0 | { |
360 | | // check if each of PUs is lossless coded |
361 | 0 | maskP = cuP->m_tqBypass[partP] - 1; |
362 | 0 | maskQ = cuQ->m_tqBypass[partQ] - 1; |
363 | 0 | if (!(maskP | maskQ)) |
364 | 0 | continue; |
365 | 0 | } |
366 | | |
367 | 0 | int32_t qpQ = cuQ->m_qp[partQ]; |
368 | 0 | int32_t qpP = cuP->m_qp[partP]; |
369 | 0 | int32_t qp = (qpP + qpQ + 1) >> 1; |
370 | |
|
371 | 0 | int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset); |
372 | |
|
373 | 0 | const int32_t bitdepthShift = X265_DEPTH - 8; |
374 | 0 | int32_t beta = s_betaTable[indexB] << bitdepthShift; |
375 | |
|
376 | 0 | intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; |
377 | 0 | int32_t dp0 = calcDP(src + unitOffset , offset); |
378 | 0 | int32_t dq0 = calcDQ(src + unitOffset , offset); |
379 | 0 | int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset); |
380 | 0 | int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset); |
381 | 0 | int32_t d0 = dp0 + dq0; |
382 | 0 | int32_t d3 = dp3 + dq3; |
383 | |
|
384 | 0 | int32_t d = d0 + d3; |
385 | |
|
386 | 0 | if (d >= beta) |
387 | 0 | continue; |
388 | | |
389 | 0 | int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); |
390 | 0 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; |
391 | |
|
392 | 0 | bool sw = (2 * d0 < (beta >> 2) && |
393 | 0 | 2 * d3 < (beta >> 2) && |
394 | 0 | useStrongFiltering(offset, beta, tc, src + unitOffset ) && |
395 | 0 | useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3)); |
396 | |
|
397 | 0 | if (sw) |
398 | 0 | { |
399 | 0 | int32_t tc2 = 2 * tc; |
400 | 0 | int32_t tcP = (tc2 & maskP); |
401 | 0 | int32_t tcQ = (tc2 & maskQ); |
402 | 0 | primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ); |
403 | 0 | } |
404 | 0 | else |
405 | 0 | { |
406 | 0 | int32_t sideThreshold = (beta + (beta >> 1)) >> 3; |
407 | 0 | int32_t dp = dp0 + dp3; |
408 | 0 | int32_t dq = dq0 + dq3; |
409 | 0 | int32_t maskP1 = (dp < sideThreshold ? -1 : 0); |
410 | 0 | int32_t maskQ1 = (dq < sideThreshold ? -1 : 0); |
411 | |
|
412 | 0 | pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1); |
413 | 0 | } |
414 | 0 | } |
415 | 0 | } |
416 | | |
417 | | void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[]) |
418 | 0 | { |
419 | 0 | int32_t chFmt = cuQ->m_chromaFormat, chromaShift; |
420 | 0 | intptr_t offset, srcStep; |
421 | 0 | const PPS* pps = cuQ->m_slice->m_pps; |
422 | |
|
423 | 0 | int32_t maskP = -1; |
424 | 0 | int32_t maskQ = -1; |
425 | 0 | int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1; |
426 | |
|
427 | 0 | X265_CHECK(((dir == EDGE_VER) |
428 | 0 | ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift) |
429 | 0 | : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0, |
430 | 0 | "invalid edge\n"); |
431 | |
|
432 | 0 | PicYuv* reconPic = cuQ->m_encData->m_reconPic; |
433 | 0 | intptr_t stride = reconPic->m_strideC; |
434 | 0 | intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx); |
435 | 0 | bool bCheckNoFilter = pps->bTransquantBypassEnabled; |
436 | |
|
437 | 0 | if (dir == EDGE_VER) |
438 | 0 | { |
439 | 0 | chromaShift = cuQ->m_vChromaShift; |
440 | 0 | srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift)); |
441 | 0 | offset = 1; |
442 | 0 | srcStep = stride; |
443 | 0 | } |
444 | 0 | else // (dir == EDGE_HOR) |
445 | 0 | { |
446 | 0 | chromaShift = cuQ->m_hChromaShift; |
447 | 0 | srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift); |
448 | 0 | offset = stride; |
449 | 0 | srcStep = 1; |
450 | 0 | } |
451 | |
|
452 | 0 | pixel* srcChroma[2]; |
453 | 0 | srcChroma[0] = reconPic->m_picOrg[1] + srcOffset; |
454 | 0 | srcChroma[1] = reconPic->m_picOrg[2] + srcOffset; |
455 | |
|
456 | 0 | uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift); |
457 | 0 | for (uint32_t idx = 0; idx < numUnits; idx++) |
458 | 0 | { |
459 | 0 | uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift); |
460 | 0 | uint32_t bs = blockStrength[partQ]; |
461 | |
|
462 | 0 | if (bs <= 1) |
463 | 0 | continue; |
464 | | |
465 | | // Derive neighboring PU index |
466 | 0 | uint32_t partP; |
467 | 0 | const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ)); |
468 | |
|
469 | 0 | if (bCheckNoFilter) |
470 | 0 | { |
471 | | // check if each of PUs is lossless coded |
472 | 0 | maskP = (cuP->m_tqBypass[partP] ? 0 : -1); |
473 | 0 | maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1); |
474 | 0 | if (!(maskP | maskQ)) |
475 | 0 | continue; |
476 | 0 | } |
477 | | |
478 | 0 | int32_t qpQ = cuQ->m_qp[partQ]; |
479 | 0 | int32_t qpP = cuP->m_qp[partP]; |
480 | 0 | int32_t qpA = (qpP + qpQ + 1) >> 1; |
481 | |
|
482 | 0 | intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE; |
483 | 0 | for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++) |
484 | 0 | { |
485 | 0 | int32_t qp = qpA + pps->chromaQpOffset[chromaIdx]; |
486 | 0 | if (qp >= 30) |
487 | 0 | qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC); |
488 | |
|
489 | 0 | int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); |
490 | 0 | const int32_t bitdepthShift = X265_DEPTH - 8; |
491 | 0 | int32_t tc = s_tcTable[indexTC] << bitdepthShift; |
492 | 0 | pixel* srcC = srcChroma[chromaIdx]; |
493 | |
|
494 | 0 | primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ); |
495 | 0 | } |
496 | 0 | } |
497 | 0 | } |
498 | | |
499 | | const uint8_t Deblock::s_tcTable[54] = |
500 | | { |
501 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, |
502 | | 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24 |
503 | | }; |
504 | | |
505 | | const uint8_t Deblock::s_betaTable[52] = |
506 | | { |
507 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, |
508 | | 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 |
509 | | }; |
510 | | |