/src/x265/source/common/predict.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #include "common.h" |
26 | | #include "slice.h" |
27 | | #include "framedata.h" |
28 | | #include "picyuv.h" |
29 | | #include "predict.h" |
30 | | #include "primitives.h" |
31 | | |
32 | | using namespace X265_NS; |
33 | | |
34 | | #if _MSC_VER |
35 | | #pragma warning(disable: 4127) // conditional expression is constant |
36 | | #endif |
37 | | |
38 | | PredictionUnit::PredictionUnit(const CUData& cu, const CUGeom& cuGeom, int puIdx) |
39 | 0 | { |
40 | | /* address of CTU */ |
41 | 0 | ctuAddr = cu.m_cuAddr; |
42 | | |
43 | | /* offset of CU */ |
44 | 0 | cuAbsPartIdx = cuGeom.absPartIdx; |
45 | | |
46 | | /* offset and dimensions of PU */ |
47 | 0 | cu.getPartIndexAndSize(puIdx, puAbsPartIdx, width, height); |
48 | 0 | } |
49 | | |
50 | | namespace |
51 | | { |
52 | | inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset) |
53 | 0 | { |
54 | 0 | return x265_clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset * (1 << (shift - 1)))) >> shift); |
55 | 0 | } |
56 | | } |
57 | | |
58 | | Predict::Predict() |
59 | 0 | { |
60 | 0 | } |
61 | | |
62 | | Predict::~Predict() |
63 | 0 | { |
64 | 0 | m_predShortYuv[0].destroy(); |
65 | 0 | m_predShortYuv[1].destroy(); |
66 | 0 | } |
67 | | |
68 | | bool Predict::allocBuffers(int csp) |
69 | 0 | { |
70 | 0 | m_csp = csp; |
71 | 0 | m_hChromaShift = CHROMA_H_SHIFT(csp); |
72 | 0 | m_vChromaShift = CHROMA_V_SHIFT(csp); |
73 | |
|
74 | 0 | return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp); |
75 | 0 | } |
76 | | |
/* Build the inter prediction of one PU and store it in predYuv.
 *
 *   cu       coding unit that owns the PU; supplies the reference indices,
 *            motion vectors and the slice (reference pictures, weight tables)
 *   pu       position and size of the prediction unit
 *   predYuv  destination of the final pixel-precision prediction
 *   bLuma    when true the luma plane is predicted
 *   bChroma  when true both chroma planes are predicted
 *
 * P slices always predict from list 0.  Other slices use list 0, list 1 or
 * both, depending on which reference indices are non-negative.
 *
 * Whenever explicit weights apply, the block is first interpolated into the
 * 16-bit buffers m_predShortYuv[] and then combined by addWeightUni() or
 * addWeightBi().  Unweighted bi-prediction averages the two 16-bit buffers
 * with Yuv::addAvg().  Unweighted uni-prediction interpolates straight into
 * predYuv.
 *
 * With ENABLE_SCC_EXT, when bEnableSCC is set and refIdx0 is the last list-0
 * index, the list-0 reference is read from
 * m_refFrameList[0][refIdx0]->m_reconPic[1] instead of m_refReconPicList. */
void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma)
{
    int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
    int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];

    if (cu.m_slice->isInterP())
    {
        /* P Slice */
        WeightValues wv0[3];

        X265_CHECK(refIdx0 >= 0, "invalid P refidx\n");
        X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "P refidx out of range\n");
        const WeightParam *wp0 = cu.m_slice->m_weightPredTable[0][refIdx0];

        /* work on a clipped copy of the motion vector */
        MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
        cu.clipMv(mv0);

        if (cu.m_slice->m_pps->bUseWeightPred && wp0->wtPresent)
        {
            /* only plane 0 is filled in when chroma is not requested */
            for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
            {
                wv0[plane].w = wp0[plane].inputWeight;
                /* offset is scaled up by the bit depth above 8 */
                wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
                wv0[plane].shift = wp0[plane].log2WeightDenom;
                /* note: addWeightUni() derives its own rounding term from shift */
                wv0[plane].round = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0;
            }

            ShortYuv& shortYuv = m_predShortYuv[0];

            /* interpolate to 16-bit precision, then apply the weights */
            if (bLuma)
                predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
            if (bChroma)
                predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);

            addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
        }
        else
        {
            /* unweighted: interpolate directly into the destination */
#if ENABLE_SCC_EXT
            if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
            {
                if (bLuma)
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
                if (bChroma)
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
            }
            else
#endif
            {
                if (bLuma)
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
                if (bChroma)
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
            }
        }
    }
    else
    {
        /* B Slice */

        WeightValues wv0[3], wv1[3];
        const WeightParam *pwp0, *pwp1;

        X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "bidir refidx0 out of range\n");
        X265_CHECK(refIdx1 < cu.m_slice->m_numRefIdx[1], "bidir refidx1 out of range\n");

        if (cu.m_slice->m_pps->bUseWeightedBiPred)
        {
            /* a negative reference index means that list is unused for this PU */
            pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
            pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;

            if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent))
            {
                /* biprediction weighting */
                for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
                {
                    wv0[plane].w = pwp0[plane].inputWeight;
                    wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
                    wv0[plane].shift = pwp0[plane].log2WeightDenom;
                    wv0[plane].round = 1 << pwp0[plane].log2WeightDenom;

                    /* list 1 shares the shift and rounding of list 0 */
                    wv1[plane].w = pwp1[plane].inputWeight;
                    wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8));
                    wv1[plane].shift = wv0[plane].shift;
                    wv1[plane].round = wv0[plane].round;
                }
            }
            else
            {
                /* uniprediction weighting, always outputs to wv0 */
                const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
                for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
                {
                    wv0[plane].w = pwp[plane].inputWeight;
                    wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
                    wv0[plane].shift = pwp[plane].log2WeightDenom;
                    wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0;
                }
            }
        }
        else
            pwp0 = pwp1 = NULL; /* no explicit weighting: every weighted test below fails */

        if (refIdx0 >= 0 && refIdx1 >= 0)
        {
            /* bi-prediction: interpolate both lists into the 16-bit buffers */
            MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
            MV mv1 = cu.m_mv[1][pu.puAbsPartIdx];
            cu.clipMv(mv0);
            cu.clipMv(mv1);

            if (bLuma)
            {
                /* the #if/else below selects only the list-0 source; list 1 always follows */
#if ENABLE_SCC_EXT
                if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
                    predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
                else
#endif
                    predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
                predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
            }
            if (bChroma)
            {
#if ENABLE_SCC_EXT
                if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
                    predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
                else
#endif
                    predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
                predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
            }

            /* same condition that selected the bi-prediction weights above */
            if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent))
                addWeightBi(pu, predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma);
            else
                predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], pu.puAbsPartIdx, pu.width, pu.height, bLuma, bChroma);
        }
        else if (refIdx0 >= 0)
        {
            /* uni-prediction from list 0 */
            MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
            cu.clipMv(mv0);

            if (pwp0 && pwp0->wtPresent)
            {
                ShortYuv& shortYuv = m_predShortYuv[0];

                if (bLuma)
                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
                if (bChroma)
                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);

                addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
            }
            else
            {
#if ENABLE_SCC_EXT
                if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
                {
                    if (bLuma)
                        predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
                    if (bChroma)
                        predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
                }
                else
#endif
                {
                    if (bLuma)
                        predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
                    if (bChroma)
                        predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
                }
            }
        }
        else
        {
            MV mv1 = cu.m_mv[1][pu.puAbsPartIdx];
            cu.clipMv(mv1);

            /* uniprediction to L1 */
            X265_CHECK(refIdx1 >= 0, "refidx1 was not positive\n");

            if (pwp1 && pwp1->wtPresent)
            {
                ShortYuv& shortYuv = m_predShortYuv[0];

                if (bLuma)
                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
                if (bChroma)
                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);

                /* the uni-prediction weights were stored in wv0 even for list 1 */
                addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
            }
            else
            {
                if (bLuma)
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
                if (bChroma)
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
            }
        }
    }
}
278 | | |
279 | | void Predict::predInterLumaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const |
280 | 0 | { |
281 | 0 | pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx); |
282 | 0 | intptr_t dstStride = dstYuv.m_size; |
283 | |
|
284 | 0 | intptr_t srcStride = refPic.m_stride; |
285 | 0 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; |
286 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
287 | 0 | const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; |
288 | |
|
289 | 0 | int xFrac = mv.x & 3; |
290 | 0 | int yFrac = mv.y & 3; |
291 | |
|
292 | 0 | if (!(yFrac | xFrac)) |
293 | 0 | primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride); |
294 | 0 | else if (!yFrac) |
295 | 0 | primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac); |
296 | 0 | else if (!xFrac) |
297 | 0 | primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac); |
298 | 0 | else |
299 | 0 | primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac); |
300 | 0 | } |
301 | | |
302 | | void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const |
303 | 0 | { |
304 | 0 | int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx); |
305 | 0 | intptr_t dstStride = dstSYuv.m_size; |
306 | |
|
307 | 0 | intptr_t srcStride = refPic.m_stride; |
308 | 0 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; |
309 | 0 | const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; |
310 | |
|
311 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
312 | |
|
313 | 0 | X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n"); |
314 | 0 | X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n"); |
315 | |
|
316 | 0 | int xFrac = mv.x & 3; |
317 | 0 | int yFrac = mv.y & 3; |
318 | |
|
319 | 0 | if (!(yFrac | xFrac)) |
320 | 0 | { |
321 | 0 | bool srcbufferAlignCheck = (refPic.m_cuOffsetY[pu.ctuAddr] + refPic.m_buOffsetY[pu.cuAbsPartIdx + pu.puAbsPartIdx] + srcOffset) % 64 == 0; |
322 | 0 | bool dstbufferAlignCheck = (dstSYuv.getAddrOffset(pu.puAbsPartIdx, dstSYuv.m_size) % 64) == 0; |
323 | 0 | primitives.pu[partEnum].convert_p2s[srcStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheck && dstbufferAlignCheck](src, srcStride, dst, dstStride); |
324 | 0 | } |
325 | 0 | else if (!yFrac) |
326 | 0 | primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0); |
327 | 0 | else if (!xFrac) |
328 | 0 | primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac); |
329 | 0 | else |
330 | 0 | { |
331 | 0 | ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); |
332 | 0 | int immedStride = pu.width; |
333 | 0 | int halfFilterSize = NTAPS_LUMA >> 1; |
334 | |
|
335 | 0 | primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1); |
336 | 0 | primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac); |
337 | 0 | } |
338 | 0 | } |
339 | | |
340 | | void Predict::predInterChromaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const |
341 | 0 | { |
342 | 0 | intptr_t dstStride = dstYuv.m_csize; |
343 | 0 | intptr_t refStride = refPic.m_strideC; |
344 | |
|
345 | 0 | int mvx = mv.x << (1 - m_hChromaShift); |
346 | 0 | int mvy = mv.y << (1 - m_vChromaShift); |
347 | |
|
348 | 0 | intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride; |
349 | |
|
350 | 0 | const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
351 | 0 | const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
352 | |
|
353 | 0 | pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx); |
354 | 0 | pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx); |
355 | |
|
356 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
357 | |
|
358 | 0 | int xFrac = mvx & 7; |
359 | 0 | int yFrac = mvy & 7; |
360 | |
|
361 | 0 | if (!(yFrac | xFrac)) |
362 | 0 | { |
363 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride); |
364 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCr, dstStride, refCr, refStride); |
365 | 0 | } |
366 | 0 | else if (!yFrac) |
367 | 0 | { |
368 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac); |
369 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac); |
370 | 0 | } |
371 | 0 | else if (!xFrac) |
372 | 0 | { |
373 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac); |
374 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac); |
375 | 0 | } |
376 | 0 | else |
377 | 0 | { |
378 | 0 | ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]); |
379 | 0 | int immedStride = pu.width >> m_hChromaShift; |
380 | 0 | int halfFilterSize = NTAPS_CHROMA >> 1; |
381 | |
|
382 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1); |
383 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac); |
384 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1); |
385 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac); |
386 | 0 | } |
387 | 0 | } |
388 | | |
389 | | void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const |
390 | 0 | { |
391 | 0 | intptr_t dstStride = dstSYuv.m_csize; |
392 | 0 | intptr_t refStride = refPic.m_strideC; |
393 | |
|
394 | 0 | int mvx = mv.x << (1 - m_hChromaShift); |
395 | 0 | int mvy = mv.y << (1 - m_vChromaShift); |
396 | |
|
397 | 0 | intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride; |
398 | |
|
399 | 0 | const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
400 | 0 | const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
401 | |
|
402 | 0 | int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx); |
403 | 0 | int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx); |
404 | |
|
405 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
406 | | |
407 | 0 | uint32_t cxWidth = pu.width >> m_hChromaShift; |
408 | |
|
409 | 0 | X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n"); |
410 | |
|
411 | 0 | int xFrac = mvx & 7; |
412 | 0 | int yFrac = mvy & 7; |
413 | |
|
414 | 0 | if (!(yFrac | xFrac)) |
415 | 0 | { |
416 | 0 | bool srcbufferAlignCheckC = (refPic.m_cuOffsetC[pu.ctuAddr] + refPic.m_buOffsetC[pu.cuAbsPartIdx + pu.puAbsPartIdx] + refOffset) % 64 == 0; |
417 | 0 | bool dstbufferAlignCheckC = dstSYuv.getChromaAddrOffset(pu.puAbsPartIdx) % 64 == 0; |
418 | 0 | primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCb, refStride, dstCb, dstStride); |
419 | 0 | primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCr, refStride, dstCr, dstStride); |
420 | 0 | } |
421 | 0 | else if (!yFrac) |
422 | 0 | { |
423 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac, 0); |
424 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac, 0); |
425 | 0 | } |
426 | 0 | else if (!xFrac) |
427 | 0 | { |
428 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac); |
429 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac); |
430 | 0 | } |
431 | 0 | else |
432 | 0 | { |
433 | 0 | ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]); |
434 | 0 | int immedStride = cxWidth; |
435 | 0 | int halfFilterSize = NTAPS_CHROMA >> 1; |
436 | |
|
437 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1); |
438 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac); |
439 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1); |
440 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac); |
441 | 0 | } |
442 | 0 | } |
443 | | |
444 | | /* weighted averaging for bi-pred */ |
445 | | void Predict::addWeightBi(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const |
446 | 0 | { |
447 | 0 | int x, y; |
448 | |
|
449 | 0 | int w0, w1, offset, shiftNum, shift, round; |
450 | 0 | uint32_t src0Stride, src1Stride, dststride; |
451 | |
|
452 | 0 | if (bLuma) |
453 | 0 | { |
454 | 0 | pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx); |
455 | 0 | const int16_t* srcY0 = srcYuv0.getLumaAddr(pu.puAbsPartIdx); |
456 | 0 | const int16_t* srcY1 = srcYuv1.getLumaAddr(pu.puAbsPartIdx); |
457 | | |
458 | | // Luma |
459 | 0 | w0 = wp0[0].w; |
460 | 0 | offset = wp0[0].o + wp1[0].o; |
461 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
462 | 0 | shift = wp0[0].shift + shiftNum + 1; |
463 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
464 | 0 | w1 = wp1[0].w; |
465 | |
|
466 | 0 | src0Stride = srcYuv0.m_size; |
467 | 0 | src1Stride = srcYuv1.m_size; |
468 | 0 | dststride = predYuv.m_size; |
469 | | |
470 | | // TODO: can we use weight_sp here? |
471 | 0 | for (y = pu.height - 1; y >= 0; y--) |
472 | 0 | { |
473 | 0 | for (x = pu.width - 1; x >= 0; ) |
474 | 0 | { |
475 | | // note: luma min width is 4 |
476 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
477 | 0 | x--; |
478 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
479 | 0 | x--; |
480 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
481 | 0 | x--; |
482 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
483 | 0 | x--; |
484 | 0 | } |
485 | |
|
486 | 0 | srcY0 += src0Stride; |
487 | 0 | srcY1 += src1Stride; |
488 | 0 | dstY += dststride; |
489 | 0 | } |
490 | 0 | } |
491 | |
|
492 | 0 | if (bChroma) |
493 | 0 | { |
494 | 0 | pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx); |
495 | 0 | pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx); |
496 | 0 | const int16_t* srcU0 = srcYuv0.getCbAddr(pu.puAbsPartIdx); |
497 | 0 | const int16_t* srcV0 = srcYuv0.getCrAddr(pu.puAbsPartIdx); |
498 | 0 | const int16_t* srcU1 = srcYuv1.getCbAddr(pu.puAbsPartIdx); |
499 | 0 | const int16_t* srcV1 = srcYuv1.getCrAddr(pu.puAbsPartIdx); |
500 | | |
501 | | // Chroma U |
502 | 0 | w0 = wp0[1].w; |
503 | 0 | offset = wp0[1].o + wp1[1].o; |
504 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
505 | 0 | shift = wp0[1].shift + shiftNum + 1; |
506 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
507 | 0 | w1 = wp1[1].w; |
508 | |
|
509 | 0 | src0Stride = srcYuv0.m_csize; |
510 | 0 | src1Stride = srcYuv1.m_csize; |
511 | 0 | dststride = predYuv.m_csize; |
512 | |
|
513 | 0 | uint32_t cwidth = pu.width >> srcYuv0.m_hChromaShift; |
514 | 0 | uint32_t cheight = pu.height >> srcYuv0.m_vChromaShift; |
515 | | |
516 | | // TODO: can we use weight_sp here? |
517 | 0 | for (y = cheight - 1; y >= 0; y--) |
518 | 0 | { |
519 | 0 | for (x = cwidth - 1; x >= 0;) |
520 | 0 | { |
521 | | // note: chroma min width is 2 |
522 | 0 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); |
523 | 0 | x--; |
524 | 0 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); |
525 | 0 | x--; |
526 | 0 | } |
527 | |
|
528 | 0 | srcU0 += src0Stride; |
529 | 0 | srcU1 += src1Stride; |
530 | 0 | dstU += dststride; |
531 | 0 | } |
532 | | |
533 | | // Chroma V |
534 | 0 | w0 = wp0[2].w; |
535 | 0 | offset = wp0[2].o + wp1[2].o; |
536 | 0 | shift = wp0[2].shift + shiftNum + 1; |
537 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
538 | 0 | w1 = wp1[2].w; |
539 | |
|
540 | 0 | for (y = cheight - 1; y >= 0; y--) |
541 | 0 | { |
542 | 0 | for (x = cwidth - 1; x >= 0;) |
543 | 0 | { |
544 | | // note: chroma min width is 2 |
545 | 0 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); |
546 | 0 | x--; |
547 | 0 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); |
548 | 0 | x--; |
549 | 0 | } |
550 | |
|
551 | 0 | srcV0 += src0Stride; |
552 | 0 | srcV1 += src1Stride; |
553 | 0 | dstV += dststride; |
554 | 0 | } |
555 | 0 | } |
556 | 0 | } |
557 | | |
558 | | /* weighted averaging for uni-pred */ |
559 | | void Predict::addWeightUni(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const |
560 | 0 | { |
561 | 0 | int w0, offset, shiftNum, shift, round; |
562 | 0 | uint32_t srcStride, dstStride; |
563 | |
|
564 | 0 | if (bLuma) |
565 | 0 | { |
566 | 0 | pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx); |
567 | 0 | const int16_t* srcY0 = srcYuv.getLumaAddr(pu.puAbsPartIdx); |
568 | | |
569 | | // Luma |
570 | 0 | w0 = wp[0].w; |
571 | 0 | offset = wp[0].offset; |
572 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
573 | 0 | shift = wp[0].shift + shiftNum; |
574 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
575 | 0 | srcStride = srcYuv.m_size; |
576 | 0 | dstStride = predYuv.m_size; |
577 | |
|
578 | 0 | primitives.weight_sp(srcY0, dstY, srcStride, dstStride, pu.width, pu.height, w0, round, shift, offset); |
579 | 0 | } |
580 | |
|
581 | 0 | if (bChroma) |
582 | 0 | { |
583 | 0 | pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx); |
584 | 0 | pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx); |
585 | 0 | const int16_t* srcU0 = srcYuv.getCbAddr(pu.puAbsPartIdx); |
586 | 0 | const int16_t* srcV0 = srcYuv.getCrAddr(pu.puAbsPartIdx); |
587 | | |
588 | | // Chroma U |
589 | 0 | w0 = wp[1].w; |
590 | 0 | offset = wp[1].offset; |
591 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
592 | 0 | shift = wp[1].shift + shiftNum; |
593 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
594 | |
|
595 | 0 | srcStride = srcYuv.m_csize; |
596 | 0 | dstStride = predYuv.m_csize; |
597 | |
|
598 | 0 | uint32_t cwidth = pu.width >> srcYuv.m_hChromaShift; |
599 | 0 | uint32_t cheight = pu.height >> srcYuv.m_vChromaShift; |
600 | |
|
601 | 0 | primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); |
602 | | |
603 | | // Chroma V |
604 | 0 | w0 = wp[2].w; |
605 | 0 | offset = wp[2].offset; |
606 | 0 | shift = wp[2].shift + shiftNum; |
607 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
608 | |
|
609 | 0 | primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); |
610 | 0 | } |
611 | 0 | } |
612 | | |
613 | | void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize) |
614 | 0 | { |
615 | 0 | int tuSize = 1 << log2TrSize; |
616 | 0 | int sizeIdx = log2TrSize - 2; |
617 | 0 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); |
618 | |
|
619 | 0 | int filter = !!(g_intraFilterFlags[dirMode] & tuSize); |
620 | 0 | bool bFilter = log2TrSize <= 4; |
621 | 0 | primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, bFilter); |
622 | 0 | } |
623 | | |
624 | | void Predict::predIntraChromaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC) |
625 | 0 | { |
626 | 0 | int tuSize = 1 << log2TrSizeC; |
627 | 0 | int sizeIdx = log2TrSizeC - 2; |
628 | 0 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); |
629 | |
|
630 | 0 | int filter = !!(m_csp == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize)); |
631 | 0 | primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, 0); |
632 | 0 | } |
633 | | |
634 | | void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, int dirMode) |
635 | 0 | { |
636 | 0 | int tuSize = 1 << intraNeighbors.log2TrSize; |
637 | 0 | int tuSize2 = tuSize << 1; |
638 | |
|
639 | 0 | PicYuv* reconPic = cu.m_encData->m_reconPic[0]; |
640 | 0 | pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx); |
641 | 0 | intptr_t picStride = reconPic->m_stride; |
642 | |
|
643 | 0 | fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]); |
644 | |
|
645 | 0 | pixel* refBuf = intraNeighbourBuf[0]; |
646 | 0 | pixel* fltBuf = intraNeighbourBuf[1]; |
647 | |
|
648 | 0 | pixel topLeft = refBuf[0], topLast = refBuf[tuSize2], leftLast = refBuf[tuSize2 + tuSize2]; |
649 | |
|
650 | 0 | if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize) |
651 | 0 | { |
652 | | // generate filtered intra prediction samples |
653 | |
|
654 | 0 | if (cu.m_slice->m_sps->bUseStrongIntraSmoothing && tuSize == 32) |
655 | 0 | { |
656 | 0 | const int threshold = 1 << (X265_DEPTH - 5); |
657 | |
|
658 | 0 | pixel topMiddle = refBuf[32], leftMiddle = refBuf[tuSize2 + 32]; |
659 | |
|
660 | 0 | if (abs(topLeft + topLast - (topMiddle << 1)) < threshold && |
661 | 0 | abs(topLeft + leftLast - (leftMiddle << 1)) < threshold) |
662 | 0 | { |
663 | | // "strong" bilinear interpolation |
664 | 0 | const int shift = 5 + 1; |
665 | 0 | int init = (topLeft << shift) + tuSize; |
666 | 0 | int deltaL, deltaR; |
667 | |
|
668 | 0 | deltaL = leftLast - topLeft; deltaR = topLast - topLeft; |
669 | |
|
670 | 0 | fltBuf[0] = topLeft; |
671 | 0 | for (int i = 1; i < tuSize2; i++) |
672 | 0 | { |
673 | 0 | fltBuf[i + tuSize2] = (pixel)((init + deltaL * i) >> shift); // Left Filtering |
674 | 0 | fltBuf[i] = (pixel)((init + deltaR * i) >> shift); // Above Filtering |
675 | 0 | } |
676 | 0 | fltBuf[tuSize2] = topLast; |
677 | 0 | fltBuf[tuSize2 + tuSize2] = leftLast; |
678 | 0 | return; |
679 | 0 | } |
680 | 0 | } |
681 | | |
682 | 0 | primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(refBuf, fltBuf); |
683 | 0 | } |
684 | 0 | } |
685 | | |
686 | | void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId) |
687 | 0 | { |
688 | 0 | PicYuv* reconPic = cu.m_encData->m_reconPic[0]; |
689 | 0 | const pixel* adiOrigin = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx); |
690 | 0 | intptr_t picStride = reconPic->m_strideC; |
691 | |
|
692 | 0 | fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]); |
693 | |
|
694 | 0 | if (m_csp == X265_CSP_I444) |
695 | 0 | primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(intraNeighbourBuf[0], intraNeighbourBuf[1]); |
696 | 0 | } |
697 | | |
698 | | void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors) |
699 | 0 | { |
700 | 0 | uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth; |
701 | 0 | int log2UnitWidth = LOG2_UNIT_SIZE; |
702 | 0 | int log2UnitHeight = LOG2_UNIT_SIZE; |
703 | |
|
704 | 0 | if (!isLuma) |
705 | 0 | { |
706 | 0 | log2TrSize -= cu.m_hChromaShift; |
707 | 0 | log2UnitWidth -= cu.m_hChromaShift; |
708 | 0 | log2UnitHeight -= cu.m_vChromaShift; |
709 | 0 | } |
710 | |
|
711 | 0 | int numIntraNeighbor; |
712 | 0 | bool* bNeighborFlags = intraNeighbors->bNeighborFlags; |
713 | |
|
714 | 0 | uint32_t tuSize = 1 << log2TrSize; |
715 | 0 | int tuWidthInUnits = tuSize >> log2UnitWidth; |
716 | 0 | int tuHeightInUnits = tuSize >> log2UnitHeight; |
717 | 0 | int aboveUnits = tuWidthInUnits << 1; |
718 | 0 | int leftUnits = tuHeightInUnits << 1; |
719 | 0 | uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx; |
720 | 0 | uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + tuWidthInUnits - 1]; |
721 | 0 | uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) << LOG2_RASTER_SIZE)]; |
722 | |
|
723 | 0 | if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred) |
724 | 0 | { |
725 | 0 | bNeighborFlags[leftUnits] = isAboveLeftAvailable<false>(cu, partIdxLT); |
726 | 0 | numIntraNeighbor = (int)(bNeighborFlags[leftUnits]); |
727 | 0 | numIntraNeighbor += isAboveAvailable<false>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1); |
728 | 0 | numIntraNeighbor += isAboveRightAvailable<false>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits); |
729 | 0 | numIntraNeighbor += isLeftAvailable<false>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1); |
730 | 0 | numIntraNeighbor += isBelowLeftAvailable<false>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits); |
731 | 0 | } |
732 | 0 | else |
733 | 0 | { |
734 | 0 | bNeighborFlags[leftUnits] = isAboveLeftAvailable<true>(cu, partIdxLT); |
735 | 0 | numIntraNeighbor = (int)(bNeighborFlags[leftUnits]); |
736 | 0 | numIntraNeighbor += isAboveAvailable<true>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1); |
737 | 0 | numIntraNeighbor += isAboveRightAvailable<true>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits); |
738 | 0 | numIntraNeighbor += isLeftAvailable<true>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1); |
739 | 0 | numIntraNeighbor += isBelowLeftAvailable<true>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits); |
740 | 0 | } |
741 | |
|
742 | 0 | intraNeighbors->numIntraNeighbor = numIntraNeighbor; |
743 | 0 | intraNeighbors->totalUnits = aboveUnits + leftUnits + 1; |
744 | 0 | intraNeighbors->aboveUnits = aboveUnits; |
745 | 0 | intraNeighbors->leftUnits = leftUnits; |
746 | 0 | intraNeighbors->unitWidth = 1 << log2UnitWidth; |
747 | 0 | intraNeighbors->unitHeight = 1 << log2UnitHeight; |
748 | 0 | intraNeighbors->log2TrSize = log2TrSize; |
749 | 0 | } |
750 | | |
751 | | void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, const IntraNeighbors& intraNeighbors, pixel dst[258]) |
752 | 0 | { |
753 | 0 | const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1)); |
754 | 0 | int numIntraNeighbor = intraNeighbors.numIntraNeighbor; |
755 | 0 | int totalUnits = intraNeighbors.totalUnits; |
756 | 0 | uint32_t tuSize = 1 << intraNeighbors.log2TrSize; |
757 | 0 | uint32_t refSize = tuSize * 2 + 1; |
758 | | |
759 | | // Nothing is available, perform DC prediction. |
760 | 0 | if (numIntraNeighbor == 0) |
761 | 0 | { |
762 | | // Fill top border with DC value |
763 | 0 | for (uint32_t i = 0; i < refSize; i++) |
764 | 0 | dst[i] = dcValue; |
765 | | |
766 | | // Fill left border with DC value |
767 | 0 | for (uint32_t i = 0; i < refSize - 1; i++) |
768 | 0 | dst[i + refSize] = dcValue; |
769 | 0 | } |
770 | 0 | else if (numIntraNeighbor == totalUnits) |
771 | 0 | { |
772 | | // Fill top border with rec. samples |
773 | 0 | const pixel* adiTemp = adiOrigin - picStride - 1; |
774 | 0 | memcpy(dst, adiTemp, refSize * sizeof(pixel)); |
775 | | |
776 | | // Fill left border with rec. samples |
777 | 0 | adiTemp = adiOrigin - 1; |
778 | 0 | for (uint32_t i = 0; i < refSize - 1; i++) |
779 | 0 | { |
780 | 0 | dst[i + refSize] = adiTemp[0]; |
781 | 0 | adiTemp += picStride; |
782 | 0 | } |
783 | 0 | } |
784 | 0 | else // reference samples are partially available |
785 | 0 | { |
786 | 0 | const bool *bNeighborFlags = intraNeighbors.bNeighborFlags; |
787 | 0 | const bool *pNeighborFlags; |
788 | 0 | int aboveUnits = intraNeighbors.aboveUnits; |
789 | 0 | int leftUnits = intraNeighbors.leftUnits; |
790 | 0 | int unitWidth = intraNeighbors.unitWidth; |
791 | 0 | int unitHeight = intraNeighbors.unitHeight; |
792 | 0 | int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth); |
793 | 0 | pixel adiLineBuffer[5 * MAX_CU_SIZE]; |
794 | 0 | pixel *adi; |
795 | | |
796 | | // Initialize |
797 | 0 | for (int i = 0; i < totalSamples; i++) |
798 | 0 | adiLineBuffer[i] = dcValue; |
799 | | |
800 | | // Fill top-left sample |
801 | 0 | const pixel* adiTemp = adiOrigin - picStride - 1; |
802 | 0 | adi = adiLineBuffer + (leftUnits * unitHeight); |
803 | 0 | pNeighborFlags = bNeighborFlags + leftUnits; |
804 | 0 | if (*pNeighborFlags) |
805 | 0 | { |
806 | 0 | pixel topLeftVal = adiTemp[0]; |
807 | 0 | for (int i = 0; i < unitWidth; i++) |
808 | 0 | adi[i] = topLeftVal; |
809 | 0 | } |
810 | | |
811 | | // Fill left & below-left samples |
812 | 0 | adiTemp += picStride; |
813 | 0 | adi--; |
814 | | // NOTE: over copy here, but reduce condition operators |
815 | 0 | for (int j = 0; j < leftUnits * unitHeight; j++) |
816 | 0 | { |
817 | 0 | adi[-j] = adiTemp[j * picStride]; |
818 | 0 | } |
819 | | |
820 | | // Fill above & above-right samples |
821 | 0 | adiTemp = adiOrigin - picStride; |
822 | 0 | adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth; |
823 | | // NOTE: over copy here, but reduce condition operators |
824 | 0 | memcpy(adi, adiTemp, aboveUnits * unitWidth * sizeof(*adiTemp)); |
825 | | |
826 | | // Pad reference samples when necessary |
827 | 0 | int curr = 0; |
828 | 0 | int next = 1; |
829 | 0 | adi = adiLineBuffer; |
830 | 0 | int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth); |
831 | 0 | if (!bNeighborFlags[0]) |
832 | 0 | { |
833 | | // very bottom unit of bottom-left; at least one unit will be valid. |
834 | 0 | while (next < totalUnits && !bNeighborFlags[next]) |
835 | 0 | next++; |
836 | |
|
837 | 0 | pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth))); |
838 | 0 | const pixel refSample = *pAdiLineNext; |
839 | | // Pad unavailable samples with new value |
840 | 0 | int nextOrTop = X265_MIN(next, leftUnits); |
841 | | |
842 | | // fill left column |
843 | | #if HIGH_BIT_DEPTH |
844 | | while (curr < nextOrTop) |
845 | | { |
846 | | for (int i = 0; i < unitHeight; i++) |
847 | | adi[i] = refSample; |
848 | | |
849 | | adi += unitHeight; |
850 | | curr++; |
851 | | } |
852 | | |
853 | | // fill top row |
854 | | while (curr < next) |
855 | | { |
856 | | for (int i = 0; i < unitWidth; i++) |
857 | | adi[i] = refSample; |
858 | | |
859 | | adi += unitWidth; |
860 | | curr++; |
861 | | } |
862 | | #else |
863 | 0 | X265_CHECK(curr <= nextOrTop, "curr must be less than or equal to nextOrTop\n"); |
864 | 0 | if (curr < nextOrTop) |
865 | 0 | { |
866 | 0 | const int fillSize = unitHeight * (nextOrTop - curr); |
867 | 0 | memset(adi, refSample, fillSize * sizeof(pixel)); |
868 | 0 | curr = nextOrTop; |
869 | 0 | adi += fillSize; |
870 | 0 | } |
871 | |
|
872 | 0 | if (curr < next) |
873 | 0 | { |
874 | 0 | const int fillSize = unitWidth * (next - curr); |
875 | 0 | memset(adi, refSample, fillSize * sizeof(pixel)); |
876 | 0 | curr = next; |
877 | 0 | adi += fillSize; |
878 | 0 | } |
879 | 0 | #endif |
880 | 0 | } |
881 | | |
882 | | // pad all other reference samples. |
883 | 0 | while (curr < totalUnits) |
884 | 0 | { |
885 | 0 | if (!bNeighborFlags[curr]) // samples not available |
886 | 0 | { |
887 | 0 | int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight; |
888 | 0 | const pixel refSample = *(adi - 1); |
889 | 0 | for (int i = 0; i < numSamplesInCurrUnit; i++) |
890 | 0 | adi[i] = refSample; |
891 | |
|
892 | 0 | adi += numSamplesInCurrUnit; |
893 | 0 | curr++; |
894 | 0 | } |
895 | 0 | else |
896 | 0 | { |
897 | 0 | adi += (curr >= leftUnits) ? unitWidth : unitHeight; |
898 | 0 | curr++; |
899 | 0 | } |
900 | 0 | } |
901 | | |
902 | | // Copy processed samples |
903 | 0 | adi = adiLineBuffer + refSize + unitWidth - 2; |
904 | 0 | memcpy(dst, adi, refSize * sizeof(pixel)); |
905 | |
|
906 | 0 | adi = adiLineBuffer + refSize - 1; |
907 | 0 | for (int i = 0; i < (int)refSize - 1; i++) |
908 | 0 | dst[i + refSize] = adi[-(i + 1)]; |
909 | 0 | } |
910 | 0 | } |
911 | | |
912 | | template<bool cip> |
913 | | bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT) |
914 | 0 | { |
915 | 0 | uint32_t partAboveLeft; |
916 | 0 | const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT); |
917 | |
|
918 | 0 | return cuAboveLeft && (!cip || cuAboveLeft->isIntra(partAboveLeft)); |
919 | 0 | } Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<false>(x265::CUData const&, unsigned int) Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<true>(x265::CUData const&, unsigned int) |
920 | | |
921 | | template<bool cip> |
922 | | int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags) |
923 | 0 | { |
924 | 0 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; |
925 | 0 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT]; |
926 | 0 | const uint32_t idxStep = 1; |
927 | 0 | int numIntra = 0; |
928 | |
|
929 | 0 | for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++) |
930 | 0 | { |
931 | 0 | uint32_t partAbove; |
932 | 0 | const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]); |
933 | 0 | if (cuAbove && (!cip || cuAbove->isIntra(partAbove))) |
934 | 0 | { |
935 | 0 | numIntra++; |
936 | 0 | *bValidFlags = true; |
937 | 0 | } |
938 | 0 | else |
939 | 0 | *bValidFlags = false; |
940 | 0 | } |
941 | |
|
942 | 0 | return numIntra; |
943 | 0 | } Unexecuted instantiation: int x265::Predict::isAboveAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*) Unexecuted instantiation: int x265::Predict::isAboveAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*) |
944 | | |
945 | | template<bool cip> |
946 | | int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags) |
947 | 0 | { |
948 | 0 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; |
949 | 0 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB]; |
950 | 0 | const uint32_t idxStep = RASTER_SIZE; |
951 | 0 | int numIntra = 0; |
952 | |
|
953 | 0 | for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction |
954 | 0 | { |
955 | 0 | uint32_t partLeft; |
956 | 0 | const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]); |
957 | 0 | if (cuLeft && (!cip || cuLeft->isIntra(partLeft))) |
958 | 0 | { |
959 | 0 | numIntra++; |
960 | 0 | *bValidFlags = true; |
961 | 0 | } |
962 | 0 | else |
963 | 0 | *bValidFlags = false; |
964 | 0 | } |
965 | |
|
966 | 0 | return numIntra; |
967 | 0 | } Unexecuted instantiation: int x265::Predict::isLeftAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*) Unexecuted instantiation: int x265::Predict::isLeftAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*) |
968 | | |
969 | | template<bool cip> |
970 | | int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits) |
971 | 0 | { |
972 | 0 | int numIntra = 0; |
973 | |
|
974 | 0 | for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++) |
975 | 0 | { |
976 | 0 | uint32_t partAboveRight; |
977 | 0 | const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset); |
978 | 0 | if (cuAboveRight && (!cip || cuAboveRight->isIntra(partAboveRight))) |
979 | 0 | { |
980 | 0 | numIntra++; |
981 | 0 | *bValidFlags = true; |
982 | 0 | } |
983 | 0 | else |
984 | 0 | *bValidFlags = false; |
985 | 0 | } |
986 | |
|
987 | 0 | return numIntra; |
988 | 0 | } Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int) Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int) |
989 | | |
990 | | template<bool cip> |
991 | | int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits) |
992 | 0 | { |
993 | 0 | int numIntra = 0; |
994 | |
|
995 | 0 | for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction |
996 | 0 | { |
997 | 0 | uint32_t partBelowLeft; |
998 | 0 | const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset); |
999 | 0 | if (cuBelowLeft && (!cip || cuBelowLeft->isIntra(partBelowLeft))) |
1000 | 0 | { |
1001 | 0 | numIntra++; |
1002 | 0 | *bValidFlags = true; |
1003 | 0 | } |
1004 | 0 | else |
1005 | 0 | *bValidFlags = false; |
1006 | 0 | } |
1007 | |
|
1008 | 0 | return numIntra; |
1009 | 0 | } Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int) Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int) |