/src/x265/source/common/predict.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #include "common.h" |
26 | | #include "slice.h" |
27 | | #include "framedata.h" |
28 | | #include "picyuv.h" |
29 | | #include "predict.h" |
30 | | #include "primitives.h" |
31 | | |
32 | | using namespace X265_NS; |
33 | | |
34 | | #if _MSC_VER |
35 | | #pragma warning(disable: 4127) // conditional expression is constant |
36 | | #endif |
37 | | |
38 | | PredictionUnit::PredictionUnit(const CUData& cu, const CUGeom& cuGeom, int puIdx) |
39 | 0 | { |
40 | | /* address of CTU */ |
41 | 0 | ctuAddr = cu.m_cuAddr; |
42 | | |
43 | | /* offset of CU */ |
44 | 0 | cuAbsPartIdx = cuGeom.absPartIdx; |
45 | | |
46 | | /* offset and dimensions of PU */ |
47 | 0 | cu.getPartIndexAndSize(puIdx, puAbsPartIdx, width, height); |
48 | 0 | } |
49 | | |
50 | | namespace |
51 | | { |
52 | | inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset) |
53 | 0 | { |
54 | 0 | return x265_clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset * (1 << (shift - 1)))) >> shift); |
55 | 0 | } |
56 | | } |
57 | | |
58 | | Predict::Predict() |
59 | 0 | { |
60 | 0 | } |
61 | | |
62 | | Predict::~Predict() |
63 | 0 | { |
64 | 0 | m_predShortYuv[0].destroy(); |
65 | 0 | m_predShortYuv[1].destroy(); |
66 | 0 | } |
67 | | |
68 | | bool Predict::allocBuffers(int csp) |
69 | 0 | { |
70 | 0 | m_csp = csp; |
71 | 0 | m_hChromaShift = CHROMA_H_SHIFT(csp); |
72 | 0 | m_vChromaShift = CHROMA_V_SHIFT(csp); |
73 | |
|
74 | 0 | return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp); |
75 | 0 | } |
76 | | |
77 | | void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma) |
78 | 0 | { |
79 | 0 | int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx]; |
80 | 0 | int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx]; |
81 | |
|
82 | 0 | if (cu.m_slice->isInterP()) |
83 | 0 | { |
84 | | /* P Slice */ |
85 | 0 | WeightValues wv0[3]; |
86 | |
|
87 | 0 | X265_CHECK(refIdx0 >= 0, "invalid P refidx\n"); |
88 | 0 | X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "P refidx out of range\n"); |
89 | 0 | const WeightParam *wp0 = cu.m_slice->m_weightPredTable[0][refIdx0]; |
90 | |
|
91 | 0 | MV mv0 = cu.m_mv[0][pu.puAbsPartIdx]; |
92 | 0 | cu.clipMv(mv0); |
93 | |
|
94 | 0 | if (cu.m_slice->m_pps->bUseWeightPred && wp0->wtPresent) |
95 | 0 | { |
96 | 0 | for (int plane = 0; plane < (bChroma ? 3 : 1); plane++) |
97 | 0 | { |
98 | 0 | wv0[plane].w = wp0[plane].inputWeight; |
99 | 0 | wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8)); |
100 | 0 | wv0[plane].shift = wp0[plane].log2WeightDenom; |
101 | 0 | wv0[plane].round = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0; |
102 | 0 | } |
103 | |
|
104 | 0 | ShortYuv& shortYuv = m_predShortYuv[0]; |
105 | |
|
106 | 0 | if (bLuma) |
107 | 0 | predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
108 | 0 | if (bChroma) |
109 | 0 | predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
110 | |
|
111 | 0 | addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma); |
112 | 0 | } |
113 | 0 | else |
114 | 0 | { |
115 | 0 | if (bLuma) |
116 | 0 | predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
117 | 0 | if (bChroma) |
118 | 0 | predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
119 | 0 | } |
120 | 0 | } |
121 | 0 | else |
122 | 0 | { |
123 | | /* B Slice */ |
124 | |
|
125 | 0 | WeightValues wv0[3], wv1[3]; |
126 | 0 | const WeightParam *pwp0, *pwp1; |
127 | |
|
128 | 0 | X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "bidir refidx0 out of range\n"); |
129 | 0 | X265_CHECK(refIdx1 < cu.m_slice->m_numRefIdx[1], "bidir refidx1 out of range\n"); |
130 | |
|
131 | 0 | if (cu.m_slice->m_pps->bUseWeightedBiPred) |
132 | 0 | { |
133 | 0 | pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL; |
134 | 0 | pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL; |
135 | |
|
136 | 0 | if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent)) |
137 | 0 | { |
138 | | /* biprediction weighting */ |
139 | 0 | for (int plane = 0; plane < (bChroma ? 3 : 1); plane++) |
140 | 0 | { |
141 | 0 | wv0[plane].w = pwp0[plane].inputWeight; |
142 | 0 | wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8)); |
143 | 0 | wv0[plane].shift = pwp0[plane].log2WeightDenom; |
144 | 0 | wv0[plane].round = 1 << pwp0[plane].log2WeightDenom; |
145 | |
|
146 | 0 | wv1[plane].w = pwp1[plane].inputWeight; |
147 | 0 | wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8)); |
148 | 0 | wv1[plane].shift = wv0[plane].shift; |
149 | 0 | wv1[plane].round = wv0[plane].round; |
150 | 0 | } |
151 | 0 | } |
152 | 0 | else |
153 | 0 | { |
154 | | /* uniprediction weighting, always outputs to wv0 */ |
155 | 0 | const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1; |
156 | 0 | for (int plane = 0; plane < (bChroma ? 3 : 1); plane++) |
157 | 0 | { |
158 | 0 | wv0[plane].w = pwp[plane].inputWeight; |
159 | 0 | wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8)); |
160 | 0 | wv0[plane].shift = pwp[plane].log2WeightDenom; |
161 | 0 | wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0; |
162 | 0 | } |
163 | 0 | } |
164 | 0 | } |
165 | 0 | else |
166 | 0 | pwp0 = pwp1 = NULL; |
167 | |
|
168 | 0 | if (refIdx0 >= 0 && refIdx1 >= 0) |
169 | 0 | { |
170 | 0 | MV mv0 = cu.m_mv[0][pu.puAbsPartIdx]; |
171 | 0 | MV mv1 = cu.m_mv[1][pu.puAbsPartIdx]; |
172 | 0 | cu.clipMv(mv0); |
173 | 0 | cu.clipMv(mv1); |
174 | |
|
175 | 0 | if (bLuma) |
176 | 0 | { |
177 | 0 | predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
178 | 0 | predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1); |
179 | 0 | } |
180 | 0 | if (bChroma) |
181 | 0 | { |
182 | 0 | predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
183 | 0 | predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1); |
184 | 0 | } |
185 | |
|
186 | 0 | if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent)) |
187 | 0 | addWeightBi(pu, predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma); |
188 | 0 | else |
189 | 0 | predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], pu.puAbsPartIdx, pu.width, pu.height, bLuma, bChroma); |
190 | 0 | } |
191 | 0 | else if (refIdx0 >= 0) |
192 | 0 | { |
193 | 0 | MV mv0 = cu.m_mv[0][pu.puAbsPartIdx]; |
194 | 0 | cu.clipMv(mv0); |
195 | |
|
196 | 0 | if (pwp0 && pwp0->wtPresent) |
197 | 0 | { |
198 | 0 | ShortYuv& shortYuv = m_predShortYuv[0]; |
199 | |
|
200 | 0 | if (bLuma) |
201 | 0 | predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
202 | 0 | if (bChroma) |
203 | 0 | predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
204 | |
|
205 | 0 | addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma); |
206 | 0 | } |
207 | 0 | else |
208 | 0 | { |
209 | 0 | if (bLuma) |
210 | 0 | predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
211 | 0 | if (bChroma) |
212 | 0 | predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0); |
213 | 0 | } |
214 | 0 | } |
215 | 0 | else |
216 | 0 | { |
217 | 0 | MV mv1 = cu.m_mv[1][pu.puAbsPartIdx]; |
218 | 0 | cu.clipMv(mv1); |
219 | | |
220 | | /* uniprediction to L1 */ |
221 | 0 | X265_CHECK(refIdx1 >= 0, "refidx1 was not positive\n"); |
222 | |
|
223 | 0 | if (pwp1 && pwp1->wtPresent) |
224 | 0 | { |
225 | 0 | ShortYuv& shortYuv = m_predShortYuv[0]; |
226 | |
|
227 | 0 | if (bLuma) |
228 | 0 | predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1); |
229 | 0 | if (bChroma) |
230 | 0 | predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1); |
231 | |
|
232 | 0 | addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma); |
233 | 0 | } |
234 | 0 | else |
235 | 0 | { |
236 | 0 | if (bLuma) |
237 | 0 | predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1); |
238 | 0 | if (bChroma) |
239 | 0 | predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1); |
240 | 0 | } |
241 | 0 | } |
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | | void Predict::predInterLumaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const |
246 | 0 | { |
247 | 0 | pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx); |
248 | 0 | intptr_t dstStride = dstYuv.m_size; |
249 | |
|
250 | 0 | intptr_t srcStride = refPic.m_stride; |
251 | 0 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; |
252 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
253 | 0 | const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; |
254 | |
|
255 | 0 | int xFrac = mv.x & 3; |
256 | 0 | int yFrac = mv.y & 3; |
257 | |
|
258 | 0 | if (!(yFrac | xFrac)) |
259 | 0 | primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride); |
260 | 0 | else if (!yFrac) |
261 | 0 | primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac); |
262 | 0 | else if (!xFrac) |
263 | 0 | primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac); |
264 | 0 | else |
265 | 0 | primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac); |
266 | 0 | } |
267 | | |
268 | | void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const |
269 | 0 | { |
270 | 0 | int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx); |
271 | 0 | intptr_t dstStride = dstSYuv.m_size; |
272 | |
|
273 | 0 | intptr_t srcStride = refPic.m_stride; |
274 | 0 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; |
275 | 0 | const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; |
276 | |
|
277 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
278 | |
|
279 | 0 | X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n"); |
280 | 0 | X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n"); |
281 | |
|
282 | 0 | int xFrac = mv.x & 3; |
283 | 0 | int yFrac = mv.y & 3; |
284 | |
|
285 | 0 | if (!(yFrac | xFrac)) |
286 | 0 | { |
287 | 0 | bool srcbufferAlignCheck = (refPic.m_cuOffsetY[pu.ctuAddr] + refPic.m_buOffsetY[pu.cuAbsPartIdx + pu.puAbsPartIdx] + srcOffset) % 64 == 0; |
288 | 0 | bool dstbufferAlignCheck = (dstSYuv.getAddrOffset(pu.puAbsPartIdx, dstSYuv.m_size) % 64) == 0; |
289 | 0 | primitives.pu[partEnum].convert_p2s[srcStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheck && dstbufferAlignCheck](src, srcStride, dst, dstStride); |
290 | 0 | } |
291 | 0 | else if (!yFrac) |
292 | 0 | primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0); |
293 | 0 | else if (!xFrac) |
294 | 0 | primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac); |
295 | 0 | else |
296 | 0 | { |
297 | 0 | ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); |
298 | 0 | int immedStride = pu.width; |
299 | 0 | int halfFilterSize = NTAPS_LUMA >> 1; |
300 | |
|
301 | 0 | primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1); |
302 | 0 | primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac); |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | | void Predict::predInterChromaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const |
307 | 0 | { |
308 | 0 | intptr_t dstStride = dstYuv.m_csize; |
309 | 0 | intptr_t refStride = refPic.m_strideC; |
310 | |
|
311 | 0 | int mvx = mv.x << (1 - m_hChromaShift); |
312 | 0 | int mvy = mv.y << (1 - m_vChromaShift); |
313 | |
|
314 | 0 | intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride; |
315 | |
|
316 | 0 | const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
317 | 0 | const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
318 | |
|
319 | 0 | pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx); |
320 | 0 | pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx); |
321 | |
|
322 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
323 | |
|
324 | 0 | int xFrac = mvx & 7; |
325 | 0 | int yFrac = mvy & 7; |
326 | |
|
327 | 0 | if (!(yFrac | xFrac)) |
328 | 0 | { |
329 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride); |
330 | 0 | primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCr, dstStride, refCr, refStride); |
331 | 0 | } |
332 | 0 | else if (!yFrac) |
333 | 0 | { |
334 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac); |
335 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac); |
336 | 0 | } |
337 | 0 | else if (!xFrac) |
338 | 0 | { |
339 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac); |
340 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac); |
341 | 0 | } |
342 | 0 | else |
343 | 0 | { |
344 | 0 | ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]); |
345 | 0 | int immedStride = pu.width >> m_hChromaShift; |
346 | 0 | int halfFilterSize = NTAPS_CHROMA >> 1; |
347 | |
|
348 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1); |
349 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac); |
350 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1); |
351 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac); |
352 | 0 | } |
353 | 0 | } |
354 | | |
355 | | void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const |
356 | 0 | { |
357 | 0 | intptr_t dstStride = dstSYuv.m_csize; |
358 | 0 | intptr_t refStride = refPic.m_strideC; |
359 | |
|
360 | 0 | int mvx = mv.x << (1 - m_hChromaShift); |
361 | 0 | int mvy = mv.y << (1 - m_vChromaShift); |
362 | |
|
363 | 0 | intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride; |
364 | |
|
365 | 0 | const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
366 | 0 | const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; |
367 | |
|
368 | 0 | int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx); |
369 | 0 | int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx); |
370 | |
|
371 | 0 | int partEnum = partitionFromSizes(pu.width, pu.height); |
372 | | |
373 | 0 | uint32_t cxWidth = pu.width >> m_hChromaShift; |
374 | |
|
375 | 0 | X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n"); |
376 | |
|
377 | 0 | int xFrac = mvx & 7; |
378 | 0 | int yFrac = mvy & 7; |
379 | |
|
380 | 0 | if (!(yFrac | xFrac)) |
381 | 0 | { |
382 | 0 | bool srcbufferAlignCheckC = (refPic.m_cuOffsetC[pu.ctuAddr] + refPic.m_buOffsetC[pu.cuAbsPartIdx + pu.puAbsPartIdx] + refOffset) % 64 == 0; |
383 | 0 | bool dstbufferAlignCheckC = dstSYuv.getChromaAddrOffset(pu.puAbsPartIdx) % 64 == 0; |
384 | 0 | primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCb, refStride, dstCb, dstStride); |
385 | 0 | primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCr, refStride, dstCr, dstStride); |
386 | 0 | } |
387 | 0 | else if (!yFrac) |
388 | 0 | { |
389 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac, 0); |
390 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac, 0); |
391 | 0 | } |
392 | 0 | else if (!xFrac) |
393 | 0 | { |
394 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac); |
395 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac); |
396 | 0 | } |
397 | 0 | else |
398 | 0 | { |
399 | 0 | ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]); |
400 | 0 | int immedStride = cxWidth; |
401 | 0 | int halfFilterSize = NTAPS_CHROMA >> 1; |
402 | |
|
403 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1); |
404 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac); |
405 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1); |
406 | 0 | primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac); |
407 | 0 | } |
408 | 0 | } |
409 | | |
410 | | /* weighted averaging for bi-pred */ |
411 | | void Predict::addWeightBi(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const |
412 | 0 | { |
413 | 0 | int x, y; |
414 | |
|
415 | 0 | int w0, w1, offset, shiftNum, shift, round; |
416 | 0 | uint32_t src0Stride, src1Stride, dststride; |
417 | |
|
418 | 0 | if (bLuma) |
419 | 0 | { |
420 | 0 | pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx); |
421 | 0 | const int16_t* srcY0 = srcYuv0.getLumaAddr(pu.puAbsPartIdx); |
422 | 0 | const int16_t* srcY1 = srcYuv1.getLumaAddr(pu.puAbsPartIdx); |
423 | | |
424 | | // Luma |
425 | 0 | w0 = wp0[0].w; |
426 | 0 | offset = wp0[0].o + wp1[0].o; |
427 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
428 | 0 | shift = wp0[0].shift + shiftNum + 1; |
429 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
430 | 0 | w1 = wp1[0].w; |
431 | |
|
432 | 0 | src0Stride = srcYuv0.m_size; |
433 | 0 | src1Stride = srcYuv1.m_size; |
434 | 0 | dststride = predYuv.m_size; |
435 | | |
436 | | // TODO: can we use weight_sp here? |
437 | 0 | for (y = pu.height - 1; y >= 0; y--) |
438 | 0 | { |
439 | 0 | for (x = pu.width - 1; x >= 0; ) |
440 | 0 | { |
441 | | // note: luma min width is 4 |
442 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
443 | 0 | x--; |
444 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
445 | 0 | x--; |
446 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
447 | 0 | x--; |
448 | 0 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); |
449 | 0 | x--; |
450 | 0 | } |
451 | |
|
452 | 0 | srcY0 += src0Stride; |
453 | 0 | srcY1 += src1Stride; |
454 | 0 | dstY += dststride; |
455 | 0 | } |
456 | 0 | } |
457 | |
|
458 | 0 | if (bChroma) |
459 | 0 | { |
460 | 0 | pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx); |
461 | 0 | pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx); |
462 | 0 | const int16_t* srcU0 = srcYuv0.getCbAddr(pu.puAbsPartIdx); |
463 | 0 | const int16_t* srcV0 = srcYuv0.getCrAddr(pu.puAbsPartIdx); |
464 | 0 | const int16_t* srcU1 = srcYuv1.getCbAddr(pu.puAbsPartIdx); |
465 | 0 | const int16_t* srcV1 = srcYuv1.getCrAddr(pu.puAbsPartIdx); |
466 | | |
467 | | // Chroma U |
468 | 0 | w0 = wp0[1].w; |
469 | 0 | offset = wp0[1].o + wp1[1].o; |
470 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
471 | 0 | shift = wp0[1].shift + shiftNum + 1; |
472 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
473 | 0 | w1 = wp1[1].w; |
474 | |
|
475 | 0 | src0Stride = srcYuv0.m_csize; |
476 | 0 | src1Stride = srcYuv1.m_csize; |
477 | 0 | dststride = predYuv.m_csize; |
478 | |
|
479 | 0 | uint32_t cwidth = pu.width >> srcYuv0.m_hChromaShift; |
480 | 0 | uint32_t cheight = pu.height >> srcYuv0.m_vChromaShift; |
481 | | |
482 | | // TODO: can we use weight_sp here? |
483 | 0 | for (y = cheight - 1; y >= 0; y--) |
484 | 0 | { |
485 | 0 | for (x = cwidth - 1; x >= 0;) |
486 | 0 | { |
487 | | // note: chroma min width is 2 |
488 | 0 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); |
489 | 0 | x--; |
490 | 0 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); |
491 | 0 | x--; |
492 | 0 | } |
493 | |
|
494 | 0 | srcU0 += src0Stride; |
495 | 0 | srcU1 += src1Stride; |
496 | 0 | dstU += dststride; |
497 | 0 | } |
498 | | |
499 | | // Chroma V |
500 | 0 | w0 = wp0[2].w; |
501 | 0 | offset = wp0[2].o + wp1[2].o; |
502 | 0 | shift = wp0[2].shift + shiftNum + 1; |
503 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
504 | 0 | w1 = wp1[2].w; |
505 | |
|
506 | 0 | for (y = cheight - 1; y >= 0; y--) |
507 | 0 | { |
508 | 0 | for (x = cwidth - 1; x >= 0;) |
509 | 0 | { |
510 | | // note: chroma min width is 2 |
511 | 0 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); |
512 | 0 | x--; |
513 | 0 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); |
514 | 0 | x--; |
515 | 0 | } |
516 | |
|
517 | 0 | srcV0 += src0Stride; |
518 | 0 | srcV1 += src1Stride; |
519 | 0 | dstV += dststride; |
520 | 0 | } |
521 | 0 | } |
522 | 0 | } |
523 | | |
524 | | /* weighted averaging for uni-pred */ |
525 | | void Predict::addWeightUni(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const |
526 | 0 | { |
527 | 0 | int w0, offset, shiftNum, shift, round; |
528 | 0 | uint32_t srcStride, dstStride; |
529 | |
|
530 | 0 | if (bLuma) |
531 | 0 | { |
532 | 0 | pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx); |
533 | 0 | const int16_t* srcY0 = srcYuv.getLumaAddr(pu.puAbsPartIdx); |
534 | | |
535 | | // Luma |
536 | 0 | w0 = wp[0].w; |
537 | 0 | offset = wp[0].offset; |
538 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
539 | 0 | shift = wp[0].shift + shiftNum; |
540 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
541 | 0 | srcStride = srcYuv.m_size; |
542 | 0 | dstStride = predYuv.m_size; |
543 | |
|
544 | 0 | primitives.weight_sp(srcY0, dstY, srcStride, dstStride, pu.width, pu.height, w0, round, shift, offset); |
545 | 0 | } |
546 | |
|
547 | 0 | if (bChroma) |
548 | 0 | { |
549 | 0 | pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx); |
550 | 0 | pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx); |
551 | 0 | const int16_t* srcU0 = srcYuv.getCbAddr(pu.puAbsPartIdx); |
552 | 0 | const int16_t* srcV0 = srcYuv.getCrAddr(pu.puAbsPartIdx); |
553 | | |
554 | | // Chroma U |
555 | 0 | w0 = wp[1].w; |
556 | 0 | offset = wp[1].offset; |
557 | 0 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; |
558 | 0 | shift = wp[1].shift + shiftNum; |
559 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
560 | |
|
561 | 0 | srcStride = srcYuv.m_csize; |
562 | 0 | dstStride = predYuv.m_csize; |
563 | |
|
564 | 0 | uint32_t cwidth = pu.width >> srcYuv.m_hChromaShift; |
565 | 0 | uint32_t cheight = pu.height >> srcYuv.m_vChromaShift; |
566 | |
|
567 | 0 | primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); |
568 | | |
569 | | // Chroma V |
570 | 0 | w0 = wp[2].w; |
571 | 0 | offset = wp[2].offset; |
572 | 0 | shift = wp[2].shift + shiftNum; |
573 | 0 | round = shift ? (1 << (shift - 1)) : 0; |
574 | |
|
575 | 0 | primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); |
576 | 0 | } |
577 | 0 | } |
578 | | |
579 | | void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize) |
580 | 0 | { |
581 | 0 | int tuSize = 1 << log2TrSize; |
582 | 0 | int sizeIdx = log2TrSize - 2; |
583 | 0 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); |
584 | |
|
585 | 0 | int filter = !!(g_intraFilterFlags[dirMode] & tuSize); |
586 | 0 | bool bFilter = log2TrSize <= 4; |
587 | 0 | primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, bFilter); |
588 | 0 | } |
589 | | |
590 | | void Predict::predIntraChromaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC) |
591 | 0 | { |
592 | 0 | int tuSize = 1 << log2TrSizeC; |
593 | 0 | int sizeIdx = log2TrSizeC - 2; |
594 | 0 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); |
595 | |
|
596 | 0 | int filter = !!(m_csp == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize)); |
597 | 0 | primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, 0); |
598 | 0 | } |
599 | | |
600 | | void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, int dirMode) |
601 | 0 | { |
602 | 0 | int tuSize = 1 << intraNeighbors.log2TrSize; |
603 | 0 | int tuSize2 = tuSize << 1; |
604 | |
|
605 | 0 | PicYuv* reconPic = cu.m_encData->m_reconPic; |
606 | 0 | pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx); |
607 | 0 | intptr_t picStride = reconPic->m_stride; |
608 | |
|
609 | 0 | fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]); |
610 | |
|
611 | 0 | pixel* refBuf = intraNeighbourBuf[0]; |
612 | 0 | pixel* fltBuf = intraNeighbourBuf[1]; |
613 | |
|
614 | 0 | pixel topLeft = refBuf[0], topLast = refBuf[tuSize2], leftLast = refBuf[tuSize2 + tuSize2]; |
615 | |
|
616 | 0 | if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize) |
617 | 0 | { |
618 | | // generate filtered intra prediction samples |
619 | |
|
620 | 0 | if (cu.m_slice->m_sps->bUseStrongIntraSmoothing && tuSize == 32) |
621 | 0 | { |
622 | 0 | const int threshold = 1 << (X265_DEPTH - 5); |
623 | |
|
624 | 0 | pixel topMiddle = refBuf[32], leftMiddle = refBuf[tuSize2 + 32]; |
625 | |
|
626 | 0 | if (abs(topLeft + topLast - (topMiddle << 1)) < threshold && |
627 | 0 | abs(topLeft + leftLast - (leftMiddle << 1)) < threshold) |
628 | 0 | { |
629 | | // "strong" bilinear interpolation |
630 | 0 | const int shift = 5 + 1; |
631 | 0 | int init = (topLeft << shift) + tuSize; |
632 | 0 | int deltaL, deltaR; |
633 | |
|
634 | 0 | deltaL = leftLast - topLeft; deltaR = topLast - topLeft; |
635 | |
|
636 | 0 | fltBuf[0] = topLeft; |
637 | 0 | for (int i = 1; i < tuSize2; i++) |
638 | 0 | { |
639 | 0 | fltBuf[i + tuSize2] = (pixel)((init + deltaL * i) >> shift); // Left Filtering |
640 | 0 | fltBuf[i] = (pixel)((init + deltaR * i) >> shift); // Above Filtering |
641 | 0 | } |
642 | 0 | fltBuf[tuSize2] = topLast; |
643 | 0 | fltBuf[tuSize2 + tuSize2] = leftLast; |
644 | 0 | return; |
645 | 0 | } |
646 | 0 | } |
647 | | |
648 | 0 | primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(refBuf, fltBuf); |
649 | 0 | } |
650 | 0 | } |
651 | | |
652 | | void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId) |
653 | 0 | { |
654 | 0 | PicYuv* reconPic = cu.m_encData->m_reconPic; |
655 | 0 | const pixel* adiOrigin = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx); |
656 | 0 | intptr_t picStride = reconPic->m_strideC; |
657 | |
|
658 | 0 | fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]); |
659 | |
|
660 | 0 | if (m_csp == X265_CSP_I444) |
661 | 0 | primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(intraNeighbourBuf[0], intraNeighbourBuf[1]); |
662 | 0 | } |
663 | | |
664 | | void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors) |
665 | 0 | { |
666 | 0 | uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth; |
667 | 0 | int log2UnitWidth = LOG2_UNIT_SIZE; |
668 | 0 | int log2UnitHeight = LOG2_UNIT_SIZE; |
669 | |
|
670 | 0 | if (!isLuma) |
671 | 0 | { |
672 | 0 | log2TrSize -= cu.m_hChromaShift; |
673 | 0 | log2UnitWidth -= cu.m_hChromaShift; |
674 | 0 | log2UnitHeight -= cu.m_vChromaShift; |
675 | 0 | } |
676 | |
|
677 | 0 | int numIntraNeighbor; |
678 | 0 | bool* bNeighborFlags = intraNeighbors->bNeighborFlags; |
679 | |
|
680 | 0 | uint32_t tuSize = 1 << log2TrSize; |
681 | 0 | int tuWidthInUnits = tuSize >> log2UnitWidth; |
682 | 0 | int tuHeightInUnits = tuSize >> log2UnitHeight; |
683 | 0 | int aboveUnits = tuWidthInUnits << 1; |
684 | 0 | int leftUnits = tuHeightInUnits << 1; |
685 | 0 | uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx; |
686 | 0 | uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + tuWidthInUnits - 1]; |
687 | 0 | uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) << LOG2_RASTER_SIZE)]; |
688 | |
|
689 | 0 | if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred) |
690 | 0 | { |
691 | 0 | bNeighborFlags[leftUnits] = isAboveLeftAvailable<false>(cu, partIdxLT); |
692 | 0 | numIntraNeighbor = (int)(bNeighborFlags[leftUnits]); |
693 | 0 | numIntraNeighbor += isAboveAvailable<false>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1); |
694 | 0 | numIntraNeighbor += isAboveRightAvailable<false>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits); |
695 | 0 | numIntraNeighbor += isLeftAvailable<false>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1); |
696 | 0 | numIntraNeighbor += isBelowLeftAvailable<false>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits); |
697 | 0 | } |
698 | 0 | else |
699 | 0 | { |
700 | 0 | bNeighborFlags[leftUnits] = isAboveLeftAvailable<true>(cu, partIdxLT); |
701 | 0 | numIntraNeighbor = (int)(bNeighborFlags[leftUnits]); |
702 | 0 | numIntraNeighbor += isAboveAvailable<true>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1); |
703 | 0 | numIntraNeighbor += isAboveRightAvailable<true>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits); |
704 | 0 | numIntraNeighbor += isLeftAvailable<true>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1); |
705 | 0 | numIntraNeighbor += isBelowLeftAvailable<true>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits); |
706 | 0 | } |
707 | |
|
708 | 0 | intraNeighbors->numIntraNeighbor = numIntraNeighbor; |
709 | 0 | intraNeighbors->totalUnits = aboveUnits + leftUnits + 1; |
710 | 0 | intraNeighbors->aboveUnits = aboveUnits; |
711 | 0 | intraNeighbors->leftUnits = leftUnits; |
712 | 0 | intraNeighbors->unitWidth = 1 << log2UnitWidth; |
713 | 0 | intraNeighbors->unitHeight = 1 << log2UnitHeight; |
714 | 0 | intraNeighbors->log2TrSize = log2TrSize; |
715 | 0 | } |
716 | | |
717 | | void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, const IntraNeighbors& intraNeighbors, pixel dst[258]) // Gathers the samples bordering the block at adiOrigin into dst, substituting dcValue or padded values where neighbours are unavailable.
718 | 0 | {
719 | 0 | const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1)); // substitute value: half of 1 << X265_DEPTH
720 | 0 | int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
721 | 0 | int totalUnits = intraNeighbors.totalUnits;
722 | 0 | uint32_t tuSize = 1 << intraNeighbors.log2TrSize;
723 | 0 | uint32_t refSize = tuSize * 2 + 1; // one corner sample plus 2 * tuSize samples
724 | | // dst layout: [0, refSize) holds the row above (top-left corner first); [refSize, 2 * refSize - 1) holds the left column, top to bottom.
725 | | // Nothing is available, perform DC prediction.
726 | 0 | if (numIntraNeighbor == 0)
727 | 0 | {
728 | | // Fill top border with DC value
729 | 0 | for (uint32_t i = 0; i < refSize; i++)
730 | 0 | dst[i] = dcValue;
731 | | 
732 | | // Fill left border with DC value
733 | 0 | for (uint32_t i = 0; i < refSize - 1; i++)
734 | 0 | dst[i + refSize] = dcValue;
735 | 0 | }
736 | 0 | else if (numIntraNeighbor == totalUnits) // every unit is flagged available: copy straight from the picture
737 | 0 | {
738 | | // Fill top border with rec. samples
739 | 0 | const pixel* adiTemp = adiOrigin - picStride - 1; // sample diagonally above-left of the block origin
740 | 0 | memcpy(dst, adiTemp, refSize * sizeof(pixel));
741 | | 
742 | | // Fill left border with rec. samples
743 | 0 | adiTemp = adiOrigin - 1;
744 | 0 | for (uint32_t i = 0; i < refSize - 1; i++)
745 | 0 | {
746 | 0 | dst[i + refSize] = adiTemp[0];
747 | 0 | adiTemp += picStride; // step one picture row down
748 | 0 | }
749 | 0 | }
750 | 0 | else // reference samples are partially available
751 | 0 | {
752 | 0 | const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
753 | 0 | const bool *pNeighborFlags;
754 | 0 | int aboveUnits = intraNeighbors.aboveUnits;
755 | 0 | int leftUnits = intraNeighbors.leftUnits;
756 | 0 | int unitWidth = intraNeighbors.unitWidth;
757 | 0 | int unitHeight = intraNeighbors.unitHeight;
758 | 0 | int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
759 | 0 | pixel adiLineBuffer[5 * MAX_CU_SIZE]; // scratch line: left-side units (bottom-most first), then the top-left unit, then the above units
760 | 0 | pixel *adi;
761 | | 
762 | | // Initialize
763 | 0 | for (int i = 0; i < totalSamples; i++)
764 | 0 | adiLineBuffer[i] = dcValue;
765 | | 
766 | | // Fill top-left sample
767 | 0 | const pixel* adiTemp = adiOrigin - picStride - 1;
768 | 0 | adi = adiLineBuffer + (leftUnits * unitHeight); // the top-left unit starts right after the left-side samples
769 | 0 | pNeighborFlags = bNeighborFlags + leftUnits; // flag index leftUnits belongs to the top-left unit
770 | 0 | if (*pNeighborFlags)
771 | 0 | {
772 | 0 | pixel topLeftVal = adiTemp[0];
773 | 0 | for (int i = 0; i < unitWidth; i++) // the single corner sample is replicated across the whole unit
774 | 0 | adi[i] = topLeftVal;
775 | 0 | }
776 | | 
777 | | // Fill left & below-left samples
778 | 0 | adiTemp += picStride;
779 | 0 | adi--; // last left-side slot: receives the sample directly left of adiOrigin
780 | | // NOTE: over copy here, but reduce condition operators (no flag is consulted; unavailable units are overwritten by the padding below)
781 | 0 | for (int j = 0; j < leftUnits * unitHeight; j++)
782 | 0 | {
783 | 0 | adi[-j] = adiTemp[j * picStride]; // rows further down land at lower scratch indices
784 | 0 | }
785 | | 
786 | | // Fill above & above-right samples
787 | 0 | adiTemp = adiOrigin - picStride;
788 | 0 | adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
789 | | // NOTE: over copy here, but reduce condition operators
790 | 0 | memcpy(adi, adiTemp, aboveUnits * unitWidth * sizeof(*adiTemp));
791 | | 
792 | | // Pad reference samples when necessary
793 | 0 | int curr = 0;
794 | 0 | int next = 1;
795 | 0 | adi = adiLineBuffer;
796 | 0 | int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth); // lets a unit index u >= leftUnits map to scratch offset pAdiLineTopRowOffset + u * unitWidth
797 | 0 | if (!bNeighborFlags[0])
798 | 0 | {
799 | | // very bottom unit of bottom-left; at least one unit will be valid.
800 | 0 | while (next < totalUnits && !bNeighborFlags[next])
801 | 0 | next++;
802 | |

803 | 0 | pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
804 | 0 | const pixel refSample = *pAdiLineNext; // first scratch sample of the first available unit
805 | | // Pad unavailable samples with new value
806 | 0 | int nextOrTop = X265_MIN(next, leftUnits);
807 | | 
808 | | // fill left column
809 | | #if HIGH_BIT_DEPTH
810 | | while (curr < nextOrTop)
811 | | {
812 | | for (int i = 0; i < unitHeight; i++)
813 | | adi[i] = refSample;
814 | | 
815 | | adi += unitHeight;
816 | | curr++;
817 | | }
818 | | 
819 | | // fill top row
820 | | while (curr < next)
821 | | {
822 | | for (int i = 0; i < unitWidth; i++)
823 | | adi[i] = refSample;
824 | | 
825 | | adi += unitWidth;
826 | | curr++;
827 | | }
828 | | #else
829 | 0 | X265_CHECK(curr <= nextOrTop, "curr must be less than or equal to nextOrTop\n");
830 | 0 | if (curr < nextOrTop)
831 | 0 | {
832 | 0 | const int fillSize = unitHeight * (nextOrTop - curr);
833 | 0 | memset(adi, refSample, fillSize * sizeof(pixel)); // NOTE(review): byte-wise fill, only correct if pixel is one byte wide in this build - confirm
834 | 0 | curr = nextOrTop;
835 | 0 | adi += fillSize;
836 | 0 | }
837 | |

838 | 0 | if (curr < next)
839 | 0 | {
840 | 0 | const int fillSize = unitWidth * (next - curr);
841 | 0 | memset(adi, refSample, fillSize * sizeof(pixel));
842 | 0 | curr = next;
843 | 0 | adi += fillSize;
844 | 0 | }
845 | 0 | #endif
846 | 0 | }
847 | | 
848 | | // pad all other reference samples.
849 | 0 | while (curr < totalUnits)
850 | 0 | {
851 | 0 | if (!bNeighborFlags[curr]) // samples not available
852 | 0 | {
853 | 0 | int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight;
854 | 0 | const pixel refSample = *(adi - 1); // propagate the scratch sample just before this unit
855 | 0 | for (int i = 0; i < numSamplesInCurrUnit; i++)
856 | 0 | adi[i] = refSample;
857 | |

858 | 0 | adi += numSamplesInCurrUnit;
859 | 0 | curr++;
860 | 0 | }
861 | 0 | else
862 | 0 | {
863 | 0 | adi += (curr >= leftUnits) ? unitWidth : unitHeight; // available unit: keep its samples and move on
864 | 0 | curr++;
865 | 0 | }
866 | 0 | }
867 | | 
868 | | // Copy processed samples
869 | 0 | adi = adiLineBuffer + refSize + unitWidth - 2; // NOTE(review): presumably the last sample of the top-left unit, which holds only if leftUnits * unitHeight == refSize - 1 - confirm against the code that fills IntraNeighbors
870 | 0 | memcpy(dst, adi, refSize * sizeof(pixel));
871 | |

872 | 0 | adi = adiLineBuffer + refSize - 1;
873 | 0 | for (int i = 0; i < (int)refSize - 1; i++)
874 | 0 | dst[i + refSize] = adi[-(i + 1)]; // walk the scratch line backwards so dst receives the left column top to bottom
875 | 0 | }
876 | 0 | }
877 | | |
878 | | template<bool cip> |
879 | | bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT) |
880 | 0 | { |
881 | 0 | uint32_t partAboveLeft; |
882 | 0 | const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT); |
883 | |
|
884 | 0 | return cuAboveLeft && (!cip || cuAboveLeft->isIntra(partAboveLeft)); |
885 | 0 | } Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<false>(x265::CUData const&, unsigned int) Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<true>(x265::CUData const&, unsigned int) |
886 | | |
887 | | template<bool cip> |
888 | | int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags) |
889 | 0 | { |
890 | 0 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; |
891 | 0 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT]; |
892 | 0 | const uint32_t idxStep = 1; |
893 | 0 | int numIntra = 0; |
894 | |
|
895 | 0 | for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++) |
896 | 0 | { |
897 | 0 | uint32_t partAbove; |
898 | 0 | const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]); |
899 | 0 | if (cuAbove && (!cip || cuAbove->isIntra(partAbove))) |
900 | 0 | { |
901 | 0 | numIntra++; |
902 | 0 | *bValidFlags = true; |
903 | 0 | } |
904 | 0 | else |
905 | 0 | *bValidFlags = false; |
906 | 0 | } |
907 | |
|
908 | 0 | return numIntra; |
909 | 0 | } Unexecuted instantiation: int x265::Predict::isAboveAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*) Unexecuted instantiation: int x265::Predict::isAboveAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*) |
910 | | |
911 | | template<bool cip> |
912 | | int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags) |
913 | 0 | { |
914 | 0 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; |
915 | 0 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB]; |
916 | 0 | const uint32_t idxStep = RASTER_SIZE; |
917 | 0 | int numIntra = 0; |
918 | |
|
919 | 0 | for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction |
920 | 0 | { |
921 | 0 | uint32_t partLeft; |
922 | 0 | const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]); |
923 | 0 | if (cuLeft && (!cip || cuLeft->isIntra(partLeft))) |
924 | 0 | { |
925 | 0 | numIntra++; |
926 | 0 | *bValidFlags = true; |
927 | 0 | } |
928 | 0 | else |
929 | 0 | *bValidFlags = false; |
930 | 0 | } |
931 | |
|
932 | 0 | return numIntra; |
933 | 0 | } Unexecuted instantiation: int x265::Predict::isLeftAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*) Unexecuted instantiation: int x265::Predict::isLeftAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*) |
934 | | |
935 | | template<bool cip> |
936 | | int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits) |
937 | 0 | { |
938 | 0 | int numIntra = 0; |
939 | |
|
940 | 0 | for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++) |
941 | 0 | { |
942 | 0 | uint32_t partAboveRight; |
943 | 0 | const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset); |
944 | 0 | if (cuAboveRight && (!cip || cuAboveRight->isIntra(partAboveRight))) |
945 | 0 | { |
946 | 0 | numIntra++; |
947 | 0 | *bValidFlags = true; |
948 | 0 | } |
949 | 0 | else |
950 | 0 | *bValidFlags = false; |
951 | 0 | } |
952 | |
|
953 | 0 | return numIntra; |
954 | 0 | } Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int) Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int) |
955 | | |
956 | | template<bool cip> |
957 | | int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits) |
958 | 0 | { |
959 | 0 | int numIntra = 0; |
960 | |
|
961 | 0 | for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction |
962 | 0 | { |
963 | 0 | uint32_t partBelowLeft; |
964 | 0 | const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset); |
965 | 0 | if (cuBelowLeft && (!cip || cuBelowLeft->isIntra(partBelowLeft))) |
966 | 0 | { |
967 | 0 | numIntra++; |
968 | 0 | *bValidFlags = true; |
969 | 0 | } |
970 | 0 | else |
971 | 0 | *bValidFlags = false; |
972 | 0 | } |
973 | |
|
974 | 0 | return numIntra; |
975 | 0 | } Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int) Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int) |