Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/common/predict.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "slice.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "predict.h"
30
#include "primitives.h"
31
32
using namespace X265_NS;
33
34
#if _MSC_VER
35
#pragma warning(disable: 4127) // conditional expression is constant
36
#endif
37
38
/* Record where prediction unit `puIdx` of `cu` lives: the address of the CTU,
 * the offset of the CU (taken from cuGeom), and the PU's own offset and
 * dimensions. */
PredictionUnit::PredictionUnit(const CUData& cu, const CUGeom& cuGeom, int puIdx)
{
    this->ctuAddr      = cu.m_cuAddr;        /* address of CTU */
    this->cuAbsPartIdx = cuGeom.absPartIdx;  /* offset of CU */

    /* getPartIndexAndSize() is handed puAbsPartIdx, width and height to fill
     * in with the offset and dimensions of the PU */
    cu.getPartIndexAndSize(puIdx, this->puAbsPartIdx, this->width, this->height);
}
49
50
namespace
51
{
52
inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset)
53
0
{
54
0
    return x265_clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset * (1 << (shift - 1)))) >> shift);
55
0
}
56
}
57
58
/* Nothing to set up here; the prediction buffers are created by allocBuffers(). */
Predict::Predict() {}
61
62
/* Release both intermediate 16-bit prediction buffers. */
Predict::~Predict()
{
    for (int list = 0; list < 2; list++)
        m_predShortYuv[list].destroy();
}
67
68
bool Predict::allocBuffers(int csp)
69
0
{
70
0
    m_csp = csp;
71
0
    m_hChromaShift = CHROMA_H_SHIFT(csp);
72
0
    m_vChromaShift = CHROMA_V_SHIFT(csp);
73
74
0
    return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
75
0
}
76
77
void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma)
78
0
{
79
0
    int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
80
0
    int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];
81
82
0
    if (cu.m_slice->isInterP())
83
0
    {
84
        /* P Slice */
85
0
        WeightValues wv0[3];
86
87
0
        X265_CHECK(refIdx0 >= 0, "invalid P refidx\n");
88
0
        X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "P refidx out of range\n");
89
0
        const WeightParam *wp0 = cu.m_slice->m_weightPredTable[0][refIdx0];
90
91
0
        MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
92
0
        cu.clipMv(mv0);
93
94
0
        if (cu.m_slice->m_pps->bUseWeightPred && wp0->wtPresent)
95
0
        {
96
0
            for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
97
0
            {
98
0
                wv0[plane].w      = wp0[plane].inputWeight;
99
0
                wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
100
0
                wv0[plane].shift  = wp0[plane].log2WeightDenom;
101
0
                wv0[plane].round  = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0;
102
0
            }
103
104
0
            ShortYuv& shortYuv = m_predShortYuv[0];
105
106
0
            if (bLuma)
107
0
                predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
108
0
            if (bChroma)
109
0
                predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
110
111
0
            addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
112
0
        }
113
0
        else
114
0
        {
115
#if ENABLE_SCC_EXT
116
            if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
117
            {
118
                if (bLuma)
119
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
120
                if (bChroma)
121
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
122
            }
123
            else
124
#endif
125
0
            {
126
0
                if (bLuma)
127
0
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
128
0
                if (bChroma)
129
0
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
130
0
            }
131
0
        }
132
0
    }
133
0
    else
134
0
    {
135
        /* B Slice */
136
137
0
        WeightValues wv0[3], wv1[3];
138
0
        const WeightParam *pwp0, *pwp1;
139
140
0
        X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "bidir refidx0 out of range\n");
141
0
        X265_CHECK(refIdx1 < cu.m_slice->m_numRefIdx[1], "bidir refidx1 out of range\n");
142
143
0
        if (cu.m_slice->m_pps->bUseWeightedBiPred)
144
0
        {
145
0
            pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
146
0
            pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;
147
148
0
            if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent))
149
0
            {
150
                /* biprediction weighting */
151
0
                for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
152
0
                {
153
0
                    wv0[plane].w = pwp0[plane].inputWeight;
154
0
                    wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
155
0
                    wv0[plane].shift = pwp0[plane].log2WeightDenom;
156
0
                    wv0[plane].round = 1 << pwp0[plane].log2WeightDenom;
157
158
0
                    wv1[plane].w = pwp1[plane].inputWeight;
159
0
                    wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8));
160
0
                    wv1[plane].shift = wv0[plane].shift;
161
0
                    wv1[plane].round = wv0[plane].round;
162
0
                }
163
0
            }
164
0
            else
165
0
            {
166
                /* uniprediction weighting, always outputs to wv0 */
167
0
                const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
168
0
                for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
169
0
                {
170
0
                    wv0[plane].w = pwp[plane].inputWeight;
171
0
                    wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
172
0
                    wv0[plane].shift = pwp[plane].log2WeightDenom;
173
0
                    wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0;
174
0
                }
175
0
            }
176
0
        }
177
0
        else
178
0
            pwp0 = pwp1 = NULL;
179
180
0
        if (refIdx0 >= 0 && refIdx1 >= 0)
181
0
        {
182
0
            MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
183
0
            MV mv1 = cu.m_mv[1][pu.puAbsPartIdx];
184
0
            cu.clipMv(mv0);
185
0
            cu.clipMv(mv1);
186
187
0
            if (bLuma)
188
0
            {
189
#if ENABLE_SCC_EXT
190
                if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
191
                    predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
192
                else
193
#endif
194
0
                    predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
195
0
                predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
196
0
            }
197
0
            if (bChroma)
198
0
            {
199
#if ENABLE_SCC_EXT
200
                if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
201
                    predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
202
                else
203
#endif
204
0
                    predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
205
0
                predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
206
0
            }
207
208
0
            if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent))
209
0
                addWeightBi(pu, predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma);
210
0
            else
211
0
                predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], pu.puAbsPartIdx, pu.width, pu.height, bLuma, bChroma);
212
0
        }
213
0
        else if (refIdx0 >= 0)
214
0
        {
215
0
            MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
216
0
            cu.clipMv(mv0);
217
218
0
            if (pwp0 && pwp0->wtPresent)
219
0
            {
220
0
                ShortYuv& shortYuv = m_predShortYuv[0];
221
222
0
                if (bLuma)
223
0
                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
224
0
                if (bChroma)
225
0
                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
226
227
0
                addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
228
0
            }
229
0
            else
230
0
            {
231
#if ENABLE_SCC_EXT
232
                if (cu.m_slice->m_param->bEnableSCC && refIdx0 == (cu.m_slice->m_numRefIdx[0] - 1))
233
                {
234
                    if (bLuma)
235
                        predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
236
                    if (bChroma)
237
                        predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refFrameList[0][refIdx0]->m_reconPic[1], mv0);
238
                }
239
                else
240
#endif
241
0
                {
242
0
                    if (bLuma)
243
0
                        predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
244
0
                    if (bChroma)
245
0
                        predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
246
0
                }
247
0
            }
248
0
        }
249
0
        else
250
0
        {
251
0
            MV mv1 = cu.m_mv[1][pu.puAbsPartIdx];
252
0
            cu.clipMv(mv1);
253
254
            /* uniprediction to L1 */
255
0
            X265_CHECK(refIdx1 >= 0, "refidx1 was not positive\n");
256
257
0
            if (pwp1 && pwp1->wtPresent)
258
0
            {
259
0
                ShortYuv& shortYuv = m_predShortYuv[0];
260
261
0
                if (bLuma)
262
0
                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
263
0
                if (bChroma)
264
0
                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
265
266
0
                addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
267
0
            }
268
0
            else
269
0
            {
270
0
                if (bLuma)
271
0
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
272
0
                if (bChroma)
273
0
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
274
0
            }
275
0
        }
276
0
    }
277
0
}
278
279
void Predict::predInterLumaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
280
0
{
281
0
    pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx);
282
0
    intptr_t dstStride = dstYuv.m_size;
283
284
0
    intptr_t srcStride = refPic.m_stride;
285
0
    intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
286
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
287
0
    const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
288
289
0
    int xFrac = mv.x & 3;
290
0
    int yFrac = mv.y & 3;
291
292
0
    if (!(yFrac | xFrac))
293
0
        primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
294
0
    else if (!yFrac)
295
0
        primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
296
0
    else if (!xFrac)
297
0
        primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
298
0
    else
299
0
        primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
300
0
}
301
302
void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
303
0
{
304
0
    int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx);
305
0
    intptr_t dstStride = dstSYuv.m_size;
306
307
0
    intptr_t srcStride = refPic.m_stride;
308
0
    intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
309
0
    const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
310
311
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
312
313
0
    X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n");
314
0
    X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
315
316
0
    int xFrac = mv.x & 3;
317
0
    int yFrac = mv.y & 3;
318
319
0
    if (!(yFrac | xFrac))
320
0
    {
321
0
        bool srcbufferAlignCheck = (refPic.m_cuOffsetY[pu.ctuAddr] + refPic.m_buOffsetY[pu.cuAbsPartIdx + pu.puAbsPartIdx] + srcOffset) % 64 == 0;
322
0
        bool dstbufferAlignCheck = (dstSYuv.getAddrOffset(pu.puAbsPartIdx, dstSYuv.m_size) % 64) == 0;
323
0
        primitives.pu[partEnum].convert_p2s[srcStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheck && dstbufferAlignCheck](src, srcStride, dst, dstStride);
324
0
    }
325
0
    else if (!yFrac)
326
0
        primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0);
327
0
    else if (!xFrac)
328
0
        primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac);
329
0
    else
330
0
    {
331
0
        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
332
0
        int immedStride = pu.width;
333
0
        int halfFilterSize = NTAPS_LUMA >> 1;
334
335
0
        primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1);
336
0
        primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac);
337
0
    }
338
0
}
339
340
void Predict::predInterChromaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
341
0
{
342
0
    intptr_t dstStride = dstYuv.m_csize;
343
0
    intptr_t refStride = refPic.m_strideC;
344
345
0
    int mvx = mv.x << (1 - m_hChromaShift);
346
0
    int mvy = mv.y << (1 - m_vChromaShift);
347
348
0
    intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
349
350
0
    const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
351
0
    const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
352
353
0
    pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx);
354
0
    pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx);
355
356
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
357
358
0
    int xFrac = mvx & 7;
359
0
    int yFrac = mvy & 7;
360
361
0
    if (!(yFrac | xFrac))
362
0
    {
363
0
        primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride);
364
0
        primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCr, dstStride, refCr, refStride);
365
0
    }
366
0
    else if (!yFrac)
367
0
    {
368
0
        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac);
369
0
        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac);
370
0
    }
371
0
    else if (!xFrac)
372
0
    {
373
0
        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac);
374
0
        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac);
375
0
    }
376
0
    else
377
0
    {
378
0
        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
379
0
        int immedStride = pu.width >> m_hChromaShift;
380
0
        int halfFilterSize = NTAPS_CHROMA >> 1;
381
382
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
383
0
        primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
384
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
385
0
        primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
386
0
    }
387
0
}
388
389
void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
390
0
{
391
0
    intptr_t dstStride = dstSYuv.m_csize;
392
0
    intptr_t refStride = refPic.m_strideC;
393
394
0
    int mvx = mv.x << (1 - m_hChromaShift);
395
0
    int mvy = mv.y << (1 - m_vChromaShift);
396
397
0
    intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
398
399
0
    const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
400
0
    const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
401
402
0
    int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx);
403
0
    int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx);
404
405
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
406
    
407
0
    uint32_t cxWidth  = pu.width >> m_hChromaShift;
408
409
0
    X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n");
410
411
0
    int xFrac = mvx & 7;
412
0
    int yFrac = mvy & 7;
413
414
0
    if (!(yFrac | xFrac))
415
0
    {
416
0
        bool srcbufferAlignCheckC = (refPic.m_cuOffsetC[pu.ctuAddr] + refPic.m_buOffsetC[pu.cuAbsPartIdx + pu.puAbsPartIdx] + refOffset) % 64 == 0;
417
0
        bool dstbufferAlignCheckC = dstSYuv.getChromaAddrOffset(pu.puAbsPartIdx) % 64 == 0;
418
0
        primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCb, refStride, dstCb, dstStride);
419
0
        primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCr, refStride, dstCr, dstStride);
420
0
    }
421
0
    else if (!yFrac)
422
0
    {
423
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac, 0);
424
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac, 0);
425
0
    }
426
0
    else if (!xFrac)
427
0
    {
428
0
        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac);
429
0
        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac);
430
0
    }
431
0
    else
432
0
    {
433
0
        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
434
0
        int immedStride = cxWidth;
435
0
        int halfFilterSize = NTAPS_CHROMA >> 1;
436
437
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
438
0
        primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
439
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
440
0
        primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
441
0
    }
442
0
}
443
444
/* weighted averaging for bi-pred */
445
void Predict::addWeightBi(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const
446
0
{
447
0
    int x, y;
448
449
0
    int w0, w1, offset, shiftNum, shift, round;
450
0
    uint32_t src0Stride, src1Stride, dststride;
451
452
0
    if (bLuma)
453
0
    {
454
0
        pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx);
455
0
        const int16_t* srcY0 = srcYuv0.getLumaAddr(pu.puAbsPartIdx);
456
0
        const int16_t* srcY1 = srcYuv1.getLumaAddr(pu.puAbsPartIdx);
457
458
        // Luma
459
0
        w0      = wp0[0].w;
460
0
        offset  = wp0[0].o + wp1[0].o;
461
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
462
0
        shift   = wp0[0].shift + shiftNum + 1;
463
0
        round   = shift ? (1 << (shift - 1)) : 0;
464
0
        w1      = wp1[0].w;
465
466
0
        src0Stride = srcYuv0.m_size;
467
0
        src1Stride = srcYuv1.m_size;
468
0
        dststride = predYuv.m_size;
469
470
        // TODO: can we use weight_sp here?
471
0
        for (y = pu.height - 1; y >= 0; y--)
472
0
        {
473
0
            for (x = pu.width - 1; x >= 0; )
474
0
            {
475
                // note: luma min width is 4
476
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
477
0
                x--;
478
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
479
0
                x--;
480
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
481
0
                x--;
482
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
483
0
                x--;
484
0
            }
485
486
0
            srcY0 += src0Stride;
487
0
            srcY1 += src1Stride;
488
0
            dstY  += dststride;
489
0
        }
490
0
    }
491
492
0
    if (bChroma)
493
0
    {
494
0
        pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx);
495
0
        pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx);
496
0
        const int16_t* srcU0 = srcYuv0.getCbAddr(pu.puAbsPartIdx);
497
0
        const int16_t* srcV0 = srcYuv0.getCrAddr(pu.puAbsPartIdx);
498
0
        const int16_t* srcU1 = srcYuv1.getCbAddr(pu.puAbsPartIdx);
499
0
        const int16_t* srcV1 = srcYuv1.getCrAddr(pu.puAbsPartIdx);
500
501
        // Chroma U
502
0
        w0      = wp0[1].w;
503
0
        offset  = wp0[1].o + wp1[1].o;
504
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
505
0
        shift   = wp0[1].shift + shiftNum + 1;
506
0
        round   = shift ? (1 << (shift - 1)) : 0;
507
0
        w1      = wp1[1].w;
508
509
0
        src0Stride = srcYuv0.m_csize;
510
0
        src1Stride = srcYuv1.m_csize;
511
0
        dststride  = predYuv.m_csize;
512
513
0
        uint32_t cwidth = pu.width >> srcYuv0.m_hChromaShift;
514
0
        uint32_t cheight = pu.height >> srcYuv0.m_vChromaShift;
515
516
        // TODO: can we use weight_sp here?
517
0
        for (y = cheight - 1; y >= 0; y--)
518
0
        {
519
0
            for (x = cwidth - 1; x >= 0;)
520
0
            {
521
                // note: chroma min width is 2
522
0
                dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
523
0
                x--;
524
0
                dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
525
0
                x--;
526
0
            }
527
528
0
            srcU0 += src0Stride;
529
0
            srcU1 += src1Stride;
530
0
            dstU  += dststride;
531
0
        }
532
533
        // Chroma V
534
0
        w0     = wp0[2].w;
535
0
        offset = wp0[2].o + wp1[2].o;
536
0
        shift  = wp0[2].shift + shiftNum + 1;
537
0
        round  = shift ? (1 << (shift - 1)) : 0;
538
0
        w1     = wp1[2].w;
539
540
0
        for (y = cheight - 1; y >= 0; y--)
541
0
        {
542
0
            for (x = cwidth - 1; x >= 0;)
543
0
            {
544
                // note: chroma min width is 2
545
0
                dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
546
0
                x--;
547
0
                dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
548
0
                x--;
549
0
            }
550
551
0
            srcV0 += src0Stride;
552
0
            srcV1 += src1Stride;
553
0
            dstV  += dststride;
554
0
        }
555
0
    }
556
0
}
557
558
/* weighted averaging for uni-pred */
559
void Predict::addWeightUni(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const
560
0
{
561
0
    int w0, offset, shiftNum, shift, round;
562
0
    uint32_t srcStride, dstStride;
563
564
0
    if (bLuma)
565
0
    {
566
0
        pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx);
567
0
        const int16_t* srcY0 = srcYuv.getLumaAddr(pu.puAbsPartIdx);
568
569
        // Luma
570
0
        w0      = wp[0].w;
571
0
        offset  = wp[0].offset;
572
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
573
0
        shift   = wp[0].shift + shiftNum;
574
0
        round   = shift ? (1 << (shift - 1)) : 0;
575
0
        srcStride = srcYuv.m_size;
576
0
        dstStride = predYuv.m_size;
577
578
0
        primitives.weight_sp(srcY0, dstY, srcStride, dstStride, pu.width, pu.height, w0, round, shift, offset);
579
0
    }
580
581
0
    if (bChroma)
582
0
    {
583
0
        pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx);
584
0
        pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx);
585
0
        const int16_t* srcU0 = srcYuv.getCbAddr(pu.puAbsPartIdx);
586
0
        const int16_t* srcV0 = srcYuv.getCrAddr(pu.puAbsPartIdx);
587
588
        // Chroma U
589
0
        w0      = wp[1].w;
590
0
        offset  = wp[1].offset;
591
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
592
0
        shift   = wp[1].shift + shiftNum;
593
0
        round   = shift ? (1 << (shift - 1)) : 0;
594
595
0
        srcStride = srcYuv.m_csize;
596
0
        dstStride = predYuv.m_csize;
597
598
0
        uint32_t cwidth = pu.width >> srcYuv.m_hChromaShift;
599
0
        uint32_t cheight = pu.height >> srcYuv.m_vChromaShift;
600
601
0
        primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
602
603
        // Chroma V
604
0
        w0     = wp[2].w;
605
0
        offset = wp[2].offset;
606
0
        shift  = wp[2].shift + shiftNum;
607
0
        round  = shift ? (1 << (shift - 1)) : 0;
608
609
0
        primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
610
0
    }
611
0
}
612
613
/* Run the luma intra predictor for mode `dirMode` on a (1 << log2TrSize)
 * square block, writing to dst with the given stride.
 *
 * The neighbour buffer is chosen from g_intraFilterFlags: buffer 1 when the
 * mode's flags contain this block size, buffer 0 otherwise. The final argument
 * handed to the primitive is true for log2TrSize <= 4. */
void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
{
    const int sizeIdx = log2TrSize - 2;
    X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");

    const int tuSize = 1 << log2TrSize;
    const int filter = (g_intraFilterFlags[dirMode] & tuSize) ? 1 : 0;
    const bool bFilter = log2TrSize <= 4;

    primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, bFilter);
}
623
624
/* Run the intra predictor for mode `dirMode` on a (1 << log2TrSizeC) square
 * chroma block, writing to dst with the given stride.
 *
 * Neighbour buffer 1 is used only for X265_CSP_I444 when the mode's
 * g_intraFilterFlags contain this block size; every other case reads
 * buffer 0. The final argument handed to the primitive is always 0. */
void Predict::predIntraChromaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC)
{
    const int sizeIdx = log2TrSizeC - 2;
    X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");

    const int tuSize = 1 << log2TrSizeC;

    int filter = 0;
    if (m_csp == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
        filter = 1;

    primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, 0);
}
633
634
void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, int dirMode)
635
0
{
636
0
    int tuSize = 1 << intraNeighbors.log2TrSize;
637
0
    int tuSize2 = tuSize << 1;
638
639
0
    PicYuv* reconPic = cu.m_encData->m_reconPic[0];
640
0
    pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
641
0
    intptr_t picStride = reconPic->m_stride;
642
643
0
    fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
644
645
0
    pixel* refBuf = intraNeighbourBuf[0];
646
0
    pixel* fltBuf = intraNeighbourBuf[1];
647
648
0
    pixel topLeft = refBuf[0], topLast = refBuf[tuSize2], leftLast = refBuf[tuSize2 + tuSize2];
649
650
0
    if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
651
0
    {
652
        // generate filtered intra prediction samples
653
654
0
        if (cu.m_slice->m_sps->bUseStrongIntraSmoothing && tuSize == 32)
655
0
        {
656
0
            const int threshold = 1 << (X265_DEPTH - 5);
657
658
0
            pixel topMiddle = refBuf[32], leftMiddle = refBuf[tuSize2 + 32];
659
660
0
            if (abs(topLeft + topLast  - (topMiddle  << 1)) < threshold &&
661
0
                abs(topLeft + leftLast - (leftMiddle << 1)) < threshold)
662
0
            {
663
                // "strong" bilinear interpolation
664
0
                const int shift = 5 + 1;
665
0
                int init = (topLeft << shift) + tuSize;
666
0
                int deltaL, deltaR;
667
668
0
                deltaL = leftLast - topLeft; deltaR = topLast - topLeft;
669
670
0
                fltBuf[0] = topLeft;
671
0
                for (int i = 1; i < tuSize2; i++)
672
0
                {
673
0
                    fltBuf[i + tuSize2] = (pixel)((init + deltaL * i) >> shift); // Left Filtering
674
0
                    fltBuf[i] = (pixel)((init + deltaR * i) >> shift);           // Above Filtering
675
0
                }
676
0
                fltBuf[tuSize2] = topLast;
677
0
                fltBuf[tuSize2 + tuSize2] = leftLast;
678
0
                return;
679
0
            }
680
0
        }
681
682
0
        primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(refBuf, fltBuf);
683
0
    }
684
0
}
685
686
void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)
687
0
{
688
0
    PicYuv* reconPic = cu.m_encData->m_reconPic[0];
689
0
    const pixel* adiOrigin = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
690
0
    intptr_t picStride = reconPic->m_strideC;
691
692
0
    fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
693
694
0
    if (m_csp == X265_CSP_I444)
695
0
        primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(intraNeighbourBuf[0], intraNeighbourBuf[1]);
696
0
}
697
698
void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors)
699
0
{
700
0
    uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
701
0
    int log2UnitWidth = LOG2_UNIT_SIZE;
702
0
    int log2UnitHeight = LOG2_UNIT_SIZE;
703
704
0
    if (!isLuma)
705
0
    {
706
0
        log2TrSize -= cu.m_hChromaShift;
707
0
        log2UnitWidth -= cu.m_hChromaShift;
708
0
        log2UnitHeight -= cu.m_vChromaShift;
709
0
    }
710
711
0
    int numIntraNeighbor;
712
0
    bool* bNeighborFlags = intraNeighbors->bNeighborFlags;
713
714
0
    uint32_t tuSize = 1 << log2TrSize;
715
0
    int  tuWidthInUnits = tuSize >> log2UnitWidth;
716
0
    int  tuHeightInUnits = tuSize >> log2UnitHeight;
717
0
    int  aboveUnits = tuWidthInUnits << 1;
718
0
    int  leftUnits = tuHeightInUnits << 1;
719
0
    uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx;
720
0
    uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + tuWidthInUnits - 1];
721
0
    uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) << LOG2_RASTER_SIZE)];
722
723
0
    if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred)
724
0
    {
725
0
        bNeighborFlags[leftUnits] = isAboveLeftAvailable<false>(cu, partIdxLT);
726
0
        numIntraNeighbor  = (int)(bNeighborFlags[leftUnits]);
727
0
        numIntraNeighbor += isAboveAvailable<false>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);
728
0
        numIntraNeighbor += isAboveRightAvailable<false>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);
729
0
        numIntraNeighbor += isLeftAvailable<false>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);
730
0
        numIntraNeighbor += isBelowLeftAvailable<false>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);
731
0
    }
732
0
    else
733
0
    {
734
0
        bNeighborFlags[leftUnits] = isAboveLeftAvailable<true>(cu, partIdxLT);
735
0
        numIntraNeighbor  = (int)(bNeighborFlags[leftUnits]);
736
0
        numIntraNeighbor += isAboveAvailable<true>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);
737
0
        numIntraNeighbor += isAboveRightAvailable<true>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);
738
0
        numIntraNeighbor += isLeftAvailable<true>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);
739
0
        numIntraNeighbor += isBelowLeftAvailable<true>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);
740
0
    }
741
742
0
    intraNeighbors->numIntraNeighbor = numIntraNeighbor;
743
0
    intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
744
0
    intraNeighbors->aboveUnits = aboveUnits;
745
0
    intraNeighbors->leftUnits = leftUnits;
746
0
    intraNeighbors->unitWidth = 1 << log2UnitWidth;
747
0
    intraNeighbors->unitHeight = 1 << log2UnitHeight;
748
0
    intraNeighbors->log2TrSize = log2TrSize;
749
0
}
750
751
void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, const IntraNeighbors& intraNeighbors, pixel dst[258])
752
0
{
753
0
    const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1));
754
0
    int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
755
0
    int totalUnits = intraNeighbors.totalUnits;
756
0
    uint32_t tuSize = 1 << intraNeighbors.log2TrSize;
757
0
    uint32_t refSize = tuSize * 2 + 1;
758
759
    // Nothing is available, perform DC prediction.
760
0
    if (numIntraNeighbor == 0)
761
0
    {
762
        // Fill top border with DC value
763
0
        for (uint32_t i = 0; i < refSize; i++)
764
0
            dst[i] = dcValue;
765
766
        // Fill left border with DC value
767
0
        for (uint32_t i = 0; i < refSize - 1; i++)
768
0
            dst[i + refSize] = dcValue;
769
0
    }
770
0
    else if (numIntraNeighbor == totalUnits)
771
0
    {
772
        // Fill top border with rec. samples
773
0
        const pixel* adiTemp = adiOrigin - picStride - 1;
774
0
        memcpy(dst, adiTemp, refSize * sizeof(pixel));
775
776
        // Fill left border with rec. samples
777
0
        adiTemp = adiOrigin - 1;
778
0
        for (uint32_t i = 0; i < refSize - 1; i++)
779
0
        {
780
0
            dst[i + refSize] = adiTemp[0];
781
0
            adiTemp += picStride;
782
0
        }
783
0
    }
784
0
    else // reference samples are partially available
785
0
    {
786
0
        const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
787
0
        const bool *pNeighborFlags;
788
0
        int aboveUnits = intraNeighbors.aboveUnits;
789
0
        int leftUnits = intraNeighbors.leftUnits;
790
0
        int unitWidth = intraNeighbors.unitWidth;
791
0
        int unitHeight = intraNeighbors.unitHeight;
792
0
        int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
793
0
        pixel adiLineBuffer[5 * MAX_CU_SIZE];
794
0
        pixel *adi;
795
796
        // Initialize
797
0
        for (int i = 0; i < totalSamples; i++)
798
0
            adiLineBuffer[i] = dcValue;
799
800
        // Fill top-left sample
801
0
        const pixel* adiTemp = adiOrigin - picStride - 1;
802
0
        adi = adiLineBuffer + (leftUnits * unitHeight);
803
0
        pNeighborFlags = bNeighborFlags + leftUnits;
804
0
        if (*pNeighborFlags)
805
0
        {
806
0
            pixel topLeftVal = adiTemp[0];
807
0
            for (int i = 0; i < unitWidth; i++)
808
0
                adi[i] = topLeftVal;
809
0
        }
810
811
        // Fill left & below-left samples
812
0
        adiTemp += picStride;
813
0
        adi--;
814
        // NOTE: over copy here, but reduce condition operators
815
0
        for (int j = 0; j < leftUnits * unitHeight; j++)
816
0
        {
817
0
            adi[-j] = adiTemp[j * picStride];
818
0
        }
819
820
        // Fill above & above-right samples
821
0
        adiTemp = adiOrigin - picStride;
822
0
        adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
823
        // NOTE: over copy here, but reduce condition operators
824
0
        memcpy(adi, adiTemp, aboveUnits * unitWidth * sizeof(*adiTemp));
825
826
        // Pad reference samples when necessary
827
0
        int curr = 0;
828
0
        int next = 1;
829
0
        adi = adiLineBuffer;
830
0
        int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth);
831
0
        if (!bNeighborFlags[0])
832
0
        {
833
            // very bottom unit of bottom-left; at least one unit will be valid.
834
0
            while (next < totalUnits && !bNeighborFlags[next])
835
0
                next++;
836
837
0
            pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
838
0
            const pixel refSample = *pAdiLineNext;
839
            // Pad unavailable samples with new value
840
0
            int nextOrTop = X265_MIN(next, leftUnits);
841
842
            // fill left column
843
#if HIGH_BIT_DEPTH
844
            while (curr < nextOrTop)
845
            {
846
                for (int i = 0; i < unitHeight; i++)
847
                    adi[i] = refSample;
848
849
                adi += unitHeight;
850
                curr++;
851
            }
852
853
            // fill top row
854
            while (curr < next)
855
            {
856
                for (int i = 0; i < unitWidth; i++)
857
                    adi[i] = refSample;
858
859
                adi += unitWidth;
860
                curr++;
861
            }
862
#else
863
0
            X265_CHECK(curr <= nextOrTop, "curr must be less than or equal to nextOrTop\n");
864
0
            if (curr < nextOrTop)
865
0
            {
866
0
                const int fillSize = unitHeight * (nextOrTop - curr);
867
0
                memset(adi, refSample, fillSize * sizeof(pixel));
868
0
                curr = nextOrTop;
869
0
                adi += fillSize;
870
0
            }
871
872
0
            if (curr < next)
873
0
            {
874
0
                const int fillSize = unitWidth * (next - curr);
875
0
                memset(adi, refSample, fillSize * sizeof(pixel));
876
0
                curr = next;
877
0
                adi += fillSize;
878
0
            }
879
0
#endif
880
0
        }
881
882
        // pad all other reference samples.
883
0
        while (curr < totalUnits)
884
0
        {
885
0
            if (!bNeighborFlags[curr]) // samples not available
886
0
            {
887
0
                int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight;
888
0
                const pixel refSample = *(adi - 1);
889
0
                for (int i = 0; i < numSamplesInCurrUnit; i++)
890
0
                    adi[i] = refSample;
891
892
0
                adi += numSamplesInCurrUnit;
893
0
                curr++;
894
0
            }
895
0
            else
896
0
            {
897
0
                adi += (curr >= leftUnits) ? unitWidth : unitHeight;
898
0
                curr++;
899
0
            }
900
0
        }
901
902
        // Copy processed samples
903
0
        adi = adiLineBuffer + refSize + unitWidth - 2;
904
0
        memcpy(dst, adi, refSize * sizeof(pixel));
905
906
0
        adi = adiLineBuffer + refSize - 1;
907
0
        for (int i = 0; i < (int)refSize - 1; i++)
908
0
            dst[i + refSize] = adi[-(i + 1)];
909
0
    }
910
0
}
911
912
template<bool cip>
913
bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT)
914
0
{
915
0
    uint32_t partAboveLeft;
916
0
    const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
917
918
0
    return cuAboveLeft && (!cip || cuAboveLeft->isIntra(partAboveLeft));
919
0
}
Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<false>(x265::CUData const&, unsigned int)
Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<true>(x265::CUData const&, unsigned int)
920
921
template<bool cip>
922
int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
923
0
{
924
0
    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
925
0
    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT];
926
0
    const uint32_t idxStep = 1;
927
0
    int numIntra = 0;
928
929
0
    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++)
930
0
    {
931
0
        uint32_t partAbove;
932
0
        const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
933
0
        if (cuAbove && (!cip || cuAbove->isIntra(partAbove)))
934
0
        {
935
0
            numIntra++;
936
0
            *bValidFlags = true;
937
0
        }
938
0
        else
939
0
            *bValidFlags = false;
940
0
    }
941
942
0
    return numIntra;
943
0
}
Unexecuted instantiation: int x265::Predict::isAboveAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*)
Unexecuted instantiation: int x265::Predict::isAboveAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*)
944
945
template<bool cip>
946
int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
947
0
{
948
0
    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
949
0
    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];
950
0
    const uint32_t idxStep = RASTER_SIZE;
951
0
    int numIntra = 0;
952
953
0
    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction
954
0
    {
955
0
        uint32_t partLeft;
956
0
        const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
957
0
        if (cuLeft && (!cip || cuLeft->isIntra(partLeft)))
958
0
        {
959
0
            numIntra++;
960
0
            *bValidFlags = true;
961
0
        }
962
0
        else
963
0
            *bValidFlags = false;
964
0
    }
965
966
0
    return numIntra;
967
0
}
Unexecuted instantiation: int x265::Predict::isLeftAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*)
Unexecuted instantiation: int x265::Predict::isLeftAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*)
968
969
template<bool cip>
970
int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits)
971
0
{
972
0
    int numIntra = 0;
973
974
0
    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++)
975
0
    {
976
0
        uint32_t partAboveRight;
977
0
        const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
978
0
        if (cuAboveRight && (!cip || cuAboveRight->isIntra(partAboveRight)))
979
0
        {
980
0
            numIntra++;
981
0
            *bValidFlags = true;
982
0
        }
983
0
        else
984
0
            *bValidFlags = false;
985
0
    }
986
987
0
    return numIntra;
988
0
}
Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int)
Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int)
989
990
template<bool cip>
991
int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits)
992
0
{
993
0
    int numIntra = 0;
994
995
0
    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction
996
0
    {
997
0
        uint32_t partBelowLeft;
998
0
        const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
999
0
        if (cuBelowLeft && (!cip || cuBelowLeft->isIntra(partBelowLeft)))
1000
0
        {
1001
0
            numIntra++;
1002
0
            *bValidFlags = true;
1003
0
        }
1004
0
        else
1005
0
            *bValidFlags = false;
1006
0
    }
1007
1008
0
    return numIntra;
1009
0
}
Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int)
Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int)