Coverage Report

Created: 2022-08-24 06:15

/src/x265/source/common/predict.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
* Copyright (C) 2013-2020 MulticoreWare, Inc
3
*
4
* Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5
*          Min Chen <chenm003@163.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2 of the License, or
10
* (at your option) any later version.
11
*
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
* GNU General Public License for more details.
16
*
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
*
21
* This program is also available under a commercial proprietary license.
22
* For more information, contact us at license @ x265.com.
23
*****************************************************************************/
24
25
#include "common.h"
26
#include "slice.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "predict.h"
30
#include "primitives.h"
31
32
using namespace X265_NS;
33
34
#if _MSC_VER
35
#pragma warning(disable: 4127) // conditional expression is constant
36
#endif
37
38
/* Record where a prediction unit lives: the CTU address comes from cu, the
 * CU partition offset from cuGeom, and the PU's own partition offset, width
 * and height are filled in by cu.getPartIndexAndSize() for PU index puIdx. */
PredictionUnit::PredictionUnit(const CUData& cu, const CUGeom& cuGeom, int puIdx)
{
    /* offset and dimensions of PU, written through the output arguments */
    cu.getPartIndexAndSize(puIdx, this->puAbsPartIdx, this->width, this->height);

    /* offset of CU */
    this->cuAbsPartIdx = cuGeom.absPartIdx;

    /* address of CTU */
    this->ctuAddr = cu.m_cuAddr;
}
49
50
namespace
51
{
52
inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset)
53
0
{
54
0
    return x265_clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset * (1 << (shift - 1)))) >> shift);
55
0
}
56
}
57
58
/* Construction does no work; the short prediction buffers are created
 * later by allocBuffers(). */
Predict::Predict() {}
61
62
/* Release both intermediate (int16_t) prediction buffers. */
Predict::~Predict()
{
    for (int list = 0; list < 2; list++)
        m_predShortYuv[list].destroy();
}
67
68
bool Predict::allocBuffers(int csp)
69
22.6k
{
70
22.6k
    m_csp = csp;
71
22.6k
    m_hChromaShift = CHROMA_H_SHIFT(csp);
72
22.6k
    m_vChromaShift = CHROMA_V_SHIFT(csp);
73
74
22.6k
    return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp);
75
22.6k
}
76
77
void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma)
78
0
{
79
0
    int refIdx0 = cu.m_refIdx[0][pu.puAbsPartIdx];
80
0
    int refIdx1 = cu.m_refIdx[1][pu.puAbsPartIdx];
81
82
0
    if (cu.m_slice->isInterP())
83
0
    {
84
        /* P Slice */
85
0
        WeightValues wv0[3];
86
87
0
        X265_CHECK(refIdx0 >= 0, "invalid P refidx\n");
88
0
        X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "P refidx out of range\n");
89
0
        const WeightParam *wp0 = cu.m_slice->m_weightPredTable[0][refIdx0];
90
91
0
        MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
92
0
        cu.clipMv(mv0);
93
94
0
        if (cu.m_slice->m_pps->bUseWeightPred && wp0->wtPresent)
95
0
        {
96
0
            for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
97
0
            {
98
0
                wv0[plane].w      = wp0[plane].inputWeight;
99
0
                wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
100
0
                wv0[plane].shift  = wp0[plane].log2WeightDenom;
101
0
                wv0[plane].round  = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0;
102
0
            }
103
104
0
            ShortYuv& shortYuv = m_predShortYuv[0];
105
106
0
            if (bLuma)
107
0
                predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
108
0
            if (bChroma)
109
0
                predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
110
111
0
            addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
112
0
        }
113
0
        else
114
0
        {
115
0
            if (bLuma)
116
0
                predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
117
0
            if (bChroma)
118
0
                predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
119
0
        }
120
0
    }
121
0
    else
122
0
    {
123
        /* B Slice */
124
125
0
        WeightValues wv0[3], wv1[3];
126
0
        const WeightParam *pwp0, *pwp1;
127
128
0
        X265_CHECK(refIdx0 < cu.m_slice->m_numRefIdx[0], "bidir refidx0 out of range\n");
129
0
        X265_CHECK(refIdx1 < cu.m_slice->m_numRefIdx[1], "bidir refidx1 out of range\n");
130
131
0
        if (cu.m_slice->m_pps->bUseWeightedBiPred)
132
0
        {
133
0
            pwp0 = refIdx0 >= 0 ? cu.m_slice->m_weightPredTable[0][refIdx0] : NULL;
134
0
            pwp1 = refIdx1 >= 0 ? cu.m_slice->m_weightPredTable[1][refIdx1] : NULL;
135
136
0
            if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent))
137
0
            {
138
                /* biprediction weighting */
139
0
                for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
140
0
                {
141
0
                    wv0[plane].w = pwp0[plane].inputWeight;
142
0
                    wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8));
143
0
                    wv0[plane].shift = pwp0[plane].log2WeightDenom;
144
0
                    wv0[plane].round = 1 << pwp0[plane].log2WeightDenom;
145
146
0
                    wv1[plane].w = pwp1[plane].inputWeight;
147
0
                    wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8));
148
0
                    wv1[plane].shift = wv0[plane].shift;
149
0
                    wv1[plane].round = wv0[plane].round;
150
0
                }
151
0
            }
152
0
            else
153
0
            {
154
                /* uniprediction weighting, always outputs to wv0 */
155
0
                const WeightParam* pwp = (refIdx0 >= 0) ? pwp0 : pwp1;
156
0
                for (int plane = 0; plane < (bChroma ? 3 : 1); plane++)
157
0
                {
158
0
                    wv0[plane].w = pwp[plane].inputWeight;
159
0
                    wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8));
160
0
                    wv0[plane].shift = pwp[plane].log2WeightDenom;
161
0
                    wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0;
162
0
                }
163
0
            }
164
0
        }
165
0
        else
166
0
            pwp0 = pwp1 = NULL;
167
168
0
        if (refIdx0 >= 0 && refIdx1 >= 0)
169
0
        {
170
0
            MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
171
0
            MV mv1 = cu.m_mv[1][pu.puAbsPartIdx];
172
0
            cu.clipMv(mv0);
173
0
            cu.clipMv(mv1);
174
175
0
            if (bLuma)
176
0
            {
177
0
                predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
178
0
                predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
179
0
            }
180
0
            if (bChroma)
181
0
            {
182
0
                predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
183
0
                predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
184
0
            }
185
186
0
            if (pwp0 && pwp1 && (pwp0->wtPresent || pwp1->wtPresent))
187
0
                addWeightBi(pu, predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma);
188
0
            else
189
0
                predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], pu.puAbsPartIdx, pu.width, pu.height, bLuma, bChroma);
190
0
        }
191
0
        else if (refIdx0 >= 0)
192
0
        {
193
0
            MV mv0 = cu.m_mv[0][pu.puAbsPartIdx];
194
0
            cu.clipMv(mv0);
195
196
0
            if (pwp0 && pwp0->wtPresent)
197
0
            {
198
0
                ShortYuv& shortYuv = m_predShortYuv[0];
199
200
0
                if (bLuma)
201
0
                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
202
0
                if (bChroma)
203
0
                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
204
205
0
                addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
206
0
            }
207
0
            else
208
0
            {
209
0
                if (bLuma)
210
0
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
211
0
                if (bChroma)
212
0
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
213
0
            }
214
0
        }
215
0
        else
216
0
        {
217
0
            MV mv1 = cu.m_mv[1][pu.puAbsPartIdx];
218
0
            cu.clipMv(mv1);
219
220
            /* uniprediction to L1 */
221
0
            X265_CHECK(refIdx1 >= 0, "refidx1 was not positive\n");
222
223
0
            if (pwp1 && pwp1->wtPresent)
224
0
            {
225
0
                ShortYuv& shortYuv = m_predShortYuv[0];
226
227
0
                if (bLuma)
228
0
                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
229
0
                if (bChroma)
230
0
                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
231
232
0
                addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
233
0
            }
234
0
            else
235
0
            {
236
0
                if (bLuma)
237
0
                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
238
0
                if (bChroma)
239
0
                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
240
0
            }
241
0
        }
242
0
    }
243
0
}
244
245
void Predict::predInterLumaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
246
0
{
247
0
    pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx);
248
0
    intptr_t dstStride = dstYuv.m_size;
249
250
0
    intptr_t srcStride = refPic.m_stride;
251
0
    intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
252
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
253
0
    const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
254
255
0
    int xFrac = mv.x & 3;
256
0
    int yFrac = mv.y & 3;
257
258
0
    if (!(yFrac | xFrac))
259
0
        primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
260
0
    else if (!yFrac)
261
0
        primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
262
0
    else if (!xFrac)
263
0
        primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
264
0
    else
265
0
        primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
266
0
}
267
268
void Predict::predInterLumaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
269
0
{
270
0
    int16_t* dst = dstSYuv.getLumaAddr(pu.puAbsPartIdx);
271
0
    intptr_t dstStride = dstSYuv.m_size;
272
273
0
    intptr_t srcStride = refPic.m_stride;
274
0
    intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
275
0
    const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
276
277
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
278
279
0
    X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n");
280
0
    X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n");
281
282
0
    int xFrac = mv.x & 3;
283
0
    int yFrac = mv.y & 3;
284
285
0
    if (!(yFrac | xFrac))
286
0
    {
287
0
        bool srcbufferAlignCheck = (refPic.m_cuOffsetY[pu.ctuAddr] + refPic.m_buOffsetY[pu.cuAbsPartIdx + pu.puAbsPartIdx] + srcOffset) % 64 == 0;
288
0
        bool dstbufferAlignCheck = (dstSYuv.getAddrOffset(pu.puAbsPartIdx, dstSYuv.m_size) % 64) == 0;
289
0
        primitives.pu[partEnum].convert_p2s[srcStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheck && dstbufferAlignCheck](src, srcStride, dst, dstStride);
290
0
    }
291
0
    else if (!yFrac)
292
0
        primitives.pu[partEnum].luma_hps(src, srcStride, dst, dstStride, xFrac, 0);
293
0
    else if (!xFrac)
294
0
        primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac);
295
0
    else
296
0
    {
297
0
        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
298
0
        int immedStride = pu.width;
299
0
        int halfFilterSize = NTAPS_LUMA >> 1;
300
301
0
        primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1);
302
0
        primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac);
303
0
    }
304
0
}
305
306
void Predict::predInterChromaPixel(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const
307
0
{
308
0
    intptr_t dstStride = dstYuv.m_csize;
309
0
    intptr_t refStride = refPic.m_strideC;
310
311
0
    int mvx = mv.x << (1 - m_hChromaShift);
312
0
    int mvy = mv.y << (1 - m_vChromaShift);
313
314
0
    intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
315
316
0
    const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
317
0
    const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
318
319
0
    pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx);
320
0
    pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx);
321
322
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
323
324
0
    int xFrac = mvx & 7;
325
0
    int yFrac = mvy & 7;
326
327
0
    if (!(yFrac | xFrac))
328
0
    {
329
0
        primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb, refStride);
330
0
        primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCr, dstStride, refCr, refStride);
331
0
    }
332
0
    else if (!yFrac)
333
0
    {
334
0
        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride, dstCb, dstStride, xFrac);
335
0
        primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride, dstCr, dstStride, xFrac);
336
0
    }
337
0
    else if (!xFrac)
338
0
    {
339
0
        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride, dstCb, dstStride, yFrac);
340
0
        primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride, dstCr, dstStride, yFrac);
341
0
    }
342
0
    else
343
0
    {
344
0
        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
345
0
        int immedStride = pu.width >> m_hChromaShift;
346
0
        int halfFilterSize = NTAPS_CHROMA >> 1;
347
348
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
349
0
        primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
350
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
351
0
        primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
352
0
    }
353
0
}
354
355
void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const
356
0
{
357
0
    intptr_t dstStride = dstSYuv.m_csize;
358
0
    intptr_t refStride = refPic.m_strideC;
359
360
0
    int mvx = mv.x << (1 - m_hChromaShift);
361
0
    int mvy = mv.y << (1 - m_vChromaShift);
362
363
0
    intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
364
365
0
    const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
366
0
    const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset;
367
368
0
    int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx);
369
0
    int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx);
370
371
0
    int partEnum = partitionFromSizes(pu.width, pu.height);
372
    
373
0
    uint32_t cxWidth  = pu.width >> m_hChromaShift;
374
375
0
    X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma block size expected to be multiple of 2\n");
376
377
0
    int xFrac = mvx & 7;
378
0
    int yFrac = mvy & 7;
379
380
0
    if (!(yFrac | xFrac))
381
0
    {
382
0
        bool srcbufferAlignCheckC = (refPic.m_cuOffsetC[pu.ctuAddr] + refPic.m_buOffsetC[pu.cuAbsPartIdx + pu.puAbsPartIdx] + refOffset) % 64 == 0;
383
0
        bool dstbufferAlignCheckC = dstSYuv.getChromaAddrOffset(pu.puAbsPartIdx) % 64 == 0;
384
0
        primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCb, refStride, dstCb, dstStride);
385
0
        primitives.chroma[m_csp].pu[partEnum].p2s[refStride % 64 == 0 && dstStride % 64 == 0 && srcbufferAlignCheckC && dstbufferAlignCheckC](refCr, refStride, dstCr, dstStride);
386
0
    }
387
0
    else if (!yFrac)
388
0
    {
389
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, dstCb, dstStride, xFrac, 0);
390
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, dstCr, dstStride, xFrac, 0);
391
0
    }
392
0
    else if (!xFrac)
393
0
    {
394
0
        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride, dstCb, dstStride, yFrac);
395
0
        primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride, dstCr, dstStride, yFrac);
396
0
    }
397
0
    else
398
0
    {
399
0
        ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA - 1)]);
400
0
        int immedStride = cxWidth;
401
0
        int halfFilterSize = NTAPS_CHROMA >> 1;
402
403
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride, immed, immedStride, xFrac, 1);
404
0
        primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
405
0
        primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride, immed, immedStride, xFrac, 1);
406
0
        primitives.chroma[m_csp].pu[partEnum].filter_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
407
0
    }
408
0
}
409
410
/* weighted averaging for bi-pred */
411
void Predict::addWeightBi(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const
412
0
{
413
0
    int x, y;
414
415
0
    int w0, w1, offset, shiftNum, shift, round;
416
0
    uint32_t src0Stride, src1Stride, dststride;
417
418
0
    if (bLuma)
419
0
    {
420
0
        pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx);
421
0
        const int16_t* srcY0 = srcYuv0.getLumaAddr(pu.puAbsPartIdx);
422
0
        const int16_t* srcY1 = srcYuv1.getLumaAddr(pu.puAbsPartIdx);
423
424
        // Luma
425
0
        w0      = wp0[0].w;
426
0
        offset  = wp0[0].o + wp1[0].o;
427
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
428
0
        shift   = wp0[0].shift + shiftNum + 1;
429
0
        round   = shift ? (1 << (shift - 1)) : 0;
430
0
        w1      = wp1[0].w;
431
432
0
        src0Stride = srcYuv0.m_size;
433
0
        src1Stride = srcYuv1.m_size;
434
0
        dststride = predYuv.m_size;
435
436
        // TODO: can we use weight_sp here?
437
0
        for (y = pu.height - 1; y >= 0; y--)
438
0
        {
439
0
            for (x = pu.width - 1; x >= 0; )
440
0
            {
441
                // note: luma min width is 4
442
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
443
0
                x--;
444
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
445
0
                x--;
446
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
447
0
                x--;
448
0
                dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset);
449
0
                x--;
450
0
            }
451
452
0
            srcY0 += src0Stride;
453
0
            srcY1 += src1Stride;
454
0
            dstY  += dststride;
455
0
        }
456
0
    }
457
458
0
    if (bChroma)
459
0
    {
460
0
        pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx);
461
0
        pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx);
462
0
        const int16_t* srcU0 = srcYuv0.getCbAddr(pu.puAbsPartIdx);
463
0
        const int16_t* srcV0 = srcYuv0.getCrAddr(pu.puAbsPartIdx);
464
0
        const int16_t* srcU1 = srcYuv1.getCbAddr(pu.puAbsPartIdx);
465
0
        const int16_t* srcV1 = srcYuv1.getCrAddr(pu.puAbsPartIdx);
466
467
        // Chroma U
468
0
        w0      = wp0[1].w;
469
0
        offset  = wp0[1].o + wp1[1].o;
470
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
471
0
        shift   = wp0[1].shift + shiftNum + 1;
472
0
        round   = shift ? (1 << (shift - 1)) : 0;
473
0
        w1      = wp1[1].w;
474
475
0
        src0Stride = srcYuv0.m_csize;
476
0
        src1Stride = srcYuv1.m_csize;
477
0
        dststride  = predYuv.m_csize;
478
479
0
        uint32_t cwidth = pu.width >> srcYuv0.m_hChromaShift;
480
0
        uint32_t cheight = pu.height >> srcYuv0.m_vChromaShift;
481
482
        // TODO: can we use weight_sp here?
483
0
        for (y = cheight - 1; y >= 0; y--)
484
0
        {
485
0
            for (x = cwidth - 1; x >= 0;)
486
0
            {
487
                // note: chroma min width is 2
488
0
                dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
489
0
                x--;
490
0
                dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset);
491
0
                x--;
492
0
            }
493
494
0
            srcU0 += src0Stride;
495
0
            srcU1 += src1Stride;
496
0
            dstU  += dststride;
497
0
        }
498
499
        // Chroma V
500
0
        w0     = wp0[2].w;
501
0
        offset = wp0[2].o + wp1[2].o;
502
0
        shift  = wp0[2].shift + shiftNum + 1;
503
0
        round  = shift ? (1 << (shift - 1)) : 0;
504
0
        w1     = wp1[2].w;
505
506
0
        for (y = cheight - 1; y >= 0; y--)
507
0
        {
508
0
            for (x = cwidth - 1; x >= 0;)
509
0
            {
510
                // note: chroma min width is 2
511
0
                dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
512
0
                x--;
513
0
                dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset);
514
0
                x--;
515
0
            }
516
517
0
            srcV0 += src0Stride;
518
0
            srcV1 += src1Stride;
519
0
            dstV  += dststride;
520
0
        }
521
0
    }
522
0
}
523
524
/* weighted averaging for uni-pred */
525
void Predict::addWeightUni(const PredictionUnit& pu, Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const
526
0
{
527
0
    int w0, offset, shiftNum, shift, round;
528
0
    uint32_t srcStride, dstStride;
529
530
0
    if (bLuma)
531
0
    {
532
0
        pixel* dstY = predYuv.getLumaAddr(pu.puAbsPartIdx);
533
0
        const int16_t* srcY0 = srcYuv.getLumaAddr(pu.puAbsPartIdx);
534
535
        // Luma
536
0
        w0      = wp[0].w;
537
0
        offset  = wp[0].offset;
538
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
539
0
        shift   = wp[0].shift + shiftNum;
540
0
        round   = shift ? (1 << (shift - 1)) : 0;
541
0
        srcStride = srcYuv.m_size;
542
0
        dstStride = predYuv.m_size;
543
544
0
        primitives.weight_sp(srcY0, dstY, srcStride, dstStride, pu.width, pu.height, w0, round, shift, offset);
545
0
    }
546
547
0
    if (bChroma)
548
0
    {
549
0
        pixel* dstU = predYuv.getCbAddr(pu.puAbsPartIdx);
550
0
        pixel* dstV = predYuv.getCrAddr(pu.puAbsPartIdx);
551
0
        const int16_t* srcU0 = srcYuv.getCbAddr(pu.puAbsPartIdx);
552
0
        const int16_t* srcV0 = srcYuv.getCrAddr(pu.puAbsPartIdx);
553
554
        // Chroma U
555
0
        w0      = wp[1].w;
556
0
        offset  = wp[1].offset;
557
0
        shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
558
0
        shift   = wp[1].shift + shiftNum;
559
0
        round   = shift ? (1 << (shift - 1)) : 0;
560
561
0
        srcStride = srcYuv.m_csize;
562
0
        dstStride = predYuv.m_csize;
563
564
0
        uint32_t cwidth = pu.width >> srcYuv.m_hChromaShift;
565
0
        uint32_t cheight = pu.height >> srcYuv.m_vChromaShift;
566
567
0
        primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
568
569
        // Chroma V
570
0
        w0     = wp[2].w;
571
0
        offset = wp[2].offset;
572
0
        shift  = wp[2].shift + shiftNum;
573
0
        round  = shift ? (1 << (shift - 1)) : 0;
574
575
0
        primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset);
576
0
    }
577
0
}
578
579
void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize)
580
5.33M
{
581
5.33M
    int tuSize = 1 << log2TrSize;
582
5.33M
    int sizeIdx = log2TrSize - 2;
583
5.33M
    X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
584
585
5.33M
    int filter = !!(g_intraFilterFlags[dirMode] & tuSize);
586
5.33M
    bool bFilter = log2TrSize <= 4;
587
5.33M
    primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, bFilter);
588
5.33M
}
589
590
void Predict::predIntraChromaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC)
591
7.73M
{
592
7.73M
    int tuSize = 1 << log2TrSizeC;
593
7.73M
    int sizeIdx = log2TrSizeC - 2;
594
7.73M
    X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n");
595
596
7.73M
    int filter = !!(m_csp == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize));
597
7.73M
    primitives.cu[sizeIdx].intra_pred[dirMode](dst, stride, intraNeighbourBuf[filter], dirMode, 0);
598
7.73M
}
599
600
void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, int dirMode)
601
7.03M
{
602
7.03M
    int tuSize = 1 << intraNeighbors.log2TrSize;
603
7.03M
    int tuSize2 = tuSize << 1;
604
605
7.03M
    PicYuv* reconPic = cu.m_encData->m_reconPic;
606
7.03M
    pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
607
7.03M
    intptr_t picStride = reconPic->m_stride;
608
609
7.03M
    fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
610
611
7.03M
    pixel* refBuf = intraNeighbourBuf[0];
612
7.03M
    pixel* fltBuf = intraNeighbourBuf[1];
613
614
7.03M
    pixel topLeft = refBuf[0], topLast = refBuf[tuSize2], leftLast = refBuf[tuSize2 + tuSize2];
615
616
7.03M
    if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
617
1.38M
    {
618
        // generate filtered intra prediction samples
619
620
1.38M
        if (cu.m_slice->m_sps->bUseStrongIntraSmoothing && tuSize == 32)
621
38.0k
        {
622
38.0k
            const int threshold = 1 << (X265_DEPTH - 5);
623
624
38.0k
            pixel topMiddle = refBuf[32], leftMiddle = refBuf[tuSize2 + 32];
625
626
38.0k
            if (abs(topLeft + topLast  - (topMiddle  << 1)) < threshold &&
627
38.0k
                abs(topLeft + leftLast - (leftMiddle << 1)) < threshold)
628
38.0k
            {
629
                // "strong" bilinear interpolation
630
38.0k
                const int shift = 5 + 1;
631
38.0k
                int init = (topLeft << shift) + tuSize;
632
38.0k
                int deltaL, deltaR;
633
634
38.0k
                deltaL = leftLast - topLeft; deltaR = topLast - topLeft;
635
636
38.0k
                fltBuf[0] = topLeft;
637
2.43M
                for (int i = 1; i < tuSize2; i++)
638
2.39M
                {
639
2.39M
                    fltBuf[i + tuSize2] = (pixel)((init + deltaL * i) >> shift); // Left Filtering
640
2.39M
                    fltBuf[i] = (pixel)((init + deltaR * i) >> shift);           // Above Filtering
641
2.39M
                }
642
38.0k
                fltBuf[tuSize2] = topLast;
643
38.0k
                fltBuf[tuSize2 + tuSize2] = leftLast;
644
38.0k
                return;
645
38.0k
            }
646
38.0k
        }
647
648
1.34M
        primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(refBuf, fltBuf);
649
1.34M
    }
650
7.03M
}
651
652
void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)
653
7.70M
{
654
7.70M
    PicYuv* reconPic = cu.m_encData->m_reconPic;
655
7.70M
    const pixel* adiOrigin = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
656
7.70M
    intptr_t picStride = reconPic->m_strideC;
657
658
7.70M
    fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
659
660
7.70M
    if (m_csp == X265_CSP_I444)
661
0
        primitives.cu[intraNeighbors.log2TrSize - 2].intra_filter(intraNeighbourBuf[0], intraNeighbourBuf[1]);
662
7.70M
}
663
664
void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors)
665
10.8M
{
666
10.8M
    uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth;
667
10.8M
    int log2UnitWidth = LOG2_UNIT_SIZE;
668
10.8M
    int log2UnitHeight = LOG2_UNIT_SIZE;
669
670
10.8M
    if (!isLuma)
671
3.87M
    {
672
3.87M
        log2TrSize -= cu.m_hChromaShift;
673
3.87M
        log2UnitWidth -= cu.m_hChromaShift;
674
3.87M
        log2UnitHeight -= cu.m_vChromaShift;
675
3.87M
    }
676
677
10.8M
    int numIntraNeighbor;
678
10.8M
    bool* bNeighborFlags = intraNeighbors->bNeighborFlags;
679
680
10.8M
    uint32_t tuSize = 1 << log2TrSize;
681
10.8M
    int  tuWidthInUnits = tuSize >> log2UnitWidth;
682
10.8M
    int  tuHeightInUnits = tuSize >> log2UnitHeight;
683
10.8M
    int  aboveUnits = tuWidthInUnits << 1;
684
10.8M
    int  leftUnits = tuHeightInUnits << 1;
685
10.8M
    uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx;
686
10.8M
    uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + tuWidthInUnits - 1];
687
10.8M
    uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) << LOG2_RASTER_SIZE)];
688
689
10.8M
    if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred)
690
10.8M
    {
691
10.8M
        bNeighborFlags[leftUnits] = isAboveLeftAvailable<false>(cu, partIdxLT);
692
10.8M
        numIntraNeighbor  = (int)(bNeighborFlags[leftUnits]);
693
10.8M
        numIntraNeighbor += isAboveAvailable<false>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);
694
10.8M
        numIntraNeighbor += isAboveRightAvailable<false>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);
695
10.8M
        numIntraNeighbor += isLeftAvailable<false>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);
696
10.8M
        numIntraNeighbor += isBelowLeftAvailable<false>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);
697
10.8M
    }
698
9.07k
    else
699
9.07k
    {
700
9.07k
        bNeighborFlags[leftUnits] = isAboveLeftAvailable<true>(cu, partIdxLT);
701
9.07k
        numIntraNeighbor  = (int)(bNeighborFlags[leftUnits]);
702
9.07k
        numIntraNeighbor += isAboveAvailable<true>(cu, partIdxLT, partIdxRT, bNeighborFlags + leftUnits + 1);
703
9.07k
        numIntraNeighbor += isAboveRightAvailable<true>(cu, partIdxRT, bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits);
704
9.07k
        numIntraNeighbor += isLeftAvailable<true>(cu, partIdxLT, partIdxLB, bNeighborFlags + leftUnits - 1);
705
9.07k
        numIntraNeighbor += isBelowLeftAvailable<true>(cu, partIdxLB, bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits);
706
9.07k
    }
707
708
10.8M
    intraNeighbors->numIntraNeighbor = numIntraNeighbor;
709
10.8M
    intraNeighbors->totalUnits = aboveUnits + leftUnits + 1;
710
10.8M
    intraNeighbors->aboveUnits = aboveUnits;
711
10.8M
    intraNeighbors->leftUnits = leftUnits;
712
10.8M
    intraNeighbors->unitWidth = 1 << log2UnitWidth;
713
10.8M
    intraNeighbors->unitHeight = 1 << log2UnitHeight;
714
10.8M
    intraNeighbors->log2TrSize = log2TrSize;
715
10.8M
}
716
717
void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, const IntraNeighbors& intraNeighbors, pixel dst[258])
718
14.6M
{
719
14.6M
    const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1));
720
14.6M
    int numIntraNeighbor = intraNeighbors.numIntraNeighbor;
721
14.6M
    int totalUnits = intraNeighbors.totalUnits;
722
14.6M
    uint32_t tuSize = 1 << intraNeighbors.log2TrSize;
723
14.6M
    uint32_t refSize = tuSize * 2 + 1;
724
725
    // Nothing is available, perform DC prediction.
726
14.6M
    if (numIntraNeighbor == 0)
727
51.7k
    {
728
        // Fill top border with DC value
729
1.12M
        for (uint32_t i = 0; i < refSize; i++)
730
1.06M
            dst[i] = dcValue;
731
732
        // Fill left border with DC value
733
1.06M
        for (uint32_t i = 0; i < refSize - 1; i++)
734
1.01M
            dst[i + refSize] = dcValue;
735
51.7k
    }
736
14.5M
    else if (numIntraNeighbor == totalUnits)
737
3.93M
    {
738
        // Fill top border with rec. samples
739
3.93M
        const pixel* adiTemp = adiOrigin - picStride - 1;
740
3.93M
        memcpy(dst, adiTemp, refSize * sizeof(pixel));
741
742
        // Fill left border with rec. samples
743
3.93M
        adiTemp = adiOrigin - 1;
744
41.4M
        for (uint32_t i = 0; i < refSize - 1; i++)
745
37.5M
        {
746
37.5M
            dst[i + refSize] = adiTemp[0];
747
37.5M
            adiTemp += picStride;
748
37.5M
        }
749
3.93M
    }
750
10.6M
    else // reference samples are partially available
751
10.6M
    {
752
10.6M
        const bool *bNeighborFlags = intraNeighbors.bNeighborFlags;
753
10.6M
        const bool *pNeighborFlags;
754
10.6M
        int aboveUnits = intraNeighbors.aboveUnits;
755
10.6M
        int leftUnits = intraNeighbors.leftUnits;
756
10.6M
        int unitWidth = intraNeighbors.unitWidth;
757
10.6M
        int unitHeight = intraNeighbors.unitHeight;
758
10.6M
        int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth);
759
10.6M
        pixel adiLineBuffer[5 * MAX_CU_SIZE];
760
10.6M
        pixel *adi;
761
762
        // Initialize
763
264M
        for (int i = 0; i < totalSamples; i++)
764
253M
            adiLineBuffer[i] = dcValue;
765
766
        // Fill top-left sample
767
10.6M
        const pixel* adiTemp = adiOrigin - picStride - 1;
768
10.6M
        adi = adiLineBuffer + (leftUnits * unitHeight);
769
10.6M
        pNeighborFlags = bNeighborFlags + leftUnits;
770
10.6M
        if (*pNeighborFlags)
771
9.60M
        {
772
9.60M
            pixel topLeftVal = adiTemp[0];
773
37.9M
            for (int i = 0; i < unitWidth; i++)
774
28.3M
                adi[i] = topLeftVal;
775
9.60M
        }
776
777
        // Fill left & below-left samples
778
10.6M
        adiTemp += picStride;
779
10.6M
        adi--;
780
        // NOTE: over copy here, but reduce condition operators
781
121M
        for (int j = 0; j < leftUnits * unitHeight; j++)
782
111M
        {
783
111M
            adi[-j] = adiTemp[j * picStride];
784
111M
        }
785
786
        // Fill above & above-right samples
787
10.6M
        adiTemp = adiOrigin - picStride;
788
10.6M
        adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth;
789
        // NOTE: over copy here, but reduce condition operators
790
10.6M
        memcpy(adi, adiTemp, aboveUnits * unitWidth * sizeof(*adiTemp));
791
792
        // Pad reference samples when necessary
793
10.6M
        int curr = 0;
794
10.6M
        int next = 1;
795
10.6M
        adi = adiLineBuffer;
796
10.6M
        int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth);
797
10.6M
        if (!bNeighborFlags[0])
798
10.4M
        {
799
            // very bottom unit of bottom-left; at least one unit will be valid.
800
21.9M
            while (next < totalUnits && !bNeighborFlags[next])
801
11.5M
                next++;
802
803
10.4M
            pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth)));
804
10.4M
            const pixel refSample = *pAdiLineNext;
805
            // Pad unavailable samples with new value
806
10.4M
            int nextOrTop = X265_MIN(next, leftUnits);
807
808
            // fill left column
809
#if HIGH_BIT_DEPTH
810
            while (curr < nextOrTop)
811
            {
812
                for (int i = 0; i < unitHeight; i++)
813
                    adi[i] = refSample;
814
815
                adi += unitHeight;
816
                curr++;
817
            }
818
819
            // fill top row
820
            while (curr < next)
821
            {
822
                for (int i = 0; i < unitWidth; i++)
823
                    adi[i] = refSample;
824
825
                adi += unitWidth;
826
                curr++;
827
            }
828
#else
829
10.4M
            X265_CHECK(curr <= nextOrTop, "curr must be less than or equal to nextOrTop\n");
830
10.4M
            if (curr < nextOrTop)
831
10.4M
            {
832
10.4M
                const int fillSize = unitHeight * (nextOrTop - curr);
833
10.4M
                memset(adi, refSample, fillSize * sizeof(pixel));
834
10.4M
                curr = nextOrTop;
835
10.4M
                adi += fillSize;
836
10.4M
            }
837
838
10.4M
            if (curr < next)
839
580k
            {
840
580k
                const int fillSize = unitWidth * (next - curr);
841
580k
                memset(adi, refSample, fillSize * sizeof(pixel));
842
580k
                curr = next;
843
580k
                adi += fillSize;
844
580k
            }
845
10.4M
#endif
846
10.4M
        }
847
848
        // pad all other reference samples.
849
83.0M
        while (curr < totalUnits)
850
72.4M
        {
851
72.4M
            if (!bNeighborFlags[curr]) // samples not available
852
12.5M
            {
853
12.5M
                int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight;
854
12.5M
                const pixel refSample = *(adi - 1);
855
46.1M
                for (int i = 0; i < numSamplesInCurrUnit; i++)
856
33.5M
                    adi[i] = refSample;
857
858
12.5M
                adi += numSamplesInCurrUnit;
859
12.5M
                curr++;
860
12.5M
            }
861
59.8M
            else
862
59.8M
            {
863
59.8M
                adi += (curr >= leftUnits) ? unitWidth : unitHeight;
864
59.8M
                curr++;
865
59.8M
            }
866
72.4M
        }
867
868
        // Copy processed samples
869
10.6M
        adi = adiLineBuffer + refSize + unitWidth - 2;
870
10.6M
        memcpy(dst, adi, refSize * sizeof(pixel));
871
872
10.6M
        adi = adiLineBuffer + refSize - 1;
873
122M
        for (int i = 0; i < (int)refSize - 1; i++)
874
112M
            dst[i + refSize] = adi[-(i + 1)];
875
10.6M
    }
876
14.6M
}
877
878
template<bool cip>
879
bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT)
880
10.8M
{
881
10.8M
    uint32_t partAboveLeft;
882
10.8M
    const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
883
884
10.8M
    return cuAboveLeft && (!cip || cuAboveLeft->isIntra(partAboveLeft));
885
10.8M
}
bool x265::Predict::isAboveLeftAvailable<false>(x265::CUData const&, unsigned int)
Line
Count
Source
880
10.8M
{
881
10.8M
    uint32_t partAboveLeft;
882
10.8M
    const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT);
883
884
10.8M
    return cuAboveLeft && (!cip || cuAboveLeft->isIntra(partAboveLeft));
885
10.8M
}
Unexecuted instantiation: bool x265::Predict::isAboveLeftAvailable<true>(x265::CUData const&, unsigned int)
886
887
template<bool cip>
888
int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags)
889
10.8M
{
890
10.8M
    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
891
10.8M
    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT];
892
10.8M
    const uint32_t idxStep = 1;
893
10.8M
    int numIntra = 0;
894
895
29.5M
    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++)
896
18.6M
    {
897
18.6M
        uint32_t partAbove;
898
18.6M
        const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
899
18.6M
        if (cuAbove && (!cip || cuAbove->isIntra(partAbove)))
900
17.5M
        {
901
17.5M
            numIntra++;
902
17.5M
            *bValidFlags = true;
903
17.5M
        }
904
1.07M
        else
905
1.07M
            *bValidFlags = false;
906
18.6M
    }
907
908
10.8M
    return numIntra;
909
10.8M
}
int x265::Predict::isAboveAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*)
Line
Count
Source
889
10.8M
{
890
10.8M
    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
891
10.8M
    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT];
892
10.8M
    const uint32_t idxStep = 1;
893
10.8M
    int numIntra = 0;
894
895
29.5M
    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags++)
896
18.6M
    {
897
18.6M
        uint32_t partAbove;
898
18.6M
        const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]);
899
18.6M
        if (cuAbove && (!cip || cuAbove->isIntra(partAbove)))
900
17.5M
        {
901
17.5M
            numIntra++;
902
17.5M
            *bValidFlags = true;
903
17.5M
        }
904
1.07M
        else
905
1.07M
            *bValidFlags = false;
906
18.6M
    }
907
908
10.8M
    return numIntra;
909
10.8M
}
Unexecuted instantiation: int x265::Predict::isAboveAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*)
910
911
template<bool cip>
912
int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags)
913
10.8M
{
914
10.8M
    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
915
10.8M
    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];
916
10.8M
    const uint32_t idxStep = RASTER_SIZE;
917
10.8M
    int numIntra = 0;
918
919
29.4M
    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction
920
18.6M
    {
921
18.6M
        uint32_t partLeft;
922
18.6M
        const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
923
18.6M
        if (cuLeft && (!cip || cuLeft->isIntra(partLeft)))
924
17.6M
        {
925
17.6M
            numIntra++;
926
17.6M
            *bValidFlags = true;
927
17.6M
        }
928
1.01M
        else
929
1.01M
            *bValidFlags = false;
930
18.6M
    }
931
932
10.8M
    return numIntra;
933
10.8M
}
int x265::Predict::isLeftAvailable<false>(x265::CUData const&, unsigned int, unsigned int, bool*)
Line
Count
Source
913
10.8M
{
914
10.8M
    const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
915
10.8M
    const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];
916
10.8M
    const uint32_t idxStep = RASTER_SIZE;
917
10.8M
    int numIntra = 0;
918
919
29.4M
    for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction
920
18.6M
    {
921
18.6M
        uint32_t partLeft;
922
18.6M
        const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]);
923
18.6M
        if (cuLeft && (!cip || cuLeft->isIntra(partLeft)))
924
17.6M
        {
925
17.6M
            numIntra++;
926
17.6M
            *bValidFlags = true;
927
17.6M
        }
928
1.01M
        else
929
1.01M
            *bValidFlags = false;
930
18.6M
    }
931
932
10.8M
    return numIntra;
933
10.8M
}
Unexecuted instantiation: int x265::Predict::isLeftAvailable<true>(x265::CUData const&, unsigned int, unsigned int, bool*)
934
935
template<bool cip>
936
int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, bool* bValidFlags, uint32_t numUnits)
937
10.8M
{
938
10.8M
    int numIntra = 0;
939
940
29.4M
    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++)
941
18.6M
    {
942
18.6M
        uint32_t partAboveRight;
943
18.6M
        const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
944
18.6M
        if (cuAboveRight && (!cip || cuAboveRight->isIntra(partAboveRight)))
945
11.6M
        {
946
11.6M
            numIntra++;
947
11.6M
            *bValidFlags = true;
948
11.6M
        }
949
7.03M
        else
950
7.03M
            *bValidFlags = false;
951
18.6M
    }
952
953
10.8M
    return numIntra;
954
10.8M
}
int x265::Predict::isAboveRightAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int)
Line
Count
Source
937
10.8M
{
938
10.8M
    int numIntra = 0;
939
940
29.4M
    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++)
941
18.6M
    {
942
18.6M
        uint32_t partAboveRight;
943
18.6M
        const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset);
944
18.6M
        if (cuAboveRight && (!cip || cuAboveRight->isIntra(partAboveRight)))
945
11.6M
        {
946
11.6M
            numIntra++;
947
11.6M
            *bValidFlags = true;
948
11.6M
        }
949
7.03M
        else
950
7.03M
            *bValidFlags = false;
951
18.6M
    }
952
953
10.8M
    return numIntra;
954
10.8M
}
Unexecuted instantiation: int x265::Predict::isAboveRightAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int)
955
956
template<bool cip>
957
int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, bool* bValidFlags, uint32_t numUnits)
958
10.8M
{
959
10.8M
    int numIntra = 0;
960
961
29.4M
    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction
962
18.6M
    {
963
18.6M
        uint32_t partBelowLeft;
964
18.6M
        const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
965
18.6M
        if (cuBelowLeft && (!cip || cuBelowLeft->isIntra(partBelowLeft)))
966
5.29M
        {
967
5.29M
            numIntra++;
968
5.29M
            *bValidFlags = true;
969
5.29M
        }
970
13.3M
        else
971
13.3M
            *bValidFlags = false;
972
18.6M
    }
973
974
10.8M
    return numIntra;
975
10.8M
}
int x265::Predict::isBelowLeftAvailable<false>(x265::CUData const&, unsigned int, bool*, unsigned int)
Line
Count
Source
958
10.8M
{
959
10.8M
    int numIntra = 0;
960
961
29.4M
    for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags--) // opposite direction
962
18.6M
    {
963
18.6M
        uint32_t partBelowLeft;
964
18.6M
        const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset);
965
18.6M
        if (cuBelowLeft && (!cip || cuBelowLeft->isIntra(partBelowLeft)))
966
5.29M
        {
967
5.29M
            numIntra++;
968
5.29M
            *bValidFlags = true;
969
5.29M
        }
970
13.3M
        else
971
13.3M
            *bValidFlags = false;
972
18.6M
    }
973
974
10.8M
    return numIntra;
975
10.8M
}
Unexecuted instantiation: int x265::Predict::isBelowLeftAvailable<true>(x265::CUData const&, unsigned int, bool*, unsigned int)