Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/common/lowres.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Gopu Govindaswamy <gopu@multicorewareinc.com>
5
 *          Ashok Kumar Mishra <ashok@multicorewareinc.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
 *
21
 * This program is also available under a commercial proprietary license.
22
 * For more information, contact us at license @ x265.com.
23
 *****************************************************************************/
24
25
#include "picyuv.h"
26
#include "lowres.h"
27
#include "mv.h"
28
29
using namespace X265_NS;
30
31
/*
32
 * Down Sample input picture
33
 */
34
static
35
void frame_lowres_core(const pixel* src0, pixel* dst0,
36
    intptr_t src_stride, intptr_t dst_stride, int width, int height)
37
0
{
38
0
    for (int y = 0; y < height; y++)
39
0
    {
40
0
        const pixel* src1 = src0 + src_stride;
41
0
        for (int x = 0; x < width; x++)
42
0
        {
43
            // slower than naive bilinear, but matches asm
44
0
#define FILTER(a, b, c, d) ((((a + b + 1) >> 1) + ((c + d + 1) >> 1) + 1) >> 1)
45
0
            dst0[x] = FILTER(src0[2 * x], src1[2 * x], src0[2 * x + 1], src1[2 * x + 1]);
46
0
#undef FILTER
47
0
        }
48
0
        src0 += src_stride * 2;
49
0
        dst0 += dst_stride;
50
0
    }
51
0
}
52
53
bool PicQPAdaptationLayer::create(uint32_t width, uint32_t height, uint32_t partWidth, uint32_t partHeight, uint32_t numAQPartInWidthExt, uint32_t numAQPartInHeightExt)
54
0
{
55
0
    aqPartWidth = partWidth;
56
0
    aqPartHeight = partHeight;
57
0
    numAQPartInWidth = (width + partWidth - 1) / partWidth;
58
0
    numAQPartInHeight = (height + partHeight - 1) / partHeight;
59
60
0
    CHECKED_MALLOC_ZERO(dActivity, double, numAQPartInWidthExt * numAQPartInHeightExt);
61
0
    CHECKED_MALLOC_ZERO(dQpOffset, double, numAQPartInWidthExt * numAQPartInHeightExt);
62
0
    CHECKED_MALLOC_ZERO(dCuTreeOffset, double, numAQPartInWidthExt * numAQPartInHeightExt);
63
64
0
    if (bQpSize)
65
0
        CHECKED_MALLOC_ZERO(dCuTreeOffset8x8, double, numAQPartInWidthExt * numAQPartInHeightExt);
66
67
0
    return true;
68
0
fail:
69
0
    return false;
70
0
}
71
72
/*
 * Allocate and lay out every buffer owned by this Lowres object.
 *
 *   param    encoder parameters; they decide which optional buffers exist
 *   origPic  full-resolution picture providing dimensions and luma margins
 *   qgSize   quantization group size: values up to 8 make the per-block
 *            tables four times larger, and exactly 8 adds invQscaleFactor8x8
 *
 * Returns true when every allocation succeeded. On the first failure it
 * returns false at once; nothing that was already allocated is released
 * here.
 */
bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
{
    isLowres = true;
    bframes = param->bframes;
    widthFullRes = origPic->m_picWidth;
    heightFullRes = origPic->m_picHeight;
    width = origPic->m_picWidth / 2;
    lines = origPic->m_picHeight / 2;
    bEnableHME = param->bEnableHME ? 1 : 0;

    /* row pitch: half width plus both horizontal margins, rounded up to a multiple of 32 */
    lumaStride = width + 2 * origPic->m_lumaMarginX;
    if (lumaStride & 31)
        lumaStride += 32 - (lumaStride & 31);

    maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
    maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
    maxBlocksInRowFullRes = maxBlocksInRow * 2;
    maxBlocksInColFullRes = maxBlocksInCol * 2;
    int cuCount = maxBlocksInRow * maxBlocksInCol;
    int cuCountFullRes = (qgSize > 8) ? cuCount : cuCount << 2;
    isHMELowres = param->bEnableHME ? 1 : 0;

    /* rounding the width to multiple of lowres CU size */
    width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
    lines = maxBlocksInCol * X265_LOWRES_CU_SIZE;

    size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
    size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX;

    /* per-block QP / weighting tables */
    if (!!param->rc.aqMode || !!param->rc.hevcAq || !!param->bAQMotion || !!param->bEnableWeightedPred || !!param->bEnableWeightedBiPred)
    {
        CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes);
        CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes);
        CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes);
        if (qgSize == 8)
            CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
        CHECKED_MALLOC_ZERO(edgeInclined, int, cuCountFullRes);
    }

    if (origPic->m_param->bAQMotion)
        CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes);
    if (origPic->m_param->bDynamicRefine || origPic->m_param->bEnableFades)
        CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);

    if (!!param->rc.hevcAq)
    {
        m_maxCUSize = param->maxCUSize;
        m_qgSize = qgSize;

        uint32_t partWidth, partHeight, nAQPartInWidth, nAQPartInHeight;

        /* Value-initialize the array so that layers which are skipped below,
         * or never reached after a failure, hold null pointers instead of
         * indeterminate ones. */
        pAQLayer = new PicQPAdaptationLayer[4]();
        maxAQDepth = 0;
        for (uint32_t d = 0; d < 4; d++)
        {
            int ctuSizeIdx = 6 - g_log2Size[param->maxCUSize];
            int aqDepth = g_log2Size[param->maxCUSize] - g_log2Size[qgSize];
            if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
                continue;

            /* Written through the array base on purpose: element 0 ends up
             * holding the index of the last enabled layer. */
            pAQLayer->minAQDepth = d;
            partWidth = param->maxCUSize >> d;
            partHeight = param->maxCUSize >> d;

            /* bQpSize must be assigned before create(), which reads it */
            if (minAQSize[ctuSizeIdx] == d)
            {
                pAQLayer[d].bQpSize = true;
                nAQPartInWidth = maxBlocksInRow * 2;
                nAQPartInHeight = maxBlocksInCol * 2;
            }
            else
            {
                pAQLayer[d].bQpSize = false;
                nAQPartInWidth = (origPic->m_picWidth + partWidth - 1) / partWidth;
                nAQPartInHeight = (origPic->m_picHeight + partHeight - 1) / partHeight;
            }

            maxAQDepth++;

            /* a layer that could not be allocated fails the whole frame */
            if (!pAQLayer[d].create(origPic->m_picWidth, origPic->m_picHeight, partWidth, partHeight, nAQPartInWidth, nAQPartInHeight))
                goto fail;
        }
    }
    CHECKED_MALLOC(propagateCost, uint16_t, cuCount);

    /* allocate lowres buffers: four planes carved out of one allocation */
    CHECKED_MALLOC_ZERO(buffer[0], pixel, 4 * planesize);

    buffer[1] = buffer[0] + planesize;
    buffer[2] = buffer[1] + planesize;
    buffer[3] = buffer[2] + planesize;

    /* plane pointers skip the top and left margins */
    lowresPlane[0] = buffer[0] + padoffset;
    lowresPlane[1] = buffer[1] + padoffset;
    lowresPlane[2] = buffer[2] + padoffset;
    lowresPlane[3] = buffer[3] + padoffset;

    if (bEnableHME || param->bEnableTemporalFilter)
    {
        size_t planesizeHalf = planesize / 2;
        size_t padoffsetHalf = padoffset / 2;
        /* allocate lower-res buffers */
        CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf);

        lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf;
        lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf;
        lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf;

        lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf;
        lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf;
        lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf;
        lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf;
    }

    CHECKED_MALLOC(intraCost, int32_t, cuCount);
    CHECKED_MALLOC(intraMode, uint8_t, cuCount);

    for (int i = 0; i < bframes + 2; i++)
    {
        for (int j = 0; j < bframes + 2; j++)
        {
            CHECKED_MALLOC(rowSatds[i][j], int32_t, maxBlocksInCol);
            CHECKED_MALLOC(lowresCosts[i][j], uint16_t, cuCount);
        }
    }

    for (int i = 0; i < 4; i++)
    {
        CHECKED_MALLOC(lowresMcstfMvs[0][i], MV, cuCount);
    }

    for (int i = 0; i < bframes + 2; i++)
    {
        CHECKED_MALLOC(lowresMvs[0][i], MV, cuCount);
        CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount);
        CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount);
        CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount);
        if (bEnableHME)
        {
            int maxBlocksInRowLowerRes = ((width/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
            int maxBlocksInColLowerRes = ((lines/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
            int cuCountLowerRes = maxBlocksInRowLowerRes * maxBlocksInColLowerRes;
            CHECKED_MALLOC(lowerResMvs[0][i], MV, cuCountLowerRes);
            CHECKED_MALLOC(lowerResMvs[1][i], MV, cuCountLowerRes);
            CHECKED_MALLOC(lowerResMvCosts[0][i], int32_t, cuCountLowerRes);
            CHECKED_MALLOC(lowerResMvCosts[1][i], int32_t, cuCountLowerRes);
        }
    }

    if (param->bHistBasedSceneCut)
    {
        quarterSampleLowResWidth = widthFullRes / 4;
        quarterSampleLowResHeight = heightFullRes / 4;
        quarterSampleLowResOriginX = 16;
        quarterSampleLowResOriginY = 16;
        /* the horizontal origin pads each row, the vertical origin pads the
         * plane height (both are 16, so the sizes are unchanged) */
        quarterSampleLowResStrideY = quarterSampleLowResWidth + 2 * quarterSampleLowResOriginX;

        size_t quarterSampleLowResPlanesize = quarterSampleLowResStrideY * (quarterSampleLowResHeight + 2 * quarterSampleLowResOriginY);
        /* allocate quarter sampled lowres buffers */
        CHECKED_MALLOC_ZERO(quarterSampleLowResBuffer, pixel, quarterSampleLowResPlanesize);

        // Allocate memory for Histograms, indexed [width segment][height segment][channel][bin].
        // The allocation macros take an element count, not a byte count. The pointer
        // tables are zero-filled so a partially built table contains only null entries.
        CHECKED_MALLOC_ZERO(picHistogram, uint32_t***, NUMBER_OF_SEGMENTS_IN_WIDTH);
        CHECKED_MALLOC_ZERO(picHistogram[0], uint32_t**, NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
        for (uint32_t wd = 1; wd < NUMBER_OF_SEGMENTS_IN_WIDTH; wd++) {
            picHistogram[wd] = picHistogram[0] + wd * NUMBER_OF_SEGMENTS_IN_HEIGHT;
        }

        for (uint32_t regionInPictureWidthIndex = 0; regionInPictureWidthIndex < NUMBER_OF_SEGMENTS_IN_WIDTH; regionInPictureWidthIndex++)
        {
            for (uint32_t regionInPictureHeightIndex = 0; regionInPictureHeightIndex < NUMBER_OF_SEGMENTS_IN_HEIGHT; regionInPictureHeightIndex++)
            {
                /* three channel pointers sharing one block of 3 * HISTOGRAM_NUMBER_OF_BINS counters */
                CHECKED_MALLOC_ZERO(picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex], uint32_t*, 3);
                CHECKED_MALLOC(picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0], uint32_t, 3 * HISTOGRAM_NUMBER_OF_BINS);
                for (uint32_t wd = 1; wd < 3; wd++) {
                    picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex][wd] = picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0] + wd * HISTOGRAM_NUMBER_OF_BINS;
                }
            }
        }
    }

    return true;

fail:
    return false;
}
257
258
/*
 * Release the buffers owned by this Lowres object.
 *
 *   param  encoder parameters; the same feature flags that gate allocation
 *          decide which optional buffers are released here
 *
 * Pointers are not reset after being freed.
 */
void Lowres::destroy(x265_param* param)
{
    /* buffer[0] / lowerResBuffer[0] each back all four of their planes */
    X265_FREE(buffer[0]);
    if(bEnableHME || param->bEnableTemporalFilter)
        X265_FREE(lowerResBuffer[0]);
    X265_FREE(intraCost);
    X265_FREE(intraMode);

    for (int i = 0; i < bframes + 2; i++)
    {
        for (int j = 0; j < bframes + 2; j++)
        {
            X265_FREE(rowSatds[i][j]);
            X265_FREE(lowresCosts[i][j]);
        }
    }

    for (int i = 0; i < bframes + 2; i++)
    {
        X265_FREE(lowresMvs[0][i]);
        X265_FREE(lowresMvs[1][i]);
        X265_FREE(lowresMvCosts[0][i]);
        X265_FREE(lowresMvCosts[1][i]);
        if (bEnableHME)
        {
            X265_FREE(lowerResMvs[0][i]);
            X265_FREE(lowerResMvs[1][i]);
            X265_FREE(lowerResMvCosts[0][i]);
            X265_FREE(lowerResMvCosts[1][i]);
        }
    }

    for (int i = 0; i < 4; i++)
    {
        X265_FREE(lowresMcstfMvs[0][i]);
    }
    X265_FREE(qpAqOffset);
    X265_FREE(invQscaleFactor);
    X265_FREE(qpCuTreeOffset);
    X265_FREE(propagateCost);
    X265_FREE(invQscaleFactor8x8);
    X265_FREE(edgeInclined);
    X265_FREE(qpAqMotionOffset);
    if (param->bDynamicRefine || param->bEnableFades)
        X265_FREE(blockVariance);

    if (maxAQDepth > 0)
    {
        /* the table indices do not depend on the layer, so compute them once */
        const int ctuSizeIdx = 6 - g_log2Size[m_maxCUSize];
        const int aqDepth = g_log2Size[m_maxCUSize] - g_log2Size[m_qgSize];

        for (uint32_t d = 0; d < 4; d++)
        {
            /* only layers enabled in aqLayerDepth own arrays */
            if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
                continue;

            X265_FREE(pAQLayer[d].dActivity);
            X265_FREE(pAQLayer[d].dQpOffset);
            X265_FREE(pAQLayer[d].dCuTreeOffset);

            if (pAQLayer[d].bQpSize == true)
                X265_FREE(pAQLayer[d].dCuTreeOffset8x8);
        }

        delete[] pAQLayer;
    }

    // Histograms
    if (param->bHistBasedSceneCut)
    {
        /* The top-level table has to be checked before it is indexed;
         * it may be null if it was never allocated. */
        if (picHistogram)
        {
            for (uint32_t segmentInFrameWidthIdx = 0; segmentInFrameWidthIdx < NUMBER_OF_SEGMENTS_IN_WIDTH; segmentInFrameWidthIdx++)
            {
                if (!picHistogram[segmentInFrameWidthIdx])
                    continue;

                for (uint32_t segmentInFrameHeightIdx = 0; segmentInFrameHeightIdx < NUMBER_OF_SEGMENTS_IN_HEIGHT; segmentInFrameHeightIdx++)
                {
                    /* entry [0] owns the counter block shared by all channels */
                    if (picHistogram[segmentInFrameWidthIdx][segmentInFrameHeightIdx])
                        X265_FREE(picHistogram[segmentInFrameWidthIdx][segmentInFrameHeightIdx][0]);
                    X265_FREE(picHistogram[segmentInFrameWidthIdx][segmentInFrameHeightIdx]);
                }
            }

            /* picHistogram[0] backs every row of the second-level table */
            X265_FREE(picHistogram[0]);
        }
        X265_FREE(picHistogram);

        X265_FREE(quarterSampleLowResBuffer);
    }
}
346
// (re) initialize lowres state
347
void Lowres::init(PicYuv *origPic, int poc)
348
0
{
349
0
    bLastMiniGopBFrame = false;
350
0
    bKeyframe = false; // Not a keyframe unless identified by lookahead
351
0
    bIsFadeEnd = false;
352
0
    frameNum = poc;
353
0
    leadingBframes = 0;
354
0
    indB = 0;
355
0
    memset(costEst, -1, sizeof(costEst));
356
0
    memset(weightedCostDelta, 0, sizeof(weightedCostDelta));
357
358
0
    if (qpAqOffset && invQscaleFactor)
359
0
        memset(costEstAq, -1, sizeof(costEstAq));
360
361
0
    for (int y = 0; y < bframes + 2; y++)
362
0
        for (int x = 0; x < bframes + 2; x++)
363
0
            rowSatds[y][x][0] = -1;
364
365
0
    for (int i = 0; i < bframes + 2; i++)
366
0
    {
367
0
        lowresMvs[0][i][0].x = 0x7FFF;
368
0
        lowresMvs[1][i][0].x = 0x7FFF;
369
0
    }
370
371
0
    for (int i = 0; i < 4; i++)
372
0
    {
373
0
        lowresMcstfMvs[0][i][0].x = 0x7FFF;
374
0
    }
375
376
0
    for (int i = 0; i < bframes + 2; i++)
377
0
        intraMbs[i] = 0;
378
0
    if (origPic->m_param->rc.vbvBufferSize)
379
0
        for (int i = 0; i < X265_LOOKAHEAD_MAX + 1; i++)
380
0
            plannedType[i] = X265_TYPE_AUTO;
381
382
    /* downscale and generate 4 hpel planes for lookahead */
383
0
    primitives.frameInitLowres(origPic->m_picOrg[0],
384
0
                               lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3],
385
0
                               origPic->m_stride, lumaStride, width, lines);
386
387
    /* extend hpel planes for motion search */
388
0
    extendPicBorder(lowresPlane[0], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
389
0
    extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
390
0
    extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
391
0
    extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
392
    
393
0
    if (origPic->m_param->bEnableHME || origPic->m_param->bEnableTemporalFilter)
394
0
    {
395
0
        primitives.frameInitLowerRes(lowresPlane[0],
396
0
            lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], lowerResPlane[3],
397
0
            lumaStride, lumaStride/2, (width / 2), (lines / 2));
398
0
        extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
399
0
        extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
400
0
        extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
401
0
        extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
402
0
        fpelLowerResPlane[0] = lowerResPlane[0];
403
0
    }
404
405
0
    fpelPlane[0] = lowresPlane[0];
406
407
0
    if (origPic->m_param->bHistBasedSceneCut)
408
0
    {
409
        // Quarter Sampled Input Picture Formation
410
        // TO DO: Replace with ASM function
411
0
        frame_lowres_core(
412
0
            lowresPlane[0],
413
0
            quarterSampleLowResBuffer + quarterSampleLowResOriginX + quarterSampleLowResOriginY * quarterSampleLowResStrideY,
414
0
            lumaStride,
415
0
            quarterSampleLowResStrideY,
416
0
            widthFullRes / 4, heightFullRes / 4);
417
0
    }
418
0
    int cuCount = maxBlocksInRow * maxBlocksInCol;
419
0
    int cuCountFullRes = (origPic->m_param->rc.qgSize > 8) ? cuCount : cuCount << 2;
420
0
    memset(intraCost, 0, sizeof(int32_t) * cuCount);
421
0
    if (!!origPic->m_param->rc.aqMode || !!origPic->m_param->rc.hevcAq || !!origPic->m_param->bAQMotion || !!origPic->m_param->bEnableWeightedPred || !!origPic->m_param->bEnableWeightedBiPred)
422
0
        {
423
0
        memset(qpAqOffset, 0, sizeof(double) * cuCountFullRes);
424
0
        memset(qpCuTreeOffset, 0,sizeof(double) * cuCountFullRes);
425
0
        memset(edgeInclined, 0, sizeof(int) * cuCountFullRes);
426
0
        }
427
0
     if (origPic->m_param->bAQMotion)
428
0
        memset(qpAqMotionOffset, 0, sizeof(double) * cuCountFullRes);
429
0
}