Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/encoder/frameencoder.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
5
 *          Min Chen <chenm003@163.com>
6
 *          Steve Borho <steve@borho.org>
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with this program; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21
 *
22
 * This program is also available under a commercial proprietary license.
23
 * For more information, contact us at license @ x265.com.
24
 *****************************************************************************/
25
26
#include "common.h"
27
#include "frame.h"
28
#include "framedata.h"
29
#include "wavefront.h"
30
#include "param.h"
31
32
#include "encoder.h"
33
#include "frameencoder.h"
34
#include "common.h"
35
#include "slicetype.h"
36
#include "nal.h"
37
#include "temporalfilter.h"
38
39
namespace X265_NS {
40
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param);
41
42
/* Put the frame encoder into a well-defined idle state.  Nothing is allocated
 * here: the buffers referenced below are created later (init(), threadMain()
 * and compressFrame() in this file) and are released by destroy(). */
FrameEncoder::FrameEncoder()
{
    m_reconfigure = false;
    m_isFrameEncoder = true;
    m_threadActive = true;
    m_activeWorkerCount = 0;
    m_completionCount = 0;
    m_outStreams = NULL;
    m_backupStreams = NULL;
    m_substreamSizes = NULL;
    m_nr = NULL;
    m_tld = NULL;
    m_rows = NULL;
    m_top = NULL;
    m_param = NULL;
    m_cuGeoms = NULL;
    m_ctuGeomMap = NULL;

    /* These buffers are allocated by init() but are released unconditionally
     * by destroy().  Start them at NULL so that destroying an encoder whose
     * init() never ran does not hand indeterminate pointers to X265_FREE. */
    m_sliceBaseRow = NULL;
    m_sliceMaxBlockRow = NULL;
    m_bAllRowsStop = NULL;
    m_vbvResetTriggerRow = NULL;
    m_retFrameBuffer = NULL;

    m_localTldIdx = 0;
    memset(&m_rce, 0, sizeof(RateControlEntry));

    /* per-layer bookkeeping: no frame assigned yet, output clock starts now */
    for (int layer = 0; layer < MAX_LAYERS; layer++)
    {
        m_prevOutputTime[layer] = x265_mdate();
        m_slicetypeWaitTime[layer] = 0;
        m_frame[layer] = NULL;
    }
}
68
69
void FrameEncoder::destroy()
70
0
{
71
0
    if (m_pool)
72
0
    {
73
0
        if (!m_jpId)
74
0
        {
75
0
            int numTLD = m_pool->m_numWorkers;
76
0
            if (!m_param->bEnableWavefront)
77
0
                numTLD += m_pool->m_numProviders;
78
0
            for (int i = 0; i < numTLD; i++)
79
0
                m_tld[i].destroy();
80
0
            delete [] m_tld;
81
0
        }
82
0
    }
83
0
    else
84
0
    {
85
0
        m_tld->destroy();
86
0
        delete m_tld;
87
0
    }
88
89
0
    delete[] m_rows;
90
0
    delete[] m_outStreams;
91
0
    delete[] m_backupStreams;
92
0
    X265_FREE(m_sliceBaseRow);
93
0
    X265_FREE((void*)m_bAllRowsStop);
94
0
    X265_FREE((void*)m_vbvResetTriggerRow);
95
0
    X265_FREE(m_sliceMaxBlockRow);
96
0
    X265_FREE(m_cuGeoms);
97
0
    X265_FREE(m_ctuGeomMap);
98
0
    X265_FREE(m_substreamSizes);
99
0
    X265_FREE(m_nr);
100
0
    X265_FREE(m_retFrameBuffer);
101
102
0
    m_frameFilter.destroy();
103
104
0
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
105
0
    {
106
0
        delete m_rce.picTimingSEI;
107
0
        delete m_rce.hrdTiming;
108
0
    }
109
0
}
110
111
/* Bind this frame encoder to its owning Encoder and size all per-picture
 * tables.
 *
 *   top      - owning encoder; its m_param becomes this object's m_param
 *   numRows  - number of CTU rows in the picture
 *   numCols  - number of CTU columns in the picture
 *
 * Returns false when numRows is zero or when any of the buffers allocated
 * here could not be obtained.  Buffers that were obtained stay assigned so
 * that destroy() can release them. */
bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
{
    m_top = top;
    m_param = top->m_param;
    m_numRows = numRows;
    m_numCols = numCols;
    m_reconfigure = false;
    m_filterRowDelay = ((m_param->bEnableSAO && m_param->bSaoNonDeblocked)
                        || (!m_param->bEnableLoopFilter && m_param->bEnableSAO)) ?
                        2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
    m_filterRowDelayCus = m_filterRowDelay * numCols;
    m_rows = new CTURow[m_numRows];
    bool ok = !!m_numRows;

    m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
    m_bAllRowsStop = X265_MALLOC(bool, m_param->maxSlices);
    m_vbvResetTriggerRow = X265_MALLOC(int, m_param->maxSlices);
    /* all three are used without further checks while compressing a frame */
    ok &= m_sliceBaseRow && m_bAllRowsStop && m_vbvResetTriggerRow;
    m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;

    /* Distribute CTU rows over the slices using 24.8 fixed point so that the
     * rounding error is spread across slices; m_sliceBaseRow[s] is the first
     * row of slice s and entry [maxSlices] is the end sentinel. */
    uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
    uint32_t rowSum = sliceGroupSizeAccu;
    uint32_t sidx = 0;
    if (m_sliceBaseRow)
    {
        for (uint32_t i = 0; i < m_numRows; i++)
        {
            const uint32_t rowRange = (rowSum >> 8);
            if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
            {
                rowSum += sliceGroupSizeAccu;
                m_sliceBaseRow[++sidx] = i;
            }
        }
        X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
        m_sliceBaseRow[0] = 0;
        m_sliceBaseRow[m_param->maxSlices] = m_numRows;
    }

    /* Same partitioning, but in units of 16-pixel block rows */
    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
    ok &= !!m_sliceMaxBlockRow;
    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
    rowSum = sliceGroupSizeAccu;
    sidx = 0;
    if (m_sliceMaxBlockRow)
    {
        for (uint32_t i = 0; i < maxBlockRows; i++)
        {
            const uint32_t rowRange = (rowSum >> 8);
            if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
            {
                rowSum += sliceGroupSizeAccu;
                m_sliceMaxBlockRow[++sidx] = i;
            }
        }
        m_sliceMaxBlockRow[0] = 0;
        m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
    }

    /* determine full motion search range */
    int range  = m_param->searchRange;       /* fpel search */
    range += !!(m_param->searchMethod < 2);  /* diamond/hex range check lag */
    range += NTAPS_LUMA / 2;                 /* subpel filter half-length */
    range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine) + 1) / 2; /* subpel refine steps */
    m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range + m_param->maxCUSize - 1) / m_param->maxCUSize);

    // NOTE: 2 times of numRows because both Encoder and Filter in same queue
    if (!WaveFront::init(m_numRows * 2))
    {
        /* not treated as fatal here: the pool pointer is cleared and 'ok' is
         * left untouched */
        x265_log(m_param, X265_LOG_ERROR, "unable to initialize wavefront queue\n");
        m_pool = NULL;
    }

    m_frameFilter.init(top, this, numRows, numCols);

    // initialize HRD parameters of SPS
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
    {
        m_rce.picTimingSEI = new SEIPictureTiming;
        m_rce.hrdTiming = new HRDTiming;

        ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
    }

    /* noise reduction state is optional; if it cannot be allocated the
     * feature is switched off rather than failing init */
    if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
        m_nr = X265_MALLOC(NoiseReduction, 1);
    if (m_nr)
        memset(m_nr, 0, sizeof(NoiseReduction));
    else
        m_param->noiseReductionIntra = m_param->noiseReductionInter = 0;

    // 7.4.7.1 - Ceil( Log2( PicSizeInCtbsY ) ) bits
    {
        const int numCtus = numRows * numCols;
        if (numCtus > 1)
        {
            unsigned long tmp;
            BSR(tmp, (numCtus - 1));
            m_sliceAddrBits = (uint16_t)(tmp + 1);
        }
        else
        {
            /* A single-CTU picture needs Ceil(Log2(1)) == 0 address bits.
             * BSR is not invoked with 0 here; bit-scan primitives generally
             * leave their result undefined for a zero input (NOTE(review):
             * confirm against the project's BSR definition). */
            m_sliceAddrBits = 0;
        }
    }

    m_retFrameBuffer = X265_MALLOC(Frame*, m_param->numLayers);
    ok &= !!m_retFrameBuffer;
    if (m_retFrameBuffer)
    {
        for (int layer = 0; layer < m_param->numLayers; layer++)
            m_retFrameBuffer[layer] = NULL;
    }
    return ok;
}
208
209
/* Generate a complete list of unique geom sets for the current picture dimensions */
210
bool FrameEncoder::initializeGeoms()
211
0
{
212
    /* Geoms only vary between CTUs in the presence of picture edges */
213
0
    int maxCUSize = m_param->maxCUSize;
214
0
    int minCUSize = m_param->minCUSize;
215
0
    int heightRem = m_param->sourceHeight & (maxCUSize - 1);
216
0
    int widthRem = m_param->sourceWidth & (maxCUSize - 1);
217
0
    int allocGeoms = 1; // body
218
0
    if (heightRem && widthRem)
219
0
        allocGeoms = 4; // body, right, bottom, corner
220
0
    else if (heightRem || widthRem)
221
0
        allocGeoms = 2; // body, right or bottom
222
223
0
    m_ctuGeomMap = X265_MALLOC(uint32_t, m_numRows * m_numCols);
224
0
    m_cuGeoms = X265_MALLOC(CUGeom, allocGeoms * CUGeom::MAX_GEOMS);
225
0
    if (!m_cuGeoms || !m_ctuGeomMap)
226
0
        return false;
227
228
    // body
229
0
    CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, minCUSize, m_cuGeoms);
230
0
    memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols);
231
0
    if (allocGeoms == 1)
232
0
        return true;
233
234
0
    int countGeoms = 1;
235
0
    if (widthRem)
236
0
    {
237
        // right
238
0
        CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
239
0
        for (uint32_t i = 0; i < m_numRows; i++)
240
0
        {
241
0
            uint32_t ctuAddr = m_numCols * (i + 1) - 1;
242
0
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
243
0
        }
244
0
        countGeoms++;
245
0
    }
246
0
    if (heightRem)
247
0
    {
248
        // bottom
249
0
        CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
250
0
        for (uint32_t i = 0; i < m_numCols; i++)
251
0
        {
252
0
            uint32_t ctuAddr = m_numCols * (m_numRows - 1) + i;
253
0
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
254
0
        }
255
0
        countGeoms++;
256
257
0
        if (widthRem)
258
0
        {
259
            // corner
260
0
            CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
261
262
0
            uint32_t ctuAddr = m_numCols * m_numRows - 1;
263
0
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
264
0
            countGeoms++;
265
0
        }
266
0
        X265_CHECK(countGeoms == allocGeoms, "geometry match check failure\n");
267
0
    }
268
269
0
    return true;
270
0
}
271
272
/* Hand one frame per layer to this encoder and wake its thread.
 * Returns false (without triggering m_enable) if the CU geometry tables have
 * not been built yet and building them fails. */
bool FrameEncoder::startCompressFrame(Frame* curFrame[MAX_LAYERS])
{
    for (int id = 0; id < m_param->numLayers; id++)
    {
        Frame* incoming = curFrame[id];

        /* time elapsed since this layer last produced output */
        m_slicetypeWaitTime[id] = x265_mdate() - m_prevOutputTime[id];
        m_frame[id] = incoming;

        /* tag the frame's encode data with the encoder that now works on it */
        incoming->m_encData->m_frameEncoderID = m_jpId;
        incoming->m_encData->m_jobProvider = this;
        incoming->m_encData->m_slice->m_mref = m_mref;
    }
    m_sliceType = curFrame[0]->m_lowres.sliceType;

    /* geometry tables are built lazily, on the first frame */
    if (!m_cuGeoms && !initializeGeoms())
        return false;

    m_enable.trigger();
    return true;
}
293
294
void FrameEncoder::threadMain()
295
0
{
296
0
    THREAD_NAME("Frame", m_jpId);
297
298
0
    if (m_pool)
299
0
    {
300
0
        m_pool->setCurrentThreadAffinity();
301
302
        /* the first FE on each NUMA node is responsible for allocating thread
303
         * local data for all worker threads in that pool. If WPP is disabled, then
304
         * each FE also needs a TLD instance */
305
0
        if (!m_jpId)
306
0
        {
307
0
            int numTLD = m_pool->m_numWorkers;
308
0
            if (!m_param->bEnableWavefront)
309
0
                numTLD += m_pool->m_numProviders;
310
311
0
            m_tld = new ThreadLocalData[numTLD];
312
0
            for (int i = 0; i < numTLD; i++)
313
0
            {
314
0
                m_tld[i].analysis.initSearch(*m_param, m_top->m_scalingList);
315
0
                m_tld[i].analysis.create(m_tld);
316
0
            }
317
318
0
            for (int i = 0; i < m_pool->m_numProviders; i++)
319
0
            {
320
0
                if (m_pool->m_jpTable[i]->m_isFrameEncoder) /* ugh; over-allocation and other issues here */
321
0
                {
322
0
                    FrameEncoder *peer = dynamic_cast<FrameEncoder*>(m_pool->m_jpTable[i]);
323
0
                    peer->m_tld = m_tld;
324
0
                }
325
0
            }
326
0
        }
327
328
0
        if (m_param->bEnableWavefront)
329
0
            m_localTldIdx = -1; // cause exception if used
330
0
        else
331
0
            m_localTldIdx = m_pool->m_numWorkers + m_jpId;
332
0
    }
333
0
    else
334
0
    {
335
0
        m_tld = new ThreadLocalData;
336
0
        m_tld->analysis.initSearch(*m_param, m_top->m_scalingList);
337
0
        m_tld->analysis.create(NULL);
338
0
        m_localTldIdx = 0;
339
0
    }
340
341
0
    m_done.trigger();     /* signal that thread is initialized */
342
0
    m_enable.wait();      /* Encoder::encode() triggers this event */
343
344
0
    while (m_threadActive)
345
0
    {
346
0
        if (m_param->bCTUInfo)
347
0
        {
348
0
            while (!m_frame[0]->m_ctuInfo)
349
0
                m_frame[0]->m_copied.wait();
350
0
        }
351
0
        if ((m_param->bAnalysisType == AVC_INFO) && !strlen(m_param->analysisSave) && !strlen(m_param->analysisLoad) && !(IS_X265_TYPE_I(m_frame[0]->m_lowres.sliceType)))
352
0
        {
353
0
            while (((m_frame[0]->m_analysisData.interData == NULL && m_frame[0]->m_analysisData.intraData == NULL) || (uint32_t)m_frame[0]->m_poc != m_frame[0]->m_analysisData.poc))
354
0
                m_frame[0]->m_copyMVType.wait();
355
0
        }
356
357
0
        for (int layer = 0; layer < m_param->numLayers; layer++)
358
0
            compressFrame(layer);
359
0
        m_done.trigger(); /* FrameEncoder::getEncodedPicture() blocks for this event */
360
0
        m_enable.wait();
361
0
    }
362
0
}
363
364
void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */)
365
0
{
366
0
    Frame* frame = master.m_frame[master.m_sLayerId];
367
0
    weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param);
368
0
}
369
370
371
/* Number of bits needed to write 'code' as a signed Exp-Golomb value.
 *
 * The signed value is first mapped to an unsigned index (0, 1, -1, 2, -2, ...
 * -> 0, 1, 2, 3, 4, ...) and that index k is then costed as an unsigned
 * Exp-Golomb code: 2 * floor(log2(k + 1)) + 1 bits.
 *
 * The mapping is done in 64-bit unsigned arithmetic so that every int32_t
 * input, including INT32_MIN, is handled without signed overflow. */
uint32_t getBsLength( int32_t code )
{
    const int64_t wide = code;
    uint64_t ucode = (wide <= 0) ? ((uint64_t)(-wide) << 1) : (((uint64_t)wide << 1) - 1);

    ++ucode;

    /* idx = floor(log2(ucode)); ucode is at least 1 here */
    uint32_t idx = 0;
    while (ucode >>= 1)
        idx++;

    return idx * 2 + 1;
}
382
383
/* Decide whether a tone-mapping SEI payload has to be written for the current
 * frame, and remember it for the next comparison.
 *
 *   payload - candidate payload for the current frame
 *
 * Returns true when the payload differs from the one cached in
 * m_top->m_prevTonemapPayload (or nothing usable is cached), or when the
 * base-layer frame is an IDR. */
bool FrameEncoder::writeToneMapInfo(x265_sei_payload *payload)
{
    bool payloadChange = false;
    if (m_top->m_prevTonemapPayload.payload != NULL && payload->payloadSize == m_top->m_prevTonemapPayload.payloadSize)
    {
        /* same size: changed only if the bytes differ; the cached buffer can
         * be reused as-is */
        if (memcmp(m_top->m_prevTonemapPayload.payload, payload->payload, payload->payloadSize) != 0)
            payloadChange = true;
    }
    else
    {
        /* no cache yet or size mismatch: the cache buffer must be replaced */
        payloadChange = true;
        if (m_top->m_prevTonemapPayload.payload != NULL)
            x265_free(m_top->m_prevTonemapPayload.payload);
        m_top->m_prevTonemapPayload.payload = (uint8_t*)x265_malloc(sizeof(uint8_t)* payload->payloadSize);
    }

    if (payloadChange)
    {
        if (m_top->m_prevTonemapPayload.payload != NULL)
        {
            m_top->m_prevTonemapPayload.payloadType = payload->payloadType;
            m_top->m_prevTonemapPayload.payloadSize = payload->payloadSize;
            memcpy(m_top->m_prevTonemapPayload.payload, payload->payload, payload->payloadSize);
        }
        else
        {
            /* The cache buffer could not be allocated.  Leave the cache empty
             * instead of copying through a NULL pointer; the payload is still
             * reported as changed, and the next call starts from no cache. */
            m_top->m_prevTonemapPayload.payloadSize = 0;
        }
    }

    bool isIDR = m_frame[0]->m_lowres.sliceType == X265_TYPE_IDR;
    return (payloadChange || isIDR);
}
409
410
void FrameEncoder::writeTrailingSEIMessages(int layer)
411
0
{
412
0
    Slice* slice = m_frame[layer]->m_encData->m_slice;
413
0
    int planes = (m_param->internalCsp != X265_CSP_I400) ? 3 : 1;
414
0
    int32_t payloadSize = 0;
415
416
0
    if (m_param->decodedPictureHashSEI == 1)
417
0
    {
418
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::MD5;
419
0
        for (int i = 0; i < planes; i++)
420
0
            MD5Final(&m_seiReconPictureDigest.m_state[i], m_seiReconPictureDigest.m_digest[i]);
421
0
        payloadSize = 1 + 16 * planes;
422
0
    }
423
0
    else if (m_param->decodedPictureHashSEI == 2)
424
0
    {
425
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CRC;
426
0
        for (int i = 0; i < planes; i++)
427
0
            crcFinish(m_seiReconPictureDigest.m_crc[i], m_seiReconPictureDigest.m_digest[i]);
428
0
        payloadSize = 1 + 2 * planes;
429
0
    }
430
0
    else if (m_param->decodedPictureHashSEI == 3)
431
0
    {
432
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CHECKSUM;
433
0
        for (int i = 0; i < planes; i++)
434
0
            checksumFinish(m_seiReconPictureDigest.m_checksum[i], m_seiReconPictureDigest.m_digest[i]);
435
0
        payloadSize = 1 + 4 * planes;
436
0
    }
437
438
0
    m_seiReconPictureDigest.setSize(payloadSize);
439
0
    m_seiReconPictureDigest.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_SUFFIX_SEI, m_nalList, false, layer);
440
0
}
441
442
void FrameEncoder::compressFrame(int layer)
443
0
{
444
0
    ProfileScopeEvent(frameThread);
445
446
0
    m_startCompressTime[layer] = x265_mdate();
447
0
    m_totalActiveWorkerCount = 0;
448
0
    m_activeWorkerCountSamples = 0;
449
0
    m_totalWorkerElapsedTime[layer] = 0;
450
0
    m_totalNoWorkerTime[layer] = 0;
451
0
    m_countRowBlocks = 0;
452
0
    m_allRowsAvailableTime[layer] = 0;
453
0
    m_stallStartTime[layer] = 0;
454
455
0
    m_completionCount = 0;
456
0
    memset((void*)m_bAllRowsStop, 0, sizeof(bool) * m_param->maxSlices);
457
0
    memset((void*)m_vbvResetTriggerRow, -1, sizeof(int) * m_param->maxSlices);
458
0
    m_rowSliceTotalBits[0] = 0;
459
0
    m_rowSliceTotalBits[1] = 0;
460
461
0
    m_SSDY[layer] = m_SSDU[layer] = m_SSDV[layer] = 0;
462
0
    m_ssim[layer] = 0;
463
0
    m_ssimCnt[layer] = 0;
464
0
    memset(&(m_frame[layer]->m_encData->m_frameStats), 0, sizeof(m_frame[layer]->m_encData->m_frameStats));
465
0
    m_sLayerId = layer;
466
467
0
    if (m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
468
0
    {
469
0
        int height = m_frame[layer]->m_fencPic->m_picHeight;
470
0
        int width = m_frame[layer]->m_fencPic->m_picWidth;
471
0
        intptr_t stride = m_frame[layer]->m_fencPic->m_stride;
472
473
0
        if (!computeEdge(m_frame[layer]->m_edgeBitPic, m_frame[layer]->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))
474
0
        {
475
0
            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");
476
0
        }
477
0
    }
478
479
    /* Emit access unit delimiter unless this is the first frame and the user is
480
     * not repeating headers (since AUD is supposed to be the first NAL in the access
481
     * unit) */
482
0
    Slice* slice = m_frame[layer]->m_encData->m_slice;
483
484
0
    if (m_param->bEnableEndOfSequence && m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_frame[layer]->m_poc)
485
0
    {
486
0
        m_bs.resetBits();
487
0
        m_nalList.serialize(NAL_UNIT_EOS, m_bs);
488
0
    }
489
490
0
    if (m_param->bEnableAccessUnitDelimiters && (m_frame[layer]->m_poc || m_param->bRepeatHeaders))
491
0
    {
492
0
        m_bs.resetBits();
493
0
        m_entropyCoder.setBitstream(&m_bs);
494
0
        m_entropyCoder.codeAUD(*slice);
495
0
        m_bs.writeByteAlignment();
496
0
        m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
497
0
        if (m_param->bSingleSeiNal)
498
0
            m_bs.resetBits();
499
0
    }
500
0
    if (m_frame[layer]->m_lowres.bKeyframe && m_param->bRepeatHeaders)
501
0
    {
502
0
        if (m_param->bOptRefListLengthPPS)
503
0
        {
504
0
            ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
505
0
            m_top->updateRefIdx();
506
0
        }
507
0
        if (m_top->m_param->rc.bStatRead  && m_top->m_param->bMultiPassOptRPS)
508
0
        {
509
0
            ScopedLock refIdxLock(m_top->m_rpsInSpsLock);
510
0
            if (!m_top->computeSPSRPSIndex())
511
0
            {
512
0
                x265_log(m_param, X265_LOG_ERROR, "compute commonly RPS failed!\n");
513
0
                m_top->m_aborted = true;
514
0
            }
515
0
            m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
516
0
        }
517
0
        else
518
0
            m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
519
0
    }
520
521
0
    if (m_top->m_param->rc.bStatRead && m_top->m_param->bMultiPassOptRPS)
522
0
        m_frame[layer]->m_encData->m_slice->m_rpsIdx = (m_top->m_rateControl->m_rce2Pass + m_frame[layer]->m_encodeOrder)->rpsIdx;
523
524
    // Weighted Prediction parameters estimation.
525
0
    bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred && !layer;
526
0
    bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred && !layer;
527
528
0
    WeightParam* reuseWP = NULL;
529
0
    if (m_param->analysisLoad[0] && (bUseWeightP || bUseWeightB))
530
0
        reuseWP = (WeightParam*)m_frame[layer]->m_analysisData.wt;
531
532
0
    if (bUseWeightP || bUseWeightB)
533
0
    {
534
#if DETAILED_CU_STATS
535
        m_cuStats.countWeightAnalyze++;
536
        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
537
#endif
538
0
        if (strlen(m_param->analysisLoad))
539
0
        {
540
0
            for (int list = 0; list < slice->isInterB() + 1; list++) 
541
0
            {
542
0
                for (int plane = 0; plane < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); plane++)
543
0
                {
544
0
                    for (int ref = 1; ref < slice->m_numRefIdx[list]; ref++)
545
0
                        SET_WEIGHT(slice->m_weightPredTable[list][ref][plane], false, 1 << reuseWP->log2WeightDenom, reuseWP->log2WeightDenom, 0);
546
0
                    slice->m_weightPredTable[list][0][plane] = *(reuseWP++);
547
0
                }
548
0
            }
549
0
        }
550
0
        else
551
0
        {
552
0
            WeightAnalysis wa(*this);
553
0
            if (m_pool && wa.tryBondPeers(*this, 1))
554
                /* use an idle worker for weight analysis */
555
0
                wa.waitForExit();
556
0
            else
557
0
                weightAnalyse(*slice, *m_frame[layer], *m_param);
558
0
        }
559
0
    }
560
0
    else
561
0
        slice->disableWeights();
562
563
0
    if (strlen(m_param->analysisSave) && (bUseWeightP || bUseWeightB))
564
0
        reuseWP = (WeightParam*)m_frame[layer]->m_analysisData.wt;
565
    // Generate motion references
566
0
    int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
567
0
    for (int l = 0; l < numPredDir; l++)
568
0
    {
569
0
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
570
0
        {
571
0
            WeightParam *w = NULL;
572
0
            if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].wtPresent)
573
0
                w = slice->m_weightPredTable[l][ref];
574
0
            slice->m_refReconPicList[l][ref] = slice->m_refFrameList[l][ref]->m_reconPic[0];
575
0
            m_mref[l][ref].init(slice->m_refReconPicList[l][ref], w, *m_param);
576
0
        }
577
0
        if (strlen(m_param->analysisSave) && (bUseWeightP || bUseWeightB))
578
0
        {
579
0
            for (int i = 0; i < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); i++)
580
0
                *(reuseWP++) = slice->m_weightPredTable[l][0][i];
581
0
        }
582
583
0
    }
584
585
0
    int numTLD;
586
0
    if (m_pool)
587
0
        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
588
0
    else
589
0
        numTLD = 1;
590
591
    /* Get the QP for this frame from rate control. This call may block until
592
     * frames ahead of it in encode order have called rateControlEnd() */
593
0
    int qp = (layer == 0) ? m_top->m_rateControl->rateControlStart(m_frame[layer], &m_rce, m_top) : (int)m_rce.newQp;
594
595
0
    m_rce.newQp = qp;
596
597
0
    if (!!layer && m_top->m_lookahead->m_bAdaptiveQuant)
598
0
    {
599
0
        int ncu;
600
0
        if (m_param->rc.qgSize == 8)
601
0
            ncu = m_top->m_rateControl->m_ncu * 4;
602
0
        else
603
0
            ncu = m_top->m_rateControl->m_ncu;
604
0
        if (m_param->numViews > 1)
605
0
        {
606
0
            for (int i = 0; i < ncu; i++)
607
0
            {
608
0
                m_frame[layer]->m_lowres.qpCuTreeOffset[i] = m_frame[0]->m_lowres.qpCuTreeOffset[i];
609
0
                m_frame[layer]->m_lowres.qpAqOffset[i] = m_frame[0]->m_lowres.qpAqOffset[i];
610
0
            }
611
0
        }
612
0
        else if (m_param->numScalableLayers > 1)
613
0
        {
614
0
            memset(m_frame[layer]->m_lowres.qpCuTreeOffset, 0, sizeof(double)*ncu);
615
0
            memset(m_frame[layer]->m_lowres.qpAqOffset, 0, sizeof(double)* ncu);
616
0
        }
617
618
0
        m_frame[layer]->m_encData->m_avgQpAq = m_frame[0]->m_encData->m_avgQpAq;
619
0
        m_frame[layer]->m_encData->m_avgQpRc = m_frame[0]->m_encData->m_avgQpRc;
620
0
        if (!!m_param->rc.hevcAq)
621
0
        {
622
0
            for (uint32_t d = 0; d < 4; d++)
623
0
            {
624
0
                int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
625
0
                int aqDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.qgSize];
626
0
                if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
627
0
                    continue;
628
0
                PicQPAdaptationLayer* pcAQLayer0 = &m_frame[0]->m_lowres.pAQLayer[d];
629
0
                PicQPAdaptationLayer* pcAQLayer1 = &m_frame[layer]->m_lowres.pAQLayer[d];
630
0
                const uint32_t aqPartWidth = m_frame[0]->m_lowres.pAQLayer[d].aqPartWidth;
631
0
                const uint32_t aqPartHeight = m_frame[0]->m_lowres.pAQLayer[d].aqPartHeight;
632
0
                double* pcQP0 = pcAQLayer0->dQpOffset;
633
0
                double* pcCuTree0 = pcAQLayer0->dCuTreeOffset;
634
0
                double* pcQP1 = pcAQLayer1->dQpOffset;
635
0
                double* pcCuTree1 = pcAQLayer1->dCuTreeOffset;
636
0
                if (m_param->numViews > 1)
637
0
                {
638
0
                    for (uint32_t y = 0; y < m_frame[0]->m_fencPic->m_picHeight; y += aqPartHeight)
639
0
                    {
640
0
                        for (uint32_t x = 0; x < m_frame[0]->m_fencPic->m_picWidth; x += aqPartWidth, pcQP0++, pcCuTree0++, pcQP1++, pcCuTree1++)
641
0
                        {
642
0
                            *pcQP1 = *pcQP0;
643
0
                            *pcCuTree1 = *pcCuTree0;
644
0
                        }
645
0
                    }
646
0
                }
647
0
                else if (m_param->numScalableLayers > 1)
648
0
                {
649
0
                    int numAQPartInWidth = (m_frame[0]->m_fencPic->m_picWidth + aqPartWidth - 1) / aqPartWidth;
650
0
                    int numAQPartInHeight = (m_frame[0]->m_fencPic->m_picHeight + aqPartHeight - 1) / aqPartHeight;
651
0
                    memset(m_frame[layer]->m_lowres.pAQLayer[d].dQpOffset, 0, sizeof(double)*numAQPartInWidth* numAQPartInHeight);
652
0
                    memset(m_frame[layer]->m_lowres.pAQLayer[d].dCuTreeOffset, 0, sizeof(double)* numAQPartInWidth* numAQPartInHeight);
653
0
                }
654
0
            }
655
0
        }
656
0
    }
657
0
    if (m_param->bEnableTemporalFilter)
658
0
    {
659
0
        m_frame[layer]->m_mcstf->m_QP = qp;
660
0
        m_frame[layer]->m_mcstf->bilateralFilter(m_frame[layer], m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
661
0
    }
662
663
0
    if (m_nr)
664
0
    {
665
0
        if (qp > QP_MAX_SPEC && m_frame[layer]->m_param->rc.vbvBufferSize)
666
0
        {
667
0
            for (int i = 0; i < numTLD; i++)
668
0
            {
669
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];
670
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_top->m_residualSumEmergency;
671
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_top->m_countEmergency;
672
0
            }
673
0
        }
674
0
        else
675
0
        {
676
0
            if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
677
0
            {
678
0
                for (int i = 0; i < numTLD; i++)
679
0
                {
680
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;
681
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;
682
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;
683
0
                }
684
0
            }
685
0
            else
686
0
            {
687
0
                for (int i = 0; i < numTLD; i++)
688
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = NULL;
689
0
            }
690
0
        }
691
0
    }
692
693
    /* Clip slice QP to 0-51 spec range before encoding */
694
0
    slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);
695
0
    if (m_param->bHDR10Opt)
696
0
    {
697
0
        int qpCb = x265_clip3(-12, 0, (int)floor((m_top->m_cB * ((-.46) * qp + 9.26)) + 0.5 ));
698
0
        int qpCr = x265_clip3(-12, 0, (int)floor((m_top->m_cR * ((-.46) * qp + 9.26)) + 0.5 ));
699
0
        slice->m_chromaQpOffset[0] = slice->m_pps->chromaQpOffset[0] + qpCb < -12 ? (qpCb + (-12 - (slice->m_pps->chromaQpOffset[0] + qpCb))) : qpCb;
700
0
        slice->m_chromaQpOffset[1] = slice->m_pps->chromaQpOffset[1] + qpCr < -12 ? (qpCr + (-12 - (slice->m_pps->chromaQpOffset[1] + qpCr))) : qpCr;
701
0
    }
702
703
0
    if (m_param->bOptQpPPS && m_param->bRepeatHeaders)
704
0
    {
705
0
        ScopedLock qpLock(m_top->m_sliceQpLock);
706
0
        for (int i = 0; i < (QP_MAX_MAX + 1); i++)
707
0
        {
708
0
            int delta = slice->m_sliceQp - (i + 1);
709
0
            int codeLength = getBsLength( delta );
710
0
            m_top->m_iBitsCostSum[i] += codeLength;
711
0
        }
712
0
        m_top->m_iFrameNum++;
713
0
    }
714
0
    m_initSliceContext.resetEntropy(*slice);
715
716
0
    m_frameFilter.start(m_frame[layer], m_initSliceContext);
717
718
    /* ensure all rows are blocked prior to initializing row CTU counters */
719
0
    WaveFront::clearEnabledRowMask();
720
721
0
    WaveFront::setLayerId(layer);
722
    /* reset entropy coders and compute slice id */
723
0
    m_entropyCoder.load(m_initSliceContext);
724
0
    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)   
725
0
        for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)
726
0
            m_rows[row].init(m_initSliceContext, sliceId);   
727
728
    // reset slice counter for rate control update
729
0
    m_sliceCnt = 0;
730
731
0
    uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;
732
0
    X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");
733
0
    if (!m_outStreams)
734
0
    {
735
0
        m_outStreams = new Bitstream[numSubstreams];
736
0
        if (!m_param->bEnableWavefront)
737
0
            m_backupStreams = new Bitstream[numSubstreams];
738
0
        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
739
0
        if (!slice->m_bUseSao)
740
0
        {
741
0
            for (uint32_t i = 0; i < numSubstreams; i++)
742
0
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
743
0
        }
744
0
    }
745
0
    else
746
0
    {
747
0
        for (uint32_t i = 0; i < numSubstreams; i++)
748
0
        {
749
0
            m_outStreams[i].resetBits();
750
0
            if (!slice->m_bUseSao)
751
0
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
752
0
            else
753
0
                m_rows[i].rowGoOnCoder.setBitstream(NULL);
754
0
        }
755
0
    }
756
757
0
    m_rce.encodeOrder = m_frame[layer]->m_encodeOrder;
758
0
    int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
759
760
0
    if (m_frame[layer]->m_lowres.bKeyframe)
761
0
    {
762
0
        if (m_param->bEmitHRDSEI)
763
0
        {
764
0
            SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;
765
766
            // since the temporal layer HRD is not ready, we assumed it is fixed
767
0
            bpSei->m_auCpbRemovalDelayDelta = 1;
768
0
            bpSei->m_cpbDelayOffset = 0;
769
0
            bpSei->m_dpbDelayOffset = 0;
770
0
            bpSei->m_concatenationFlag = (m_param->bEnableHRDConcatFlag && !m_frame[layer]->m_poc) ? true : false;
771
772
            // hrdFullness() calculates the initial CPB removal delay and offset
773
0
            m_top->m_rateControl->hrdFullness(bpSei);
774
0
            bpSei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
775
776
0
            m_top->m_lastBPSEI = m_rce.encodeOrder;
777
0
        }
778
779
0
        if (m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_param->bEmitIDRRecoverySEI)
780
0
        {
781
            /* Recovery Point SEI requires the SPS to be "activated" */
782
0
            SEIRecoveryPoint sei;
783
0
            sei.m_recoveryPocCnt = 0;
784
0
            sei.m_exactMatchingFlag = true;
785
0
            sei.m_brokenLinkFlag = false;
786
0
            sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
787
0
        }
788
0
    }
789
790
0
    if ((m_param->bEmitHRDSEI || !!m_param->interlaceMode))
791
0
    {
792
0
        SEIPictureTiming *sei = m_rce.picTimingSEI;
793
0
        const VUI *vui = &slice->m_sps->vuiParameters;
794
0
        const HRDInfo *hrd = &vui->hrdParameters;
795
0
        int poc = slice->m_poc;
796
797
0
        if (vui->frameFieldInfoPresentFlag)
798
0
        {
799
0
            if (m_param->interlaceMode > 0)
800
0
            {
801
0
                if( m_param->interlaceMode == 2 )
802
0
                {   
803
                    // m_picStruct should be set to 3 or 4 when field feature is enabled
804
0
                    if (m_param->bField)
805
                        // 3: Top field, bottom field, in that order; 4: Bottom field, top field, in that order
806
0
                        sei->m_picStruct = (slice->m_fieldNum == 1) ? 4 : 3;
807
0
                    else
808
0
                        sei->m_picStruct = (poc & 1) ? 1 /* top */ : 2 /* bottom */;
809
0
                }     
810
0
                else if (m_param->interlaceMode == 1)
811
0
                {
812
0
                    if (m_param->bField)
813
0
                        sei->m_picStruct = (slice->m_fieldNum == 1) ? 3: 4;
814
0
                    else
815
0
                        sei->m_picStruct = (poc & 1) ? 2 /* bottom */ : 1 /* top */;
816
0
                }
817
0
            }
818
0
            else if (m_param->bEnableFrameDuplication)
819
0
                sei->m_picStruct = m_frame[layer]->m_picStruct;
820
0
            else
821
0
                sei->m_picStruct = m_param->pictureStructure;
822
823
0
            sei->m_sourceScanType = m_param->interlaceMode ? 0 : 1;
824
825
0
            sei->m_duplicateFlag = false;
826
0
        }
827
828
0
        if (vui->hrdParametersPresentFlag)
829
0
        {
830
            // The m_aucpbremoval delay specifies how many clock ticks the
831
            // access unit associated with the picture timing SEI message has to
832
            // wait after removal of the access unit with the most recent
833
            // buffering period SEI message
834
0
            sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));
835
0
            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics[m_frame[layer]->m_tempLayer] + poc - m_rce.encodeOrder;
836
0
        }
837
838
0
        sei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
839
0
    }
840
841
0
    if (m_param->preferredTransferCharacteristics > -1 && slice->isIRAP())
842
0
    {
843
0
        SEIAlternativeTC m_seiAlternativeTC;
844
0
        m_seiAlternativeTC.m_preferredTransferCharacteristics = m_param->preferredTransferCharacteristics;
845
0
        m_seiAlternativeTC.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
846
0
    }
847
    /* Write Film grain characteristics if present */
848
0
    if (this->m_top->m_filmGrainIn)
849
0
    {
850
0
        FilmGrainCharacteristics m_filmGrain;
851
        /* Read the Film grain model file */
852
0
        readModel(&m_filmGrain, this->m_top->m_filmGrainIn);
853
0
        m_filmGrain.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
854
0
    }
855
    /* Write Aom film grain characteristics if present */
856
0
    if (this->m_top->m_aomFilmGrainIn)
857
0
    {
858
0
        AomFilmGrainCharacteristics m_aomFilmGrain;
859
        /* Read the Film grain model file */
860
0
        readAomModel(&m_aomFilmGrain, this->m_top->m_aomFilmGrainIn);
861
0
        m_aomFilmGrain.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal);
862
0
    }
863
    /* Write user SEI */
864
0
    for (int i = 0; i < m_frame[layer]->m_userSEI.numPayloads; i++)
865
0
    {
866
0
        x265_sei_payload *payload = &m_frame[layer]->m_userSEI.payloads[i];
867
0
        if (payload->payloadType == USER_DATA_UNREGISTERED)
868
0
        {
869
0
            SEIuserDataUnregistered sei;
870
0
            sei.m_userData = payload->payload;
871
0
            sei.setSize(payload->payloadSize);
872
0
            sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
873
0
        }
874
0
        else if (payload->payloadType == USER_DATA_REGISTERED_ITU_T_T35)
875
0
        {
876
0
            bool writeSei = m_param->bDhdr10opt ? writeToneMapInfo(payload) : true;
877
0
            if (writeSei)
878
0
            {
879
0
                SEIuserDataRegistered sei;
880
0
                sei.m_userData = payload->payload;
881
0
                sei.setSize(payload->payloadSize);
882
0
                sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
883
0
            }
884
0
        }
885
0
        else
886
0
            x265_log(m_param, X265_LOG_ERROR, "Unrecognized SEI type\n");
887
0
    }
888
889
0
    bool isSei = ((m_frame[layer]->m_lowres.bKeyframe && m_param->bRepeatHeaders) || m_param->bEmitHRDSEI ||
890
0
                 !!m_param->interlaceMode || (m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_param->bEmitIDRRecoverySEI) ||
891
0
                   m_frame[layer]->m_userSEI.numPayloads);
892
893
0
    if (isSei && m_param->bSingleSeiNal)
894
0
    {
895
0
        m_bs.writeByteAlignment();
896
0
        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
897
0
    }
898
    /* CQP and CRF (without capped VBV) don't use mid-frame statistics to
899
     * tune RateControl parameters for other frames.
900
     * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
901
     * RateControlEnd here, after the slice contexts are initialized. For the rest - ABR
902
     * and VBV, unlock only after rateControlUpdateStats of this frame is called */
903
0
    if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
904
0
    {
905
0
        m_top->m_rateControl->m_startEndOrder.incr();
906
907
0
        if (m_rce.encodeOrder < m_param->frameNumThreads - 1)
908
0
            m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
909
0
    }
910
911
0
    if (m_param->bDynamicRefine)
912
0
        computeAvgTrainingData(layer);
913
914
    /* Analyze CTU rows, most of the hard work is done here.  Frame is
915
     * compressed in a wave-front pattern if WPP is enabled. Row based loop
916
     * filters runs behind the CTU compression and reconstruction */
917
918
0
    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)    
919
0
        m_rows[m_sliceBaseRow[sliceId]].active = true;
920
    
921
0
    if (m_param->bEnableWavefront)
922
0
    {
923
0
        int i = 0;
924
0
        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
925
0
        {
926
0
            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
927
0
            {
928
0
                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
929
0
                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
930
0
                const uint32_t row = sliceStartRow + rowInSlice;
931
0
                if (row > sliceEndRow)
932
0
                    continue;
933
0
                m_row_to_idx[row] = i;
934
0
                m_idx_to_row[i] = row;
935
0
                i += 1;
936
0
            }
937
0
        }
938
0
    }
939
940
0
    if (m_param->bEnableWavefront)
941
0
    {
942
0
        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
943
0
        {
944
0
            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
945
0
            {
946
0
                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
947
0
                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
948
0
                const uint32_t row = sliceStartRow + rowInSlice;
949
950
0
                X265_CHECK(row < m_numRows, "slices row fault was detected");
951
952
0
                if (row > sliceEndRow)
953
0
                    continue;
954
955
                // block until all reference frames have reconstructed the rows we need
956
0
                for (int l = 0; l < numPredDir; l++)
957
0
                {
958
0
                    for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
959
0
                    {
960
0
                        Frame *refpic = slice->m_refFrameList[l][ref];
961
962
#if ENABLE_SCC_EXT
963
                        /*Exempt the current pic as reference*/
964
                        if (m_param->bEnableSCC && refpic->m_poc == m_frame[layer]->m_poc)
965
                            continue;
966
#endif
967
968
                        // NOTE: we unnecessarily wait for rows that are beyond the current slice boundary
969
0
                        const int rowIdx = X265_MIN(sliceEndRow, (row + m_refLagRows));
970
971
0
                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
972
0
                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
973
974
0
                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
975
0
                            m_mref[l][ref].applyWeight(rowIdx, m_numRows, sliceEndRow, sliceId);
976
0
                    }
977
0
                }
978
979
0
                enableRowEncoder(m_row_to_idx[row]); /* clear external dependency for this row */
980
0
                if (!rowInSlice)
981
0
                {
982
0
                    m_row0WaitTime[layer] = x265_mdate();
983
0
                    enqueueRowEncoder(m_row_to_idx[row]); /* clear internal dependency, start wavefront */
984
0
                }
985
0
                tryWakeOne();
986
0
            } // end of loop rowInSlice
987
0
        } // end of loop sliceId
988
989
0
        m_allRowsAvailableTime[layer] = x265_mdate();
990
0
        tryWakeOne(); /* ensure one thread is active or help-wanted flag is set prior to blocking */
991
0
        static const int block_ms = 250;
992
0
        while (m_completionEvent.timedWait(block_ms))
993
0
            tryWakeOne();
994
0
    }
995
0
    else
996
0
    {
997
0
        for (uint32_t i = 0; i < m_numRows + m_filterRowDelay; i++)
998
0
        {
999
            // compress
1000
0
            if (i < m_numRows)
1001
0
            {
1002
                // block until all reference frames have reconstructed the rows we need
1003
0
                for (int l = 0; l < numPredDir; l++)
1004
0
                {
1005
0
                    int list = l;
1006
0
                    for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
1007
0
                    {
1008
0
                        Frame *refpic = slice->m_refFrameList[list][ref];
1009
1010
#if ENABLE_SCC_EXT
1011
                        /*Exempt the current pic as reference*/
1012
                        if (m_param->bEnableSCC && refpic->m_poc == m_frame[layer]->m_poc)
1013
                            continue;
1014
#endif
1015
1016
0
                        const int rowIdx = X265_MIN(m_numRows - 1, (i + m_refLagRows));
1017
0
                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
1018
0
                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
1019
1020
0
                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
1021
0
                            m_mref[list][ref].applyWeight(rowIdx, m_numRows, m_numRows, 0);
1022
0
                    }
1023
0
                }
1024
1025
0
                if (!i)
1026
0
                    m_row0WaitTime[layer] = x265_mdate();
1027
0
                else if (i == m_numRows - 1)
1028
0
                    m_allRowsAvailableTime[layer] = x265_mdate();
1029
0
                processRowEncoder(i, m_tld[m_localTldIdx], layer);
1030
0
            }
1031
1032
            // filter
1033
0
            if (i >= m_filterRowDelay)
1034
0
                m_frameFilter.processRow(i - m_filterRowDelay, layer);
1035
0
        }
1036
0
    }
1037
#if ENABLE_LIBVMAF
1038
    vmafFrameLevelScore();
1039
#endif
1040
1041
0
    if (m_param->maxSlices > 1)
1042
0
    {
1043
0
        PicYuv *reconPic = m_frame[layer]->m_reconPic[0];
1044
0
        uint32_t height = reconPic->m_picHeight;
1045
0
        initDecodedPictureHashSEI(0, 0, height, layer);
1046
0
    } 
1047
1048
0
    if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame[layer]->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
1049
0
        collectDynDataFrame(layer);
1050
1051
0
    if (m_param->bEnableTemporalFilter && m_top->isFilterThisframe(m_frame[layer]->m_mcstf->m_sliceTypeConfig, m_frame[layer]->m_lowres.sliceType))
1052
0
    {
1053
        //Reset the MCSTF context in Frame Encoder and Frame
1054
0
        for (int i = 0; i < (m_frame[layer]->m_mcstf->m_range << 1); i++)
1055
0
        {
1056
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs0, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1057
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs1, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1058
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs2, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1059
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs,  0, sizeof(MV) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1060
0
            memset(m_frame[layer]->m_mcstfRefList[i].noise, 0, sizeof(int) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1061
0
            memset(m_frame[layer]->m_mcstfRefList[i].error, 0, sizeof(int) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1062
1063
0
            m_frame[layer]->m_mcstf->m_numRef = 0;
1064
0
        }
1065
0
    }
1066
1067
1068
0
    if (m_param->rc.bStatWrite)
1069
0
    {
1070
0
        int totalI = 0, totalP = 0, totalSkip = 0;
1071
1072
        // accumulate intra,inter,skip cu count per frame for 2 pass
1073
0
        for (uint32_t i = 0; i < m_numRows; i++)
1074
0
        {
1075
0
            m_frame[layer]->m_encData->m_frameStats.mvBits    += m_rows[i].rowStats.mvBits;
1076
0
            m_frame[layer]->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
1077
0
            m_frame[layer]->m_encData->m_frameStats.miscBits  += m_rows[i].rowStats.miscBits;
1078
0
            totalI                                     += m_rows[i].rowStats.intra8x8Cnt;
1079
0
            totalP                                     += m_rows[i].rowStats.inter8x8Cnt;
1080
0
            totalSkip                                  += m_rows[i].rowStats.skip8x8Cnt;
1081
0
        }
1082
0
        int totalCuCount = totalI + totalP + totalSkip;
1083
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;
1084
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
1085
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Skip  = (double)totalSkip / totalCuCount;
1086
0
    }
1087
1088
0
    if (m_param->csvLogLevel >= 1)
1089
0
    {
1090
0
        for (uint32_t i = 0; i < m_numRows; i++)
1091
0
        {
1092
0
            m_frame[layer]->m_encData->m_frameStats.cntIntraNxN += m_rows[i].rowStats.cntIntraNxN;
1093
0
            m_frame[layer]->m_encData->m_frameStats.totalCu += m_rows[i].rowStats.totalCu;
1094
0
            m_frame[layer]->m_encData->m_frameStats.totalCtu += m_rows[i].rowStats.totalCtu;
1095
0
            m_frame[layer]->m_encData->m_frameStats.lumaDistortion += m_rows[i].rowStats.lumaDistortion;
1096
0
            m_frame[layer]->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;
1097
0
            m_frame[layer]->m_encData->m_frameStats.psyEnergy += m_rows[i].rowStats.psyEnergy;
1098
0
            m_frame[layer]->m_encData->m_frameStats.ssimEnergy += m_rows[i].rowStats.ssimEnergy;
1099
0
            m_frame[layer]->m_encData->m_frameStats.resEnergy += m_rows[i].rowStats.resEnergy;
1100
0
            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1101
0
            {
1102
0
                m_frame[layer]->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
1103
0
                m_frame[layer]->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
1104
0
                for (int m = 0; m < INTER_MODES; m++)
1105
0
                    m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
1106
0
                for (int n = 0; n < INTRA_MODES; n++)
1107
0
                    m_frame[layer]->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];
1108
0
            }
1109
0
        }
1110
0
        m_frame[layer]->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame[layer]->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1111
1112
0
        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1113
0
        {
1114
0
            m_frame[layer]->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame[layer]->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1115
0
            m_frame[layer]->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame[layer]->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1116
0
            for (int n = 0; n < INTRA_MODES; n++)
1117
0
                m_frame[layer]->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame[layer]->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1118
0
            uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
1119
0
            cuInterRectCnt += m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][2];
1120
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1121
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1122
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1123
0
        }
1124
0
    }
1125
1126
0
    if (m_param->csvLogLevel >= 2)
1127
0
    {
1128
0
        m_frame[layer]->m_encData->m_frameStats.avgLumaDistortion = (double)(m_frame[layer]->m_encData->m_frameStats.lumaDistortion) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1129
0
        m_frame[layer]->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame[layer]->m_encData->m_frameStats.chromaDistortion) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1130
0
        m_frame[layer]->m_encData->m_frameStats.avgPsyEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.psyEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1131
0
        m_frame[layer]->m_encData->m_frameStats.avgSsimEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.ssimEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1132
0
        m_frame[layer]->m_encData->m_frameStats.avgResEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.resEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1133
0
    }
1134
1135
0
    m_bs.resetBits();
1136
0
    m_entropyCoder.load(m_initSliceContext);
1137
0
    m_entropyCoder.setBitstream(&m_bs);
1138
1139
    // finish encode of each CTU row, only required when SAO is enabled
1140
0
    if (slice->m_bUseSao)
1141
0
        encodeSlice(0, layer);
1142
1143
0
    m_entropyCoder.setBitstream(&m_bs);
1144
1145
0
    if (m_param->maxSlices > 1)
1146
0
    {
1147
0
        uint32_t nextSliceRow = 0;
1148
1149
0
        for(uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
1150
0
        {
1151
0
            m_bs.resetBits();
1152
1153
0
            const uint32_t sliceAddr = nextSliceRow * m_numCols;
1154
0
            if (m_param->bOptRefListLengthPPS)
1155
0
            {
1156
0
                ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
1157
0
                m_top->analyseRefIdx(slice->m_numRefIdx);
1158
0
            }
1159
0
            m_entropyCoder.codeSliceHeader(*slice, *m_frame[layer]->m_encData, sliceAddr, m_sliceAddrBits, slice->m_sliceQp, layer);
1160
1161
            // Find rows of current slice
1162
0
            const uint32_t prevSliceRow = nextSliceRow;
1163
0
            while(nextSliceRow < m_numRows && m_rows[nextSliceRow].sliceId == sliceId)
1164
0
                nextSliceRow++;
1165
1166
            // serialize each row, record final lengths in slice header
1167
0
            uint32_t maxStreamSize = m_nalList.serializeSubstreams(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow), &m_outStreams[prevSliceRow]);
1168
1169
            // complete the slice header by writing WPP row-starts
1170
0
            m_entropyCoder.setBitstream(&m_bs);
1171
0
            if (slice->m_pps->bEntropyCodingSyncEnabled)
1172
0
                m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);
1173
            
1174
0
            m_bs.writeByteAlignment();
1175
1176
0
            m_nalList.serialize(slice->m_nalUnitType, m_bs, layer, (!!m_param->bEnableTemporalSubLayers ? m_frame[layer]->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
1177
0
        }
1178
0
    }
1179
0
    else
1180
0
    {
1181
0
        if (m_param->bOptRefListLengthPPS)
1182
0
        {
1183
0
            ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
1184
0
            m_top->analyseRefIdx(slice->m_numRefIdx);
1185
0
        }
1186
0
        m_entropyCoder.codeSliceHeader(*slice, *m_frame[layer]->m_encData, 0, 0, slice->m_sliceQp, layer);
1187
1188
        // serialize each row, record final lengths in slice header
1189
0
        uint32_t maxStreamSize = m_nalList.serializeSubstreams(m_substreamSizes, numSubstreams, m_outStreams);
1190
1191
        // complete the slice header by writing WPP row-starts
1192
0
        m_entropyCoder.setBitstream(&m_bs);
1193
0
        if (slice->m_pps->bEntropyCodingSyncEnabled)
1194
0
            m_entropyCoder.codeSliceHeaderWPPEntryPoints(m_substreamSizes, (slice->m_sps->numCuInHeight - 1), maxStreamSize);
1195
0
        m_bs.writeByteAlignment();
1196
1197
0
        m_nalList.serialize(slice->m_nalUnitType, m_bs, layer, (!!m_param->bEnableTemporalSubLayers ? m_frame[layer]->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
1198
0
    }
1199
1200
0
    if (m_param->decodedPictureHashSEI)
1201
0
        writeTrailingSEIMessages(layer);
1202
1203
0
    uint64_t bytes = 0;
1204
0
    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
1205
0
    {
1206
0
        int type = m_nalList.m_nal[i].type;
1207
1208
        // exclude SEI
1209
0
        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
1210
0
        {
1211
0
            bytes += m_nalList.m_nal[i].sizeBytes;
1212
            // and exclude start code prefix
1213
0
            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
1214
0
        }
1215
0
    }
1216
0
    m_accessUnitBits[layer] = (layer) ? (bytes - (m_accessUnitBits[0] >> 3)) << 3 : bytes << 3;
1217
1218
0
    int filler = 0;
1219
    /* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */
1220
0
    if (!layer && m_top->m_rateControl->rateControlEnd(m_frame[layer], m_accessUnitBits[layer], &m_rce, &filler) < 0)
1221
0
        m_top->m_aborted = true;
1222
1223
#if ENABLE_ALPHA
1224
    if (layer && m_param->numScalableLayers > 1)
1225
        m_frame[layer]->m_encData->m_avgQpAq = m_frame[layer]->m_encData->m_avgQpRc;
1226
#endif
1227
#if ENABLE_MULTIVIEW
1228
    if (layer && m_param->numViews > 1)
1229
    {
1230
        double avgQpAq = 0;
1231
        for (uint32_t i = 0; i < slice->m_sps->numCuInHeight; i++)
1232
            avgQpAq += m_frame[layer]->m_encData->m_rowStat[i].sumQpAq;
1233
1234
        avgQpAq /= (slice->m_sps->numCUsInFrame * m_param->num4x4Partitions);
1235
        m_frame[layer]->m_encData->m_avgQpAq = avgQpAq;
1236
    }
1237
#endif
1238
1239
0
    if (filler > 0)
1240
0
    {
1241
0
        filler = (filler - FILLER_OVERHEAD * 8) >> 3;
1242
0
        m_bs.resetBits();
1243
0
        while (filler > 0)
1244
0
        {
1245
0
            m_bs.write(0xff, 8);
1246
0
            filler--;
1247
0
        }
1248
0
        m_bs.writeByteAlignment();
1249
0
        m_nalList.serialize(NAL_UNIT_FILLER_DATA, m_bs);
1250
0
        bytes += m_nalList.m_nal[m_nalList.m_numNal - 1].sizeBytes;
1251
0
        bytes -= 3; //exclude start code prefix
1252
0
        m_accessUnitBits[layer] = bytes << 3;
1253
0
    }
1254
1255
0
    if (m_frame[layer]->m_rpu.payloadSize)
1256
0
    {
1257
0
        m_bs.resetBits();
1258
0
        for (int i = 0; i < m_frame[layer]->m_rpu.payloadSize; i++)
1259
0
            m_bs.write(m_frame[layer]->m_rpu.payload[i], 8);
1260
0
        m_nalList.serialize(NAL_UNIT_UNSPECIFIED, m_bs);
1261
0
    }
1262
1263
0
    m_endCompressTime[layer] = x265_mdate();
1264
1265
    /* Decrement referenced frame reference counts, allow them to be recycled */
1266
0
    for (int l = 0; l < numPredDir; l++)
1267
0
    {
1268
0
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
1269
0
        {
1270
0
            Frame *refpic = slice->m_refFrameList[l][ref];
1271
0
            ATOMIC_DEC(&refpic->m_countRefEncoders);
1272
0
        }
1273
0
    }
1274
1275
0
    if (m_nr)
1276
0
    {
1277
0
        bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || m_param->noiseReductionInter);
1278
1279
0
        if (nrEnabled)
1280
0
        {
1281
            /* Accumulate NR statistics from all worker threads */
1282
0
            for (int i = 0; i < numTLD; i++)
1283
0
            {
1284
0
                NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
1285
0
                for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
1286
0
                {
1287
0
                    for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
1288
0
                        m_nr->nrResidualSum[cat][coeff] += nr->nrResidualSum[cat][coeff];
1289
1290
0
                    m_nr->nrCount[cat] += nr->nrCount[cat];
1291
0
                }
1292
0
            }
1293
1294
0
            noiseReductionUpdate();
1295
1296
            /* Copy updated NR coefficients back to all worker threads */
1297
0
            for (int i = 0; i < numTLD; i++)
1298
0
            {
1299
0
                NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
1300
0
                memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
1301
0
                memset(nr->nrCount, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES);
1302
0
                memset(nr->nrResidualSum, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
1303
0
            }
1304
0
        }
1305
0
    }
1306
1307
#if DETAILED_CU_STATS
1308
    /* Accumulate CU statistics from each worker thread, we could report
1309
     * per-frame stats here, but currently we do not. */
1310
    for (int i = 0; i < numTLD; i++)
1311
        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param);
1312
#endif
1313
1314
0
    m_endFrameTime[layer] = x265_mdate();
1315
0
}
1316
1317
void FrameEncoder::initDecodedPictureHashSEI(int row, int cuAddr, int height, int layer)
1318
0
{
1319
0
    PicYuv *reconPic = m_frame[layer]->m_reconPic[0];
1320
0
    uint32_t width = reconPic->m_picWidth;  
1321
0
    intptr_t stride = reconPic->m_stride;
1322
0
    uint32_t maxCUHeight = m_param->maxCUSize;
1323
1324
0
    const uint32_t hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
1325
0
    const uint32_t vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);
1326
1327
0
    if (m_param->decodedPictureHashSEI == 1)
1328
0
    {
1329
0
        if (!row)
1330
0
            MD5Init(&m_seiReconPictureDigest.m_state[0]);
1331
1332
0
        updateMD5Plane(m_seiReconPictureDigest.m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
1333
0
        if (m_param->internalCsp != X265_CSP_I400)
1334
0
        {
1335
0
            if (!row)
1336
0
            {
1337
0
                MD5Init(&m_seiReconPictureDigest.m_state[1]);
1338
0
                MD5Init(&m_seiReconPictureDigest.m_state[2]);
1339
0
            }
1340
1341
0
            width >>= hChromaShift;
1342
0
            height >>= vChromaShift;
1343
0
            stride = reconPic->m_strideC;
1344
1345
0
            updateMD5Plane(m_seiReconPictureDigest.m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
1346
0
            updateMD5Plane(m_seiReconPictureDigest.m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
1347
0
        }
1348
0
    }
1349
0
    else if (m_param->decodedPictureHashSEI == 2)
1350
0
    {
1351
1352
0
        if (!row)
1353
0
            m_seiReconPictureDigest.m_crc[0] = 0xffff;
1354
1355
0
        updateCRC(reconPic->getLumaAddr(cuAddr), m_seiReconPictureDigest.m_crc[0], height, width, stride);
1356
0
        if (m_param->internalCsp != X265_CSP_I400)
1357
0
        {
1358
0
            width >>= hChromaShift;
1359
0
            height >>= vChromaShift;
1360
0
            stride = reconPic->m_strideC;
1361
0
            m_seiReconPictureDigest.m_crc[1] = m_seiReconPictureDigest.m_crc[2] = 0xffff;
1362
1363
0
            updateCRC(reconPic->getCbAddr(cuAddr), m_seiReconPictureDigest.m_crc[1], height, width, stride);
1364
0
            updateCRC(reconPic->getCrAddr(cuAddr), m_seiReconPictureDigest.m_crc[2], height, width, stride);
1365
0
        }
1366
0
    }
1367
0
    else if (m_param->decodedPictureHashSEI == 3)
1368
0
    {
1369
0
        if (!row)
1370
0
            m_seiReconPictureDigest.m_checksum[0] = 0;
1371
1372
0
        updateChecksum(reconPic->m_picOrg[0], m_seiReconPictureDigest.m_checksum[0], height, width, stride, row, maxCUHeight);
1373
0
        if (m_param->internalCsp != X265_CSP_I400)
1374
0
        {
1375
0
            width >>= hChromaShift;
1376
0
            height >>= vChromaShift;
1377
0
            stride = reconPic->m_strideC;
1378
0
            maxCUHeight >>= vChromaShift;
1379
1380
0
            if (!row)
1381
0
                m_seiReconPictureDigest.m_checksum[1] = m_seiReconPictureDigest.m_checksum[2] = 0;
1382
1383
0
            updateChecksum(reconPic->m_picOrg[1], m_seiReconPictureDigest.m_checksum[1], height, width, stride, row, maxCUHeight);
1384
0
            updateChecksum(reconPic->m_picOrg[2], m_seiReconPictureDigest.m_checksum[2], height, width, stride, row, maxCUHeight);
1385
0
        }
1386
0
    }
1387
0
}
1388
1389
/* Generate the final bitstream for one slice of the given layer.
 *
 * Walks the CTUs from sliceAddr up to the slice's last CTU address, routes
 * each CTU to the output substream of its row (one substream per CTU row when
 * wavefront is enabled, a single one otherwise), writes the SAO syntax when
 * SAO is in use and then the CTU itself through m_entropyCoder. */
void FrameEncoder::encodeSlice(uint32_t sliceAddr, int layer)
{
    FrameData& encData = *m_frame[layer]->m_encData;
    Slice* slice = encData.m_slice;

    const uint32_t ctusPerRow = slice->m_sps->numCuInWidth;
    const uint32_t endCUAddr = (slice->m_endCUAddr + m_param->num4x4Partitions - 1) / m_param->num4x4Partitions;
    const uint32_t substreamCount = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : 1;

    // SAO syntax is only written when both the SPS and the slice enable it
    SAOParam* saoParam = NULL;
    if (slice->m_sps->bUseSAO && slice->m_bUseSao)
        saoParam = encData.m_saoParam;

    for (uint32_t cuAddr = sliceAddr; cuAddr < endCUAddr; cuAddr++)
    {
        const uint32_t ctuCol = cuAddr % ctusPerRow;
        const uint32_t ctuRow = cuAddr / ctusPerRow;
        CUData* ctu = encData.getPicCTU(cuAddr);

        m_entropyCoder.setBitstream(&m_outStreams[ctuRow % substreamCount]);

        // At the start of every row but the first, wavefront takes its CABAC
        // contexts from the state buffered by the row above.
        if (m_param->bEnableWavefront && ctuCol == 0 && ctuRow != 0)
        {
            m_entropyCoder.copyState(m_initSliceContext);
            m_entropyCoder.loadContexts(m_rows[ctuRow - 1].bufferedEntropy);
        }

        // The first CTU of a slice starts from the initial slice context
        if (ctu->m_bFirstRowInSlice && ctuCol == 0)
            m_entropyCoder.load(m_initSliceContext);

        if (saoParam)
        {
            const bool bSaoCoded = saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1];

            if (!bSaoCoded)
            {
                // Nothing is signalled: clear the per-CTU parameters of every plane
                const int planeCount = (m_param->internalCsp != X265_CSP_I400) ? 3 : 1;
                for (int plane = 0; plane < planeCount; plane++)
                    saoParam->ctuParam[plane][cuAddr].reset();
            }
            else
            {
                const bool bHasRowAbove = !ctu->m_bFirstRowInSlice;
                int mergeLeft = ctuCol && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_LEFT;
                int mergeUp = bHasRowAbove && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_UP;

                // merge-left is only coded when a left neighbour exists,
                // merge-up only when a row above exists and merge-left is off
                if (ctuCol)
                    m_entropyCoder.codeSaoMerge(mergeLeft);
                if (bHasRowAbove && !mergeLeft)
                    m_entropyCoder.codeSaoMerge(mergeUp);

                if (!(mergeLeft || mergeUp))
                {
                    if (saoParam->bSaoFlag[0])
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[0][cuAddr], 0);

                    if (saoParam->bSaoFlag[1])
                    {
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[1][cuAddr], 1);
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[2][cuAddr], 2);
                    }
                }
            }
        }

        // final coding (bitstream generation) for this CTU
        m_entropyCoder.encodeCTU(*ctu, m_cuGeoms[m_ctuGeomMap[cuAddr]]);

        if (!m_param->bEnableWavefront)
            continue;

        // Buffer the contexts after the second CTU of the row for the row below
        if (ctuCol == 1)
            m_rows[ctuRow].bufferedEntropy.loadContexts(m_entropyCoder);

        // Each wavefront row is terminated as its own substream
        if (ctuCol == ctusPerRow - 1)
            m_entropyCoder.finishSlice();
    }

    // Without wavefront there is a single substream, terminated once at the end
    if (!m_param->bEnableWavefront)
        m_entropyCoder.finishSlice();
}
1462
1463
void FrameEncoder::processRow(int row, int threadId, int layer)
1464
0
{
1465
0
    int64_t startTime = x265_mdate();
1466
0
    if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime[layer])
1467
0
        m_totalNoWorkerTime[layer] += x265_mdate() - m_stallStartTime[layer];
1468
1469
0
    const uint32_t realRow = m_idx_to_row[row >> 1];
1470
0
    const uint32_t typeNum = m_idx_to_row[row & 1];
1471
1472
0
    if (!typeNum)
1473
0
        processRowEncoder(realRow, m_tld[threadId], layer);
1474
0
    else
1475
0
    {
1476
0
        m_frameFilter.processRow(realRow, layer);
1477
1478
        // NOTE: Active next row
1479
0
        if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
1480
0
            enqueueRowFilter(m_row_to_idx[realRow + 1]);
1481
0
    }
1482
1483
0
    if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
1484
0
        m_stallStartTime[layer] = x265_mdate();
1485
1486
0
    m_totalWorkerElapsedTime[layer] += x265_mdate() - startTime; // not thread safe, but good enough
1487
0
}
1488
1489
// Called by worker threads
1490
void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer)
1491
0
{
1492
0
    const uint32_t row = (uint32_t)intRow;
1493
0
    CTURow& curRow = m_rows[row];
1494
1495
0
    if (m_param->bEnableWavefront)
1496
0
    {
1497
0
        ScopedLock self(curRow.lock);
1498
0
        if (!curRow.active)
1499
            /* VBV restart is in progress, exit out */
1500
0
            return;
1501
0
        if (curRow.busy)
1502
0
        {
1503
            /* On multi-socket Windows servers, we have seen problems with
1504
             * ATOMIC_CAS which resulted in multiple worker threads processing
1505
             * the same CU row, which often resulted in bad pointer accesses. We
1506
             * believe the problem is fixed, but are leaving this check in place
1507
             * to prevent crashes in case it is not */
1508
0
            x265_log(m_param, X265_LOG_WARNING,
1509
0
                     "internal error - simultaneous row access detected. Please report HW to x265-devel@videolan.org\n");
1510
0
            return;
1511
0
        }
1512
0
        curRow.busy = true;
1513
0
    }
1514
1515
    /* When WPP is enabled, every row has its own row coder instance. Otherwise
1516
     * they share row 0 */
1517
0
    Entropy& rowCoder = m_param->bEnableWavefront ? curRow.rowGoOnCoder : m_rows[0].rowGoOnCoder;
1518
0
    FrameData& curEncData = *m_frame[layer]->m_encData;
1519
0
    Slice *slice = curEncData.m_slice;
1520
1521
0
    const uint32_t numCols = m_numCols;
1522
0
    const uint32_t lineStartCUAddr = row * numCols;
1523
0
    bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
1524
1525
0
    const uint32_t sliceId = curRow.sliceId;
1526
0
    uint32_t maxBlockCols = (m_frame[layer]->m_fencPic->m_picWidth + (16 - 1)) / 16;
1527
0
    uint32_t noOfBlocks = m_param->maxCUSize / 16;
1528
0
    const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;
1529
0
    const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
1530
0
    const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
1531
0
    const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
1532
1533
    // Load SBAC coder context from previous row and initialize row state.
1534
0
    if (bFirstRowInSlice && !curRow.completed)        
1535
0
        rowCoder.load(m_initSliceContext);     
1536
1537
    // calculate mean QP for consistent deltaQP signalling calculation
1538
0
    if (m_param->bOptCUDeltaQP)
1539
0
    {
1540
0
        ScopedLock self(curRow.lock);
1541
0
        if (!curRow.avgQPComputed)
1542
0
        {
1543
0
            if (m_param->bEnableWavefront || !row)
1544
0
            {
1545
0
                double meanQPOff = 0;
1546
0
                bool isReferenced = IS_REFERENCED(m_frame[layer]);
1547
0
                double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame[layer]->m_lowres.qpCuTreeOffset : m_frame[layer]->m_lowres.qpAqOffset;
1548
0
                if (qpoffs)
1549
0
                {
1550
0
                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
1551
1552
0
                    uint32_t cuYStart = 0, height = m_frame[layer]->m_fencPic->m_picHeight;
1553
0
                    if (m_param->bEnableWavefront)
1554
0
                    {
1555
0
                        cuYStart = intRow * m_param->maxCUSize;
1556
0
                        height = cuYStart + m_param->maxCUSize;
1557
0
                    }
1558
1559
0
                    uint32_t qgSize = m_param->rc.qgSize, width = m_frame[layer]->m_fencPic->m_picWidth;
1560
0
                    uint32_t maxOffsetCols = (m_frame[layer]->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
1561
0
                    uint32_t count = 0;
1562
0
                    for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame[layer]->m_fencPic->m_picHeight); cuY += qgSize)
1563
0
                    {
1564
0
                        for (uint32_t cuX = 0; cuX < width; cuX += qgSize)
1565
0
                        {
1566
0
                            double qp_offset = 0;
1567
0
                            uint32_t cnt = 0;
1568
1569
0
                            for (uint32_t block_yy = cuY; block_yy < cuY + qgSize && block_yy < m_frame[layer]->m_fencPic->m_picHeight; block_yy += loopIncr)
1570
0
                            {
1571
0
                                for (uint32_t block_xx = cuX; block_xx < cuX + qgSize && block_xx < width; block_xx += loopIncr)
1572
0
                                {
1573
0
                                    int idx = ((block_yy / loopIncr) * (maxOffsetCols)) + (block_xx / loopIncr);
1574
0
                                    qp_offset += qpoffs[idx];
1575
0
                                    cnt++;
1576
0
                                }
1577
0
                            }
1578
0
                            qp_offset /= cnt;
1579
0
                            meanQPOff += qp_offset;
1580
0
                            count++;
1581
0
                        }
1582
0
                    }
1583
0
                    meanQPOff /= count;
1584
0
                }
1585
0
                rowCoder.m_meanQP = slice->m_sliceQp + meanQPOff;
1586
0
            }
1587
0
            else
1588
0
            {
1589
0
                rowCoder.m_meanQP = m_rows[0].rowGoOnCoder.m_meanQP;
1590
0
            }
1591
0
            curRow.avgQPComputed = 1;
1592
0
        }
1593
0
    }
1594
1595
    // Initialize restrict on MV range in slices
1596
0
    tld.analysis.m_sliceMinY = -(int32_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
1597
0
    tld.analysis.m_sliceMaxY = (int32_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4);
1598
1599
    // Handle single row slice
1600
0
    if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
1601
0
        tld.analysis.m_sliceMaxY = tld.analysis.m_sliceMinY = 0;
1602
1603
1604
0
    while (curRow.completed < numCols)
1605
0
    {
1606
0
        ProfileScopeEvent(encodeCTU);
1607
1608
0
        const uint32_t col = curRow.completed;
1609
0
        const uint32_t cuAddr = lineStartCUAddr + col;
1610
0
        CUData* ctu = curEncData.getPicCTU(cuAddr);
1611
0
        const uint32_t bLastCuInSlice = (bLastRowInSlice & (col == numCols - 1)) ? 1 : 0;
1612
0
        ctu->initCTU(*m_frame[layer], cuAddr, slice->m_sliceQp, bFirstRowInSlice, bLastRowInSlice, bLastCuInSlice);
1613
1614
0
        if (!layer && bIsVbv)
1615
0
        {
1616
0
            if (col == 0 && !m_param->bEnableWavefront)
1617
0
            {
1618
0
                m_backupStreams[0].copyBits(&m_outStreams[0]);
1619
0
                curRow.bufferedEntropy.copyState(rowCoder);
1620
0
                curRow.bufferedEntropy.loadContexts(rowCoder);
1621
0
            }
1622
0
            if (bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1623
0
            {
1624
0
                curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
1625
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
1626
0
            }
1627
1628
0
            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
1629
0
            if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1630
0
                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
1631
0
            else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1632
0
                cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
1633
0
            else
1634
0
                cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
1635
1636
            /* TODO: use defines from slicetype.h for lowres block size */
1637
0
            uint32_t block_y = (ctu->m_cuPelY >> m_param->maxLog2CUSize) * noOfBlocks;
1638
0
            uint32_t block_x = (ctu->m_cuPelX >> m_param->maxLog2CUSize) * noOfBlocks;
1639
0
            if (!strlen(m_param->analysisLoad) || !m_param->bDisableLookahead)
1640
0
            {
1641
0
                cuStat.vbvCost = 0;
1642
0
                cuStat.intraVbvCost = 0;
1643
1644
0
                for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
1645
0
                {
1646
0
                    uint32_t idx = block_x + (block_y * maxBlockCols);
1647
1648
0
                    for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)
1649
0
                    {
1650
0
                        cuStat.vbvCost += m_frame[layer]->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
1651
0
                        cuStat.intraVbvCost += m_frame[layer]->m_lowres.intraCost[idx];
1652
0
                    }
1653
0
                }
1654
0
            }
1655
0
        }
1656
0
        else
1657
0
            curEncData.m_cuStat[cuAddr].baseQp = curEncData.m_avgQpRc;
1658
1659
0
        if (m_param->bEnableWavefront && !col && !bFirstRowInSlice)
1660
0
        {
1661
            // Load SBAC coder context from previous row and initialize row state.
1662
0
            rowCoder.copyState(m_initSliceContext);
1663
0
            rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
1664
0
        }
1665
0
        if (m_param->dynamicRd && (int32_t)(m_rce.qpaRc - m_rce.qpNoVbv) > 0)
1666
0
            ctu->m_vbvAffected = true;
1667
1668
        // Does all the CU analysis, returns best top level mode decision
1669
0
        Mode& best = tld.analysis.compressCTU(*ctu, *m_frame[layer], m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
1670
1671
        /* startPoint > encodeOrder is true when the start point changes for
1672
        a new GOP but few frames from the previous GOP is still incomplete.
1673
        The data of frames in this interval will not be used by any future frames. */
1674
0
        if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame[layer]->m_encodeOrder)
1675
0
            collectDynDataRow(*ctu, &curRow.rowStats);
1676
1677
        // take a sample of the current active worker count
1678
0
        ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
1679
0
        ATOMIC_INC(&m_activeWorkerCountSamples);
1680
1681
        /* advance top-level row coder to include the context of this CTU.
1682
         * if SAO is disabled, rowCoder writes the final CTU bitstream */
1683
0
        rowCoder.encodeCTU(*ctu, m_cuGeoms[m_ctuGeomMap[cuAddr]]);
1684
1685
0
        if (m_param->bEnableWavefront && col == 1)
1686
            // Save CABAC state for next row
1687
0
            curRow.bufferedEntropy.loadContexts(rowCoder);
1688
1689
        /* SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas */
1690
0
        if (slice->m_bUseSao && m_param->bSaoNonDeblocked)
1691
0
            m_frameFilter.m_parallelFilter[row].m_sao.calcSaoStatsCu_BeforeDblk(m_frame[layer], col, row);
1692
1693
        /* Deblock with idle threading */
1694
0
        if (m_param->bEnableLoopFilter | slice->m_bUseSao)
1695
0
        {
1696
            // NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO
1697
0
            if (!bIsVbv)
1698
0
            {
1699
                // Delay one row to avoid intra prediction conflict
1700
0
                if (m_pool && !bFirstRowInSlice)
1701
0
                {                    
1702
0
                    int allowCol = col;
1703
1704
                    // avoid race condition on last column
1705
0
                    if (rowInSlice >= 2)
1706
0
                    {
1707
0
                        allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get()
1708
0
                                                                  : m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col);
1709
0
                    }
1710
0
                    m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
1711
0
                }
1712
1713
                // Last Row may start early
1714
0
                if (m_pool && bLastRowInSlice)
1715
0
                {
1716
                    // Deblocking last row
1717
0
                    int allowCol = col;
1718
1719
                    // avoid race condition on last column
1720
0
                    if (rowInSlice >= 2)
1721
0
                    {
1722
0
                        allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get()
1723
0
                                                                  : m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col);
1724
0
                    }
1725
0
                    m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
1726
0
                }
1727
0
            } // end of !bIsVbv
1728
0
        }
1729
        // Both Loopfilter and SAO Disabled
1730
0
        else
1731
0
        {
1732
0
            m_frameFilter.m_parallelFilter[row].processPostCu(col);
1733
0
        }
1734
1735
        // Completed CU processing
1736
0
        curRow.completed++;
1737
1738
0
        FrameStats frameLog;
1739
0
        curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
1740
1741
        // copy number of intra, inter cu per row into frame stats for 2 pass
1742
0
        if (m_param->rc.bStatWrite)
1743
0
        {
1744
0
            curRow.rowStats.mvBits    += best.mvBits;
1745
0
            curRow.rowStats.coeffBits += best.coeffBits;
1746
0
            curRow.rowStats.miscBits  += best.totalBits - (best.mvBits + best.coeffBits);
1747
1748
0
            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1749
0
            {
1750
                /* 1 << shift == number of 8x8 blocks at current depth */
1751
0
                int shift = 2 * (m_param->maxCUDepth - depth);
1752
0
                int cuSize = m_param->maxCUSize >> depth;
1753
1754
0
                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
1755
0
                                                               (int)(frameLog.cntIntra[depth] << shift);
1756
1757
0
                curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
1758
0
                curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
1759
0
            }
1760
0
        }
1761
0
        curRow.rowStats.totalCtu++;
1762
0
        curRow.rowStats.lumaDistortion   += best.lumaDistortion;
1763
0
        curRow.rowStats.chromaDistortion += best.chromaDistortion;
1764
0
        curRow.rowStats.psyEnergy        += best.psyEnergy;
1765
0
        curRow.rowStats.ssimEnergy       += best.ssimEnergy;
1766
0
        curRow.rowStats.resEnergy        += best.resEnergy;
1767
0
        curRow.rowStats.cntIntraNxN      += frameLog.cntIntraNxN;
1768
0
        curRow.rowStats.totalCu          += frameLog.totalCu;
1769
0
        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1770
0
        {
1771
0
            curRow.rowStats.cntSkipCu[depth] += frameLog.cntSkipCu[depth];
1772
0
            curRow.rowStats.cntMergeCu[depth] += frameLog.cntMergeCu[depth];
1773
0
            for (int m = 0; m < INTER_MODES; m++)
1774
0
                curRow.rowStats.cuInterDistribution[depth][m] += frameLog.cuInterDistribution[depth][m];
1775
0
            for (int n = 0; n < INTRA_MODES; n++)
1776
0
                curRow.rowStats.cuIntraDistribution[depth][n] += frameLog.cuIntraDistribution[depth][n];
1777
0
        }
1778
1779
0
        curEncData.m_cuStat[cuAddr].totalBits = best.totalBits;
1780
0
        x265_emms();
1781
1782
0
        if (!layer && bIsVbv)
1783
0
        {   
1784
            // Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled
1785
0
            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];    
1786
0
            if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
1787
0
            {
1788
0
                curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
1789
0
                curEncData.m_rowStat[row].rowIntraSatd += cuStat.intraVbvCost;
1790
0
                curEncData.m_rowStat[row].encodedBits += cuStat.totalBits;
1791
0
                curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
1792
0
                curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
1793
0
            }
1794
            
1795
            // If current block is at row end checkpoint, call vbv ratecontrol.
1796
0
            if (!m_param->bEnableWavefront && col == numCols - 1)
1797
0
            {
1798
0
                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
1799
0
                curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame[layer], row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
1800
0
                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
1801
0
                curEncData.m_rowStat[row].rowQp = qpBase;
1802
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
1803
0
                if (curRow.reEncode < 0)
1804
0
                {
1805
0
                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
1806
0
                        m_frame[layer]->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
1807
1808
0
                    m_vbvResetTriggerRow[curRow.sliceId] = row;
1809
0
                    m_outStreams[0].copyBits(&m_backupStreams[0]);
1810
1811
0
                    rowCoder.copyState(curRow.bufferedEntropy);
1812
0
                    rowCoder.loadContexts(curRow.bufferedEntropy);
1813
1814
0
                    curRow.completed = 0;
1815
0
                    memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));
1816
0
                    curEncData.m_rowStat[row].numEncodedCUs = 0;
1817
0
                    curEncData.m_rowStat[row].encodedBits = 0;
1818
0
                    curEncData.m_rowStat[row].rowSatd = 0;
1819
0
                    curEncData.m_rowStat[row].rowIntraSatd = 0;
1820
0
                    curEncData.m_rowStat[row].sumQpRc = 0;
1821
0
                    curEncData.m_rowStat[row].sumQpAq = 0;
1822
0
                }
1823
0
            }
1824
            // If current block is at row diagonal checkpoint, call vbv ratecontrol.
1825
0
            else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)
1826
0
            {
1827
0
                if (m_param->rc.bEnableConstVbv)
1828
0
                {
1829
0
                    uint32_t startCuAddr = numCols * row;
1830
0
                    uint32_t EndCuAddr = startCuAddr + col;
1831
1832
0
                    for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId]; r--)
1833
0
                    {
1834
0
                        for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)
1835
0
                        {
1836
0
                            curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
1837
0
                            curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
1838
0
                            curEncData.m_rowStat[r].encodedBits += curEncData.m_cuStat[c].totalBits;
1839
0
                            curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp;
1840
0
                            curEncData.m_rowStat[r].numEncodedCUs = c;
1841
0
                        }
1842
0
                        if (curRow.reEncode < 0)
1843
0
                            break;
1844
0
                        startCuAddr = EndCuAddr - numCols;
1845
0
                        EndCuAddr = startCuAddr + 1;
1846
0
                    }
1847
0
                }
1848
0
                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
1849
0
                curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame[layer], row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
1850
0
                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
1851
0
                curEncData.m_rowStat[row].rowQp = qpBase;
1852
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
1853
1854
0
                if (curRow.reEncode < 0)
1855
0
                {
1856
0
                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
1857
0
                             m_frame[layer]->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
1858
1859
                    // prevent the WaveFront::findJob() method from providing new jobs
1860
0
                    m_vbvResetTriggerRow[curRow.sliceId] = row;
1861
0
                    m_bAllRowsStop[curRow.sliceId] = true;
1862
1863
0
                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)
1864
0
                    {
1865
0
                        CTURow& stopRow = m_rows[r];
1866
1867
0
                        if (r != row)
1868
0
                        {
1869
                            /* if row was active (ready to be run) clear active bit and bitmap bit for this row */
1870
0
                            stopRow.lock.acquire();
1871
0
                            while (stopRow.active)
1872
0
                            {
1873
0
                                if (dequeueRow(m_row_to_idx[r] * 2))
1874
0
                                    stopRow.active = false;
1875
0
                                else
1876
0
                                {
1877
                                    /* we must release the row lock to allow the thread to exit */
1878
0
                                    stopRow.lock.release();
1879
0
                                    GIVE_UP_TIME();
1880
0
                                    stopRow.lock.acquire();
1881
0
                                }
1882
0
                            }
1883
0
                            stopRow.lock.release();
1884
1885
0
                            bool bRowBusy = true;
1886
0
                            do
1887
0
                            {
1888
0
                                stopRow.lock.acquire();
1889
0
                                bRowBusy = stopRow.busy;
1890
0
                                stopRow.lock.release();
1891
1892
0
                                if (bRowBusy)
1893
0
                                {
1894
0
                                    GIVE_UP_TIME();
1895
0
                                }
1896
0
                            }
1897
0
                            while (bRowBusy);
1898
0
                        }
1899
1900
0
                        m_outStreams[r].resetBits();
1901
0
                        stopRow.completed = 0;
1902
0
                        memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));
1903
0
                        curEncData.m_rowStat[r].numEncodedCUs = 0;
1904
0
                        curEncData.m_rowStat[r].encodedBits = 0;
1905
0
                        curEncData.m_rowStat[r].rowSatd = 0;
1906
0
                        curEncData.m_rowStat[r].rowIntraSatd = 0;
1907
0
                        curEncData.m_rowStat[r].sumQpRc = 0;
1908
0
                        curEncData.m_rowStat[r].sumQpAq = 0;
1909
0
                    }
1910
1911
0
                    m_bAllRowsStop[curRow.sliceId] = false;
1912
0
                }
1913
0
            }
1914
0
        }
1915
1916
0
        if (m_param->bEnableWavefront && curRow.completed >= 2 && !bLastRowInSlice &&
1917
0
            (!m_bAllRowsStop[curRow.sliceId] || intRow + 1 < m_vbvResetTriggerRow[curRow.sliceId]))
1918
0
        {
1919
            /* activate next row */
1920
0
            ScopedLock below(m_rows[row + 1].lock);
1921
1922
0
            if (m_rows[row + 1].active == false &&
1923
0
                m_rows[row + 1].completed + 2 <= curRow.completed)
1924
0
            {
1925
0
                m_rows[row + 1].active = true;
1926
0
                enqueueRowEncoder(m_row_to_idx[row + 1]);
1927
0
                tryWakeOne(); /* wake up a sleeping thread or set the help wanted flag */
1928
0
            }
1929
0
        }
1930
1931
0
        ScopedLock self(curRow.lock);
1932
0
        if ((m_bAllRowsStop[curRow.sliceId] && intRow > m_vbvResetTriggerRow[curRow.sliceId]) ||
1933
0
            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < curRow.completed + 2))
1934
0
        {
1935
0
            curRow.active = false;
1936
0
            curRow.busy = false;
1937
0
            ATOMIC_INC(&m_countRowBlocks);
1938
0
            return;
1939
0
        }
1940
0
    }
1941
1942
    /* this row of CTUs has been compressed */
1943
0
    if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
1944
0
    {
1945
0
        if (bLastRowInSlice)       
1946
0
        {
1947
0
            for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++)
1948
0
            {
1949
0
                for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs + 1; c < numCols * (r + 1); c++)
1950
0
                {
1951
0
                    curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
1952
0
                    curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
1953
0
                    curEncData.m_rowStat[r].encodedBits += curEncData.m_cuStat[c].totalBits;
1954
0
                    curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp;
1955
0
                    curEncData.m_rowStat[r].numEncodedCUs = c;
1956
0
                }
1957
0
            }
1958
0
        }
1959
0
    }
1960
1961
    /* If encoding with ABR, update update bits and complexity in rate control
1962
     * after a number of rows so the next frame's rateControlStart has more
1963
     * accurate data for estimation. At the start of the encode we update stats
1964
     * after half the frame is encoded, but after this initial period we update
1965
     * after refLagRows (the number of rows reference frames must have completed
1966
     * before referencees may begin encoding) */
1967
0
    if ((!layer) && (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv))
1968
0
    {
1969
0
        uint32_t rowCount = 0;
1970
0
        uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId];
1971
1972
0
        if (!m_rce.encodeOrder)
1973
0
            rowCount = maxRows - 1; 
1974
0
        else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))
1975
0
            rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
1976
0
        else
1977
0
            rowCount = X265_MIN(m_refLagRows / m_param->maxSlices, maxRows - 1);
1978
1979
0
        if (rowInSlice == rowCount)
1980
0
        {
1981
0
            m_rowSliceTotalBits[sliceId] = 0;
1982
0
            if (bIsVbv && !(m_param->rc.bEnableConstVbv && m_param->bEnableWavefront))
1983
0
            {
1984
0
                for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount + m_sliceBaseRow[sliceId]; i++)
1985
0
                    m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].encodedBits;
1986
0
            }
1987
0
            else
1988
0
            {
1989
0
                uint32_t startAddr = m_sliceBaseRow[sliceId] * numCols;
1990
0
                uint32_t finishAddr = startAddr + rowCount * numCols;
1991
                
1992
0
                for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++)
1993
0
                    m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].totalBits;
1994
0
            }
1995
1996
0
            if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
1997
0
            {
1998
0
                m_rce.rowTotalBits = 0;
1999
0
                for (uint32_t i = 0; i < m_param->maxSlices; i++)
2000
0
                    m_rce.rowTotalBits += m_rowSliceTotalBits[i];
2001
0
                m_top->m_rateControl->rateControlUpdateStats(&m_rce);
2002
0
            }
2003
0
        }
2004
0
    }
2005
2006
    /* flush row bitstream (if WPP and no SAO) or flush frame if no WPP and no SAO */
2007
    /* end_of_sub_stream_one_bit / end_of_slice_segment_flag */
2008
0
       if (!slice->m_bUseSao && (m_param->bEnableWavefront || bLastRowInSlice))
2009
0
               rowCoder.finishSlice();
2010
2011
2012
    /* Processing left Deblock block with current threading */
2013
0
    if ((m_param->bEnableLoopFilter | slice->m_bUseSao) & (rowInSlice >= 2))
2014
0
    {
2015
        /* Check conditional to start previous row process with current threading */
2016
0
        if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
2017
0
        {
2018
            /* stop threading on current row and restart it */
2019
0
            m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
2020
0
            m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
2021
0
        }
2022
0
    }
2023
2024
    /* trigger row-wise loop filters */
2025
0
    if (m_param->bEnableWavefront)
2026
0
    {
2027
0
        if (rowInSlice >= m_filterRowDelay)
2028
0
        {
2029
0
            enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
2030
2031
            /* NOTE: Activate filter if first row (row 0) */
2032
0
            if (rowInSlice == m_filterRowDelay)
2033
0
                enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
2034
0
            tryWakeOne();
2035
0
        }
2036
2037
0
        if (bLastRowInSlice)
2038
0
        {
2039
0
            for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++)
2040
0
            {
2041
0
                enableRowFilter(m_row_to_idx[i]);
2042
0
            }
2043
0
            tryWakeOne();
2044
0
        }
2045
2046
        // handle specially case - single row slice
2047
0
        if  (bFirstRowInSlice & bLastRowInSlice)
2048
0
        {
2049
0
            enqueueRowFilter(m_row_to_idx[row]);
2050
0
            tryWakeOne();
2051
0
        }
2052
0
    }
2053
2054
0
    curRow.busy = false;
2055
2056
    // CHECK_ME: Does it always FALSE condition?
2057
0
    if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
2058
0
        m_completionEvent.trigger();
2059
0
}
2060
2061
/* Add the values a CTU collected in m_collectCUVariance, m_collectCURd and
 * m_collectCUCount to the matching rowVarDyn, rowRdDyn and rowCntDyn slots of
 * rowStats. Slots are addressed as (depth * X265_REFINE_INTER_LEVELS + level);
 * a slot whose CTU count is zero contributes nothing. */
void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats)
{
    const uint32_t depthCount = m_param->maxCUDepth;

    for (uint32_t level = 0; level < X265_REFINE_INTER_LEVELS; level++)
    {
        for (uint32_t d = 0; d < depthCount; d++)
        {
            const int slot = (d * X265_REFINE_INTER_LEVELS) + level;

            /* nothing was collected for this (depth, level) pair */
            if (!ctu.m_collectCUCount[slot])
                continue;

            rowStats->rowVarDyn[slot] += ctu.m_collectCUVariance[slot];
            rowStats->rowRdDyn[slot] += ctu.m_collectCURd[slot];
            rowStats->rowCntDyn[slot] += ctu.m_collectCUCount[slot];
        }
    }
}
2077
2078
void FrameEncoder::collectDynDataFrame(int layer)
2079
0
{
2080
0
    for (uint32_t row = 0; row < m_numRows; row++)
2081
0
    {
2082
0
        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
2083
0
        {
2084
0
            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2085
0
            {
2086
0
                int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;
2087
0
                int curFrameIndex = m_frame[layer]->m_encodeOrder - m_top->m_startPoint;
2088
0
                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
2089
0
                if (m_rows[row].rowStats.rowCntDyn[offset])
2090
0
                {
2091
0
                    m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
2092
0
                    m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
2093
0
                    m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
2094
0
                }
2095
0
            }
2096
0
        }
2097
0
    }
2098
0
}
2099
2100
void FrameEncoder::computeAvgTrainingData(int layer)
2101
0
{
2102
0
    if (m_frame[layer]->m_lowres.bScenecut || m_frame[layer]->m_lowres.bKeyframe)
2103
0
    {
2104
0
        m_top->m_startPoint = m_frame[layer]->m_encodeOrder;
2105
0
        int size = (m_param->keyframeMax + m_param->lookaheadDepth) * m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
2106
0
        memset(m_top->m_variance, 0, size * sizeof(uint64_t));
2107
0
        memset(m_top->m_rdCost, 0, size * sizeof(uint64_t));
2108
0
        memset(m_top->m_trainingCount, 0, size * sizeof(uint32_t));
2109
0
    }
2110
0
    if (m_frame[layer]->m_encodeOrder - m_top->m_startPoint < 2 * m_param->frameNumThreads)
2111
0
        m_frame[layer]->m_classifyFrame = false;
2112
0
    else
2113
0
        m_frame[layer]->m_classifyFrame = true;
2114
2115
0
    int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
2116
0
    memset(m_frame[layer]->m_classifyRd, 0, size * sizeof(uint64_t));
2117
0
    memset(m_frame[layer]->m_classifyVariance, 0, size * sizeof(uint64_t));
2118
0
    memset(m_frame[layer]->m_classifyCount, 0, size * sizeof(uint32_t));
2119
0
    if (m_frame[layer]->m_classifyFrame)
2120
0
    {
2121
0
        uint32_t limit = m_frame[layer]->m_encodeOrder - m_top->m_startPoint - m_param->frameNumThreads;
2122
0
        for (uint32_t i = 1; i < limit; i++)
2123
0
        {
2124
0
            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
2125
0
            {
2126
0
                for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2127
0
                {
2128
0
                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
2129
0
                    int index = (i* X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
2130
0
                    if (m_top->m_trainingCount[index])
2131
0
                    {
2132
0
                        m_frame[layer]->m_classifyRd[offset] += m_top->m_rdCost[index] / m_top->m_trainingCount[index];
2133
0
                        m_frame[layer]->m_classifyVariance[offset] += m_top->m_variance[index] / m_top->m_trainingCount[index];
2134
0
                        m_frame[layer]->m_classifyCount[offset] += m_top->m_trainingCount[index];
2135
0
                    }
2136
0
                }
2137
0
            }
2138
0
        }
2139
        /* Calculates the average feature values of historic frames that are being considered for the current frame */
2140
0
        int historyCount = m_frame[layer]->m_encodeOrder - m_param->frameNumThreads - m_top->m_startPoint - 1;
2141
0
        if (historyCount)
2142
0
        {
2143
0
            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
2144
0
            {
2145
0
                for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2146
0
                {
2147
0
                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
2148
0
                    m_frame[layer]->m_classifyRd[offset] /= historyCount;
2149
0
                    m_frame[layer]->m_classifyVariance[offset] /= historyCount;
2150
0
                }
2151
0
            }
2152
0
        }
2153
0
    }
2154
0
}
2155
2156
/* collect statistics about CU coding decisions, return total QP */
2157
int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* log)
2158
0
{
2159
0
    int totQP = 0;
2160
0
    uint32_t depth = 0;
2161
0
    for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2162
0
    {
2163
0
        depth = ctu.m_cuDepth[absPartIdx];
2164
0
        totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
2165
0
    }
2166
2167
0
    if (m_param->csvLogLevel >= 1 || m_param->rc.bStatWrite)
2168
0
    {
2169
0
        if (ctu.m_slice->m_sliceType == I_SLICE)
2170
0
        {
2171
0
            depth = 0;
2172
0
            for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2173
0
            {
2174
0
                depth = ctu.m_cuDepth[absPartIdx];
2175
2176
0
                log->totalCu++;
2177
0
                log->cntIntra[depth]++;
2178
2179
0
                if (ctu.m_predMode[absPartIdx] == MODE_NONE)
2180
0
                {
2181
0
                    log->totalCu--;
2182
0
                    log->cntIntra[depth]--;
2183
0
                }
2184
0
                else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
2185
0
                {
2186
                    /* TODO: log intra modes at absPartIdx +0 to +3 */
2187
0
                    X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
2188
0
                    log->cntIntraNxN++;
2189
0
                    log->cntIntra[depth]--;
2190
0
                }
2191
0
                else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
2192
0
                    log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
2193
0
                else
2194
0
                    log->cuIntraDistribution[depth][ctu.m_lumaIntraDir[absPartIdx]]++;
2195
0
            }
2196
0
        }
2197
0
        else
2198
0
        {
2199
0
            depth = 0;
2200
0
            for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2201
0
            {
2202
0
                depth = ctu.m_cuDepth[absPartIdx];
2203
2204
0
                log->totalCu++;
2205
2206
0
                if (ctu.m_predMode[absPartIdx] == MODE_NONE)
2207
0
                    log->totalCu--;
2208
0
                else if (ctu.isSkipped(absPartIdx))
2209
0
                {
2210
0
                    if (ctu.m_mergeFlag[0])
2211
0
                        log->cntMergeCu[depth]++;
2212
0
                    else
2213
0
                        log->cntSkipCu[depth]++;
2214
0
                }
2215
0
                else if (ctu.isInter(absPartIdx))
2216
0
                {
2217
0
                    log->cntInter[depth]++;
2218
2219
0
                    if (ctu.m_partSize[absPartIdx] < AMP_ID)
2220
0
                        log->cuInterDistribution[depth][ctu.m_partSize[absPartIdx]]++;
2221
0
                    else
2222
0
                        log->cuInterDistribution[depth][AMP_ID]++;
2223
0
                }
2224
0
                else if (ctu.isIntra(absPartIdx))
2225
0
                {
2226
0
                    log->cntIntra[depth]++;
2227
2228
0
                    if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
2229
0
                    {
2230
0
                        X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
2231
0
                        log->cntIntraNxN++;
2232
0
                        log->cntIntra[depth]--;
2233
                        /* TODO: log intra modes at absPartIdx +0 to +3 */
2234
0
                    }
2235
0
                    else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
2236
0
                        log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
2237
0
                    else
2238
0
                        log->cuIntraDistribution[depth][ctu.m_lumaIntraDir[absPartIdx]]++;
2239
0
                }
2240
0
            }
2241
0
        }
2242
0
    }
2243
2244
0
    return totQP;
2245
0
}
2246
2247
/* DCT-domain noise reduction / adaptive deadzone from libavcodec */
2248
void FrameEncoder::noiseReductionUpdate()
2249
0
{
2250
0
    static const uint32_t maxBlocksPerTrSize[4] = {1 << 18, 1 << 16, 1 << 14, 1 << 12};
2251
2252
0
    for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
2253
0
    {
2254
0
        int trSize = cat & 3;
2255
0
        int coefCount = 1 << ((trSize + 2) * 2);
2256
2257
0
        if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])
2258
0
        {
2259
0
            for (int i = 0; i < coefCount; i++)
2260
0
                m_nr->nrResidualSum[cat][i] >>= 1;
2261
0
            m_nr->nrCount[cat] >>= 1;
2262
0
        }
2263
2264
0
        int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;
2265
0
        uint64_t scaledCount = (uint64_t)nrStrength * m_nr->nrCount[cat];
2266
2267
0
        for (int i = 0; i < coefCount; i++)
2268
0
        {
2269
0
            uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] / 2;
2270
0
            uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;
2271
0
            m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);
2272
0
        }
2273
2274
        // Don't denoise DC coefficients
2275
0
        m_nr->nrOffsetDenoise[cat][0] = 0;
2276
0
    }
2277
0
}
2278
2279
/* Fill *m_filmGrain from the binary film grain model file 'filmgrain'.
 *
 * Read order: a FilmGrain header, an optional ColourDescription (only when the
 * header's separate-colour-description flag is set), an FGPresent record and,
 * for every component whose present flag is set, the interval count, the model
 * value count and then each interval's bounds and model values.
 *
 * The intensityValues array and each compModelValue array are allocated here
 * with malloc. The function returns early, leaving the model partially filled,
 * when a read fails (see x265_fread) or an allocation fails. */
void FrameEncoder::readModel(FilmGrainCharacteristics* m_filmGrain, FILE* filmgrain)
{
    char const* errorMessage = "Error reading FilmGrain characteristics\n";

    FilmGrain header;
    x265_fread((char* )&header, sizeof(bool) * 3 + sizeof(uint8_t), 1, filmgrain, errorMessage);
    m_filmGrain->m_filmGrainCharacteristicsCancelFlag = header.m_filmGrainCharacteristicsCancelFlag;
    m_filmGrain->m_filmGrainCharacteristicsPersistenceFlag = header.m_filmGrainCharacteristicsPersistenceFlag;
    m_filmGrain->m_filmGrainModelId = header.m_filmGrainModelId;
    m_filmGrain->m_separateColourDescriptionPresentFlag = header.m_separateColourDescriptionPresentFlag;

    if (m_filmGrain->m_separateColourDescriptionPresentFlag)
    {
        ColourDescription colour;
        x265_fread((char* )&colour, sizeof(bool) + sizeof(uint8_t) * 5, 1, filmgrain, errorMessage);
        m_filmGrain->m_filmGrainBitDepthLumaMinus8 = colour.m_filmGrainBitDepthLumaMinus8;
        m_filmGrain->m_filmGrainBitDepthChromaMinus8 = colour.m_filmGrainBitDepthChromaMinus8;
        m_filmGrain->m_filmGrainFullRangeFlag = colour.m_filmGrainFullRangeFlag;
        m_filmGrain->m_filmGrainColourPrimaries = colour.m_filmGrainColourPrimaries;
        m_filmGrain->m_filmGrainTransferCharacteristics = colour.m_filmGrainTransferCharacteristics;
        m_filmGrain->m_filmGrainMatrixCoeffs = colour.m_filmGrainMatrixCoeffs;
    }

    FGPresent present;
    x265_fread((char* )&present, sizeof(bool) * 3 + sizeof(uint8_t) * 2, 1, filmgrain, errorMessage);
    m_filmGrain->m_blendingModeId = present.m_blendingModeId;
    m_filmGrain->m_log2ScaleFactor = present.m_log2ScaleFactor;
    m_filmGrain->m_compModel[0].bPresentFlag = present.m_presentFlag[0];
    m_filmGrain->m_compModel[1].bPresentFlag = present.m_presentFlag[1];
    m_filmGrain->m_compModel[2].bPresentFlag = present.m_presentFlag[2];

    for (int i = 0; i < MAX_NUM_COMPONENT; i++)
    {
        if (!m_filmGrain->m_compModel[i].bPresentFlag)
            continue;

        x265_fread((char* )(&m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1), sizeof(uint8_t), 1, filmgrain, errorMessage);
        x265_fread((char* )(&m_filmGrain->m_compModel[i].numModelValues), sizeof(uint8_t), 1, filmgrain, errorMessage);

        const int numIntervals = m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1 + 1;
        const int numValues = m_filmGrain->m_compModel[i].numModelValues;

        FilmGrainCharacteristics::CompModelIntensityValues* intervals =
            (FilmGrainCharacteristics::CompModelIntensityValues* ) malloc(sizeof(FilmGrainCharacteristics::CompModelIntensityValues) * numIntervals);
        m_filmGrain->m_compModel[i].intensityValues = intervals;
        if (!intervals)
        {
            /* numIntervals is at least 1, so NULL always means failure here */
            x265_log(NULL, X265_LOG_ERROR, "FilmGrain intensity interval alloc failed\n");
            return;
        }

        for (int j = 0; j < numIntervals; j++)
        {
            x265_fread((char* )(&intervals[j].intensityIntervalLowerBound), sizeof(uint8_t), 1, filmgrain, errorMessage);
            x265_fread((char* )(&intervals[j].intensityIntervalUpperBound), sizeof(uint8_t), 1, filmgrain, errorMessage);

            intervals[j].compModelValue = (int* ) malloc(sizeof(int) * numValues);

            /* malloc(0) may legitimately return NULL, so only a NULL result
             * for a non-empty request is treated as an error */
            if (!intervals[j].compModelValue && numValues)
            {
                x265_log(NULL, X265_LOG_ERROR, "FilmGrain model value alloc failed\n");
                return;
            }

            for (int k = 0; k < numValues; k++)
            {
                x265_fread((char* )(&intervals[j].compModelValue[k]), sizeof(int), 1, filmgrain, errorMessage);
            }
        }
    }
}
2326
2327
/* Fill *m_aomFilmGrain from the binary AOM film grain file 'Aomfilmgrain'.
 *
 * Fields are read one by one in file order; most are 32-bit, m_grain_seed is
 * 16-bit. The luma / cb / cr point counts are validated against the capacity
 * of their scaling-point arrays before any point is stored: the counts come
 * straight from the file and would otherwise index past the arrays. An invalid
 * count is reset to 0, an error is logged and reading stops. Reading also
 * stops at the first failed read (see x265_fread).
 *
 * NOTE(review): the capacity is derived with sizeof, which assumes the
 * m_scaling_points_* members are fixed-size arrays - confirm against the
 * AomFilmGrainCharacteristics declaration. */
void FrameEncoder::readAomModel(AomFilmGrainCharacteristics* m_aomFilmGrain, FILE* Aomfilmgrain)
{
    char const* errorMessage = "Error reading Aom FilmGrain characteristics\n";
    char const* rangeMessage = "Invalid scaling point count in Aom FilmGrain characteristics\n";

    /* chroma scaling from luma is not read from the file; it is fixed to 0 */
    const bool bChromaScalingFromLuma = false;

    const int maxYPoints = (int)(sizeof(m_aomFilmGrain->m_scaling_points_y) / sizeof(m_aomFilmGrain->m_scaling_points_y[0]));
    const int maxCbPoints = (int)(sizeof(m_aomFilmGrain->m_scaling_points_cb) / sizeof(m_aomFilmGrain->m_scaling_points_cb[0]));
    const int maxCrPoints = (int)(sizeof(m_aomFilmGrain->m_scaling_points_cr) / sizeof(m_aomFilmGrain->m_scaling_points_cr[0]));

    x265_fread((char*)&m_aomFilmGrain->m_apply_grain, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_grain_seed, sizeof(uint16_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_update_grain, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);

    /* luma scaling points */
    x265_fread((char*)&m_aomFilmGrain->m_num_y_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    if (m_aomFilmGrain->m_num_y_points < 0 || m_aomFilmGrain->m_num_y_points > maxYPoints)
    {
        m_aomFilmGrain->m_num_y_points = 0;
        x265_log(NULL, X265_LOG_ERROR, rangeMessage);
        return;
    }
    for (int i = 0; i < m_aomFilmGrain->m_num_y_points; i++)
    {
        for (int j = 0; j < 2; j++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_y[i][j], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        }
    }

    /* cb scaling points */
    x265_fread((char*)&m_aomFilmGrain->m_num_cb_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    if (m_aomFilmGrain->m_num_cb_points < 0 || m_aomFilmGrain->m_num_cb_points > maxCbPoints)
    {
        m_aomFilmGrain->m_num_cb_points = 0;
        x265_log(NULL, X265_LOG_ERROR, rangeMessage);
        return;
    }
    for (int i = 0; i < m_aomFilmGrain->m_num_cb_points; i++)
    {
        for (int j = 0; j < 2; j++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cb[i][j], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        }
    }

    /* cr scaling points */
    x265_fread((char*)&m_aomFilmGrain->m_num_cr_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    if (m_aomFilmGrain->m_num_cr_points < 0 || m_aomFilmGrain->m_num_cr_points > maxCrPoints)
    {
        m_aomFilmGrain->m_num_cr_points = 0;
        x265_log(NULL, X265_LOG_ERROR, rangeMessage);
        return;
    }
    for (int i = 0; i < m_aomFilmGrain->m_num_cr_points; i++)
    {
        for (int j = 0; j < 2; j++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cr[i][j], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        }
    }

    x265_fread((char*)&m_aomFilmGrain->m_scaling_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_ar_coeff_lag, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);

    /* auto-regression coefficients are present only for planes that have scaling points */
    if (m_aomFilmGrain->m_num_y_points)
    {
        for (int i = 0; i < 24; i++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_y[i], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        }
    }
    if (m_aomFilmGrain->m_num_cb_points || bChromaScalingFromLuma)
    {
        for (int i = 0; i < 25; i++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_cb[i], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        }
    }
    if (m_aomFilmGrain->m_num_cr_points || bChromaScalingFromLuma)
    {
        for (int i = 0; i < 25; i++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_cr[i], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        }
    }

    x265_fread((char*)&m_aomFilmGrain->m_ar_coeff_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_grain_scale_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);

    if (m_aomFilmGrain->m_num_cb_points)
    {
        x265_fread((char*)&m_aomFilmGrain->m_cb_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cb_luma_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cb_offset, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    }
    if (m_aomFilmGrain->m_num_cr_points)
    {
        x265_fread((char*)&m_aomFilmGrain->m_cr_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cr_luma_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cr_offset, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    }

    x265_fread((char*)&m_aomFilmGrain->m_overlap_flag, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_clip_to_restricted_range, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
}
2410
2411
#if ENABLE_LIBVMAF
2412
void FrameEncoder::vmafFrameLevelScore()
2413
{
2414
    PicYuv *fenc = m_frame[0]->m_fencPic;
2415
    PicYuv *recon = m_frame[0]->m_reconPic[0];
2416
2417
    x265_vmaf_framedata *vmafframedata = (x265_vmaf_framedata*)x265_malloc(sizeof(x265_vmaf_framedata));
2418
    if (!vmafframedata)
2419
    {
2420
        x265_log(NULL, X265_LOG_ERROR, "vmaf frame data alloc failed\n");
2421
    }
2422
2423
    vmafframedata->height = fenc->m_picHeight;
2424
    vmafframedata->width = fenc->m_picWidth;
2425
    vmafframedata->frame_set = 0;
2426
    vmafframedata->internalBitDepth = m_param->internalBitDepth;
2427
    vmafframedata->reference_frame = fenc;
2428
    vmafframedata->distorted_frame = recon;
2429
    fenc->m_vmafScore = x265_calculate_vmaf_framelevelscore(m_param,vmafframedata);
2430
2431
    if (vmafframedata)
2432
    x265_free(vmafframedata);
2433
}
2434
#endif
2435
2436
/* Hand the finished frame(s) of this encoder to the caller.
 *
 * Returns NULL when m_frame[0] is not set, or when more than one layer is
 * configured and m_frame[1] is not set. Otherwise waits on m_done, moves each
 * layer's frame pointer into m_retFrameBuffer (clearing m_frame[i] and
 * stamping m_prevOutputTime[i]), transfers m_nalList into 'output' and
 * returns m_retFrameBuffer. */
Frame** FrameEncoder::getEncodedPicture(NALList& output)
{
    if (!m_frame[0])
        return NULL;

    /* multi-layer encodes also need the second layer's frame */
    if (m_param->numLayers > 1 && !(MAX_LAYERS > 1 && m_frame[1]))
        return NULL;

    /* block here until worker thread completes */
    m_done.wait();

    for (int layer = 0; layer < m_param->numLayers; layer++)
    {
        m_retFrameBuffer[layer] = m_frame[layer];
        m_frame[layer] = NULL;
        m_prevOutputTime[layer] = x265_mdate();
    }

    output.takeContents(m_nalList);
    return m_retFrameBuffer;
}
2455
}