Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/frameencoder.cpp
Line
Count
Source
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
5
 *          Min Chen <chenm003@163.com>
6
 *          Steve Borho <steve@borho.org>
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with this program; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21
 *
22
 * This program is also available under a commercial proprietary license.
23
 * For more information, contact us at license @ x265.com.
24
 *****************************************************************************/
25
26
#include "common.h"
27
#include "frame.h"
28
#include "framedata.h"
29
#include "wavefront.h"
30
#include "param.h"
31
32
#include "encoder.h"
33
#include "frameencoder.h"
34
#include "common.h"
35
#include "slicetype.h"
36
#include "nal.h"
37
#include "temporalfilter.h"
38
39
#include <iostream>
40
41
namespace X265_NS {
42
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param);
43
44
/* Put a freshly constructed frame encoder into a known, inert state.
 *
 * Every pointer member that destroy() hands to delete[] / X265_FREE is cleared
 * here, including the ones that are only assigned later by init(), so none of
 * them is ever released while still holding an indeterminate value. */
FrameEncoder::FrameEncoder()
{
    m_reconfigure = false;
    m_isFrameEncoder = true;
    m_threadActive = true;
    m_activeWorkerCount = 0;
    m_completionCount = 0;
    m_outStreams = NULL;
    m_backupStreams = NULL;
    m_substreamSizes = NULL;
    m_nr = NULL;
    m_tld = NULL;
    m_rows = NULL;
    m_top = NULL;
    m_param = NULL;
    m_cuGeoms = NULL;
    m_ctuGeomMap = NULL;

    /* allocated in init(), released in destroy() */
    m_sliceBaseRow = NULL;
    m_bAllRowsStop = NULL;
    m_vbvResetTriggerRow = NULL;
    m_sliceMaxBlockRow = NULL;
    m_retFrameBuffer = NULL;

    m_localTldIdx = 0;

    /* zeroing the rate control entry also clears its picTimingSEI / hrdTiming pointers */
    memset(&m_rce, 0, sizeof(RateControlEntry));

    for (int layer = 0; layer < MAX_LAYERS; layer++)
    {
        m_prevOutputTime[layer] = x265_mdate();
        m_slicetypeWaitTime[layer] = 0;
        m_frame[layer] = NULL;
    }
}
70
71
void FrameEncoder::destroy()
72
2.89k
{
73
2.89k
    if (m_pool)
74
2.89k
    {
75
2.89k
        if (!m_jpId)
76
654
        {
77
654
            int numTLD = m_pool->m_numWorkers;
78
654
            if (!m_param->bEnableWavefront)
79
123
                numTLD += m_pool->m_numProviders;
80
21.9k
            for (int i = 0; i < numTLD; i++)
81
21.2k
                m_tld[i].destroy();
82
654
            delete [] m_tld;
83
654
        }
84
2.89k
    }
85
0
    else
86
0
    {
87
0
        m_tld->destroy();
88
0
        delete m_tld;
89
0
    }
90
91
2.89k
    delete[] m_rows;
92
2.89k
    delete[] m_outStreams;
93
2.89k
    delete[] m_backupStreams;
94
2.89k
    X265_FREE(m_sliceBaseRow);
95
2.89k
    X265_FREE((void*)m_bAllRowsStop);
96
2.89k
    X265_FREE((void*)m_vbvResetTriggerRow);
97
2.89k
    X265_FREE(m_sliceMaxBlockRow);
98
2.89k
    X265_FREE(m_cuGeoms);
99
2.89k
    X265_FREE(m_ctuGeomMap);
100
2.89k
    X265_FREE(m_substreamSizes);
101
2.89k
    X265_FREE(m_nr);
102
2.89k
    X265_FREE(m_retFrameBuffer);
103
104
2.89k
    m_frameFilter.destroy();
105
106
2.89k
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
107
0
    {
108
0
        delete m_rce.picTimingSEI;
109
0
        delete m_rce.hrdTiming;
110
0
    }
111
2.89k
}
112
113
/* Bind this frame encoder to its owning Encoder and size all per-row and
 * per-slice bookkeeping for a picture of numRows x numCols CTUs.
 *
 *   top      owning encoder; its m_param becomes this object's parameter set
 *   numRows  number of CTU rows
 *   numCols  number of CTU columns
 *
 * Returns false when numRows is zero or when any of the X265_MALLOC'ed tables
 * could not be allocated. Every pointer member is still assigned before
 * returning, so destroy() may be called after a failed init(). A wavefront
 * queue failure is logged and downgrades the encoder to pool-less operation
 * (m_pool = NULL) without failing init(). */
bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
{
    m_top = top;
    m_param = top->m_param;
    m_numRows = numRows;
    m_numCols = numCols;
    m_reconfigure = false;

    /* 2 rows of delay when SAO runs on non-deblocked pixels or without the loop
     * filter, 1 row when either SAO or the loop filter is on, otherwise none */
    m_filterRowDelay = ((m_param->bEnableSAO && m_param->bSaoNonDeblocked)
                        || (!m_param->bEnableLoopFilter && m_param->bEnableSAO)) ?
                        2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
    m_filterRowDelayCus = m_filterRowDelay * numCols;
    m_rows = new CTURow[m_numRows];
    bool ok = !!m_numRows;

    m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
    m_bAllRowsStop = X265_MALLOC(bool, m_param->maxSlices);
    m_vbvResetTriggerRow = X265_MALLOC(int, m_param->maxSlices);
    ok &= m_sliceBaseRow && m_bAllRowsStop && m_vbvResetTriggerRow;

    /* cast the quotient, not the dividend, so the rounding-up division is done at full width */
    m_sliceGroupSize = (uint16_t)((m_numRows + m_param->maxSlices - 1) / m_param->maxSlices);

    /* distribute the CTU rows over the slices using 24.8 fixed-point accumulation */
    uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
    uint32_t rowSum = sliceGroupSizeAccu;
    uint32_t sidx = 0;
    if (m_sliceBaseRow)
    {
        for (uint32_t i = 0; i < m_numRows; i++)
        {
            const uint32_t rowRange = (rowSum >> 8);
            if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
            {
                rowSum += sliceGroupSizeAccu;
                m_sliceBaseRow[++sidx] = i;
            }
        }
        m_sliceBaseRow[0] = 0;
        m_sliceBaseRow[m_param->maxSlices] = m_numRows;
    }
    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");

    /* same partitioning, expressed in rows of 16-pixel blocks */
    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
    ok &= !!m_sliceMaxBlockRow;
    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
    rowSum = sliceGroupSizeAccu;
    sidx = 0;
    if (m_sliceMaxBlockRow)
    {
        for (uint32_t i = 0; i < maxBlockRows; i++)
        {
            const uint32_t rowRange = (rowSum >> 8);
            if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
            {
                rowSum += sliceGroupSizeAccu;
                m_sliceMaxBlockRow[++sidx] = i;
            }
        }
        m_sliceMaxBlockRow[0] = 0;
        m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
    }

    /* determine full motion search range */
    int range  = m_param->searchRange;       /* fpel search */
    range += !!(m_param->searchMethod < 2);  /* diamond/hex range check lag */
    range += NTAPS_LUMA / 2;                 /* subpel filter half-length */
    range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine) + 1) / 2; /* subpel refine steps */
    m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range + m_param->maxCUSize - 1) / m_param->maxCUSize);

    // NOTE: 2 times of numRows because both Encoder and Filter in same queue
    if (!WaveFront::init(m_numRows * 2))
    {
        x265_log(m_param, X265_LOG_ERROR, "unable to initialize wavefront queue\n");
        m_pool = NULL;
    }

    m_frameFilter.init(top, this, numRows, numCols);

    // initialize HRD parameters of SPS
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
    {
        m_rce.picTimingSEI = new SEIPictureTiming;
        m_rce.hrdTiming = new HRDTiming;

        ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
    }

    /* noise reduction is silently switched off when its state cannot be allocated */
    if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
        m_nr = X265_MALLOC(NoiseReduction, 1);
    if (m_nr)
        memset(m_nr, 0, sizeof(NoiseReduction));
    else
        m_param->noiseReductionIntra = m_param->noiseReductionInter = 0;

    // 7.4.7.1 - Ceil( Log2( PicSizeInCtbsY ) ) bits
    {
        /* A single-CTU picture needs no address bits. It is handled explicitly
         * because a bit scan of zero has no defined result (BSR is presumably a
         * bit-scan-reverse wrapper; it is defined outside this file). */
        const int lastCtuAddr = numRows * numCols - 1;
        m_sliceAddrBits = 0;
        if (lastCtuAddr != 0)
        {
            unsigned long tmp;
            BSR(tmp, lastCtuAddr);
            m_sliceAddrBits = (uint16_t)(tmp + 1);
        }
    }

    m_tmeDeps.resize(m_numRows);

    m_retFrameBuffer = X265_MALLOC(Frame*, m_param->numLayers);
    ok &= !!m_retFrameBuffer;
    if (m_retFrameBuffer)
    {
        for (int layer = 0; layer < m_param->numLayers; layer++)
            m_retFrameBuffer[layer] = NULL;
    }
    return ok;
}
212
213
/* Generate a complete list of unique geom sets for the current picture dimensions */
214
bool FrameEncoder::initializeGeoms()
215
654
{
216
    /* Geoms only vary between CTUs in the presence of picture edges */
217
654
    int maxCUSize = m_param->maxCUSize;
218
654
    int minCUSize = m_param->minCUSize;
219
654
    int heightRem = m_param->sourceHeight & (maxCUSize - 1);
220
654
    int widthRem = m_param->sourceWidth & (maxCUSize - 1);
221
654
    int allocGeoms = 1; // body
222
654
    if (heightRem && widthRem)
223
353
        allocGeoms = 4; // body, right, bottom, corner
224
301
    else if (heightRem || widthRem)
225
185
        allocGeoms = 2; // body, right or bottom
226
227
654
    m_ctuGeomMap = X265_MALLOC(uint32_t, m_numRows * m_numCols);
228
654
    m_cuGeoms = X265_MALLOC(CUGeom, allocGeoms * CUGeom::MAX_GEOMS);
229
654
    if (!m_cuGeoms || !m_ctuGeomMap)
230
0
        return false;
231
232
    // body
233
654
    CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, minCUSize, m_cuGeoms);
234
654
    memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols);
235
654
    if (allocGeoms == 1)
236
116
        return true;
237
238
538
    int countGeoms = 1;
239
538
    if (widthRem)
240
440
    {
241
        // right
242
440
        CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
243
1.95k
        for (uint32_t i = 0; i < m_numRows; i++)
244
1.51k
        {
245
1.51k
            uint32_t ctuAddr = m_numCols * (i + 1) - 1;
246
1.51k
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
247
1.51k
        }
248
440
        countGeoms++;
249
440
    }
250
538
    if (heightRem)
251
451
    {
252
        // bottom
253
451
        CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
254
2.07k
        for (uint32_t i = 0; i < m_numCols; i++)
255
1.62k
        {
256
1.62k
            uint32_t ctuAddr = m_numCols * (m_numRows - 1) + i;
257
1.62k
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
258
1.62k
        }
259
451
        countGeoms++;
260
261
451
        if (widthRem)
262
353
        {
263
            // corner
264
353
            CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
265
266
353
            uint32_t ctuAddr = m_numCols * m_numRows - 1;
267
353
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
268
353
            countGeoms++;
269
353
        }
270
451
        X265_CHECK(countGeoms == allocGeoms, "geometry match check failure\n");
271
451
    }
272
273
538
    return true;
274
654
}
275
276
/* Hand one frame per layer to this encoder and wake its thread.
 *
 * For each of the m_param->numLayers layers this records how long the encoder
 * has been waiting since its previous output, stores the frame, and tags the
 * frame's encode data with this encoder's id, job provider and motion
 * references. The CU geometry tables are built on first use.
 *
 * Returns false (without triggering m_enable) if initializeGeoms() fails. */
bool FrameEncoder::startCompressFrame(Frame* curFrame[MAX_LAYERS])
{
    const int layerCount = m_param->numLayers;

    for (int idx = 0; idx < layerCount; ++idx)
    {
        Frame* const pic = curFrame[idx];

        m_slicetypeWaitTime[idx] = x265_mdate() - m_prevOutputTime[idx];
        m_frame[idx] = pic;

        pic->m_encData->m_frameEncoderID = m_jpId;
        pic->m_encData->m_jobProvider = this;
        pic->m_encData->m_slice->m_mref = m_mref;
    }

    /* the slice type of the encoder is always taken from layer 0 */
    m_sliceType = curFrame[0]->m_lowres.sliceType;

    /* lazily build the geometry tables the first time a frame arrives */
    if (!m_cuGeoms && !initializeGeoms())
        return false;

    m_enable.trigger();
    return true;
}
297
298
void FrameEncoder::threadMain()
299
2.89k
{
300
2.89k
    THREAD_NAME("Frame", m_jpId);
301
302
2.89k
    if (m_pool)
303
2.89k
    {
304
2.89k
        m_pool->setCurrentThreadAffinity();
305
306
        /* the first FE on each NUMA node is responsible for allocating thread
307
         * local data for all worker threads in that pool. If WPP is disabled, then
308
         * each FE also needs a TLD instance */
309
2.89k
        if (!m_jpId)
310
654
        {
311
654
            int numTLD = m_pool->m_numWorkers;
312
654
            if (!m_param->bEnableWavefront)
313
123
                numTLD += m_pool->m_numProviders;
314
315
654
            m_tld = new ThreadLocalData[numTLD];
316
21.9k
            for (int i = 0; i < numTLD; i++)
317
21.2k
            {
318
21.2k
                m_tld[i].analysis.initSearch(*m_param, m_top->m_scalingList);
319
21.2k
                m_tld[i].analysis.create(m_tld);
320
21.2k
            }
321
322
4.20k
            for (int i = 0; i < m_pool->m_numProviders; i++)
323
3.55k
            {
324
3.55k
                if (m_pool->m_jpTable[i]->m_isFrameEncoder) /* ugh; over-allocation and other issues here */
325
2.89k
                {
326
2.89k
                    FrameEncoder *peer = dynamic_cast<FrameEncoder*>(m_pool->m_jpTable[i]);
327
2.89k
                    peer->m_tld = m_tld;
328
2.89k
                }
329
3.55k
            }
330
654
        }
331
332
2.89k
        if (m_param->bEnableWavefront)
333
2.65k
            m_localTldIdx = -1; // cause exception if used
334
244
        else
335
244
            m_localTldIdx = m_pool->m_numWorkers + m_jpId;
336
2.89k
    }
337
0
    else
338
0
    {
339
0
        m_tld = new ThreadLocalData;
340
0
        m_tld->analysis.initSearch(*m_param, m_top->m_scalingList);
341
0
        m_tld->analysis.create(NULL);
342
0
        m_localTldIdx = 0;
343
0
    }
344
345
2.89k
    m_done.trigger();     /* signal that thread is initialized */
346
2.89k
    m_enable.wait();      /* Encoder::encode() triggers this event */
347
348
3.55k
    while (m_threadActive)
349
654
    {
350
654
        if (m_param->bCTUInfo)
351
0
        {
352
0
            while (!m_frame[0]->m_ctuInfo)
353
0
                m_frame[0]->m_copied.wait();
354
0
        }
355
654
        if ((m_param->bAnalysisType == AVC_INFO) && !strlen(m_param->analysisSave) && !strlen(m_param->analysisLoad) && !(IS_X265_TYPE_I(m_frame[0]->m_lowres.sliceType)))
356
0
        {
357
0
            while (((m_frame[0]->m_analysisData.interData == NULL && m_frame[0]->m_analysisData.intraData == NULL) || (uint32_t)m_frame[0]->m_poc != m_frame[0]->m_analysisData.poc))
358
0
                m_frame[0]->m_copyMVType.wait();
359
0
        }
360
361
1.30k
        for (int layer = 0; layer < m_param->numLayers; layer++)
362
654
            compressFrame(layer);
363
654
        m_done.trigger(); /* FrameEncoder::getEncodedPicture() blocks for this event */
364
654
        m_enable.wait();
365
654
    }
366
2.89k
}
367
368
void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */)
369
0
{
370
0
    Frame* frame = master.m_frame[master.m_sLayerId];
371
0
    weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param);
372
0
}
373
374
375
/* Return the number of bits needed to write `code` with a signed Exp-Golomb
 * style code: positive values map to 2*code - 1, zero and negative values to
 * -2*code, and a mapped value k costs 2*floor(log2(k + 1)) + 1 bits.
 *
 * The mapping is evaluated in 64-bit arithmetic so that the doubling and the
 * negation cannot overflow; the result is defined for every int32_t input
 * (1 bit for 0, up to 65 bits for INT32_MIN). */
uint32_t getBsLength( int32_t code )
{
    const int64_t wide = code;
    uint64_t ucode = (wide <= 0) ? (uint64_t)(-wide) << 1 : ((uint64_t)wide << 1) - 1;

    ++ucode;

    /* idx = floor(log2(ucode)); ucode is at least 1 here */
    uint32_t idx = 0;
    while (ucode >>= 1)
        ++idx;

    return idx * 2 + 1;
}
386
387
/* Decide whether a tone-map SEI payload has to be written for the current frame.
 *
 * The payload is compared with the copy cached in m_top->m_prevTonemapPayload;
 * when it differs in size or content the cache is refreshed. Returns true when
 * the payload changed or when the layer-0 frame is an IDR.
 *
 * If the cache buffer cannot be allocated, the cache is left empty (NULL, size
 * 0) rather than copied into; the next call then reports a change again. */
bool FrameEncoder::writeToneMapInfo(x265_sei_payload *payload)
{
    bool payloadChange = false;
    if (m_top->m_prevTonemapPayload.payload != NULL && payload->payloadSize == m_top->m_prevTonemapPayload.payloadSize)
    {
        /* same size: reuse the buffer, only the contents may differ */
        if (memcmp(m_top->m_prevTonemapPayload.payload, payload->payload, payload->payloadSize) != 0)
            payloadChange = true;
    }
    else
    {
        /* no cache yet, or a different size: replace the buffer */
        payloadChange = true;
        if (m_top->m_prevTonemapPayload.payload != NULL)
            x265_free(m_top->m_prevTonemapPayload.payload);
        m_top->m_prevTonemapPayload.payload = (uint8_t*)x265_malloc(sizeof(uint8_t)* payload->payloadSize);
    }

    if (payloadChange)
    {
        m_top->m_prevTonemapPayload.payloadType = payload->payloadType;
        if (m_top->m_prevTonemapPayload.payload != NULL)
        {
            m_top->m_prevTonemapPayload.payloadSize = payload->payloadSize;
            memcpy(m_top->m_prevTonemapPayload.payload, payload->payload, payload->payloadSize);
        }
        else
        {
            /* never copy through a NULL buffer; keep size and pointer consistent */
            if (payload->payloadSize)
                x265_log(m_param, X265_LOG_ERROR, "unable to allocate tone-map SEI payload cache\n");
            m_top->m_prevTonemapPayload.payloadSize = 0;
        }
    }

    bool isIDR = m_frame[0]->m_lowres.sliceType == X265_TYPE_IDR;
    return (payloadChange || isIDR);
}
413
414
void FrameEncoder::writeTrailingSEIMessages(int layer)
415
0
{
416
0
    Slice* slice = m_frame[layer]->m_encData->m_slice;
417
0
    int planes = (m_param->internalCsp != X265_CSP_I400) ? 3 : 1;
418
0
    int32_t payloadSize = 0;
419
420
0
    if (m_param->decodedPictureHashSEI == 1)
421
0
    {
422
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::MD5;
423
0
        for (int i = 0; i < planes; i++)
424
0
            MD5Final(&m_seiReconPictureDigest.m_state[i], m_seiReconPictureDigest.m_digest[i]);
425
0
        payloadSize = 1 + 16 * planes;
426
0
    }
427
0
    else if (m_param->decodedPictureHashSEI == 2)
428
0
    {
429
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CRC;
430
0
        for (int i = 0; i < planes; i++)
431
0
            crcFinish(m_seiReconPictureDigest.m_crc[i], m_seiReconPictureDigest.m_digest[i]);
432
0
        payloadSize = 1 + 2 * planes;
433
0
    }
434
0
    else if (m_param->decodedPictureHashSEI == 3)
435
0
    {
436
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CHECKSUM;
437
0
        for (int i = 0; i < planes; i++)
438
0
            checksumFinish(m_seiReconPictureDigest.m_checksum[i], m_seiReconPictureDigest.m_digest[i]);
439
0
        payloadSize = 1 + 4 * planes;
440
0
    }
441
442
0
    m_seiReconPictureDigest.setSize(payloadSize);
443
0
    m_seiReconPictureDigest.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_SUFFIX_SEI, m_nalList, false, layer);
444
0
}
445
446
void FrameEncoder::compressFrame(int layer)
447
654
{
448
654
    ProfileScopeEvent(frameThread);
449
450
654
    m_startCompressTime[layer] = x265_mdate();
451
654
    m_totalActiveWorkerCount = 0;
452
654
    m_activeWorkerCountSamples = 0;
453
654
    m_totalWorkerElapsedTime[layer] = 0;
454
654
    m_totalThreadedMETime[layer] = 0;
455
654
    m_totalThreadedMEWait[layer] = 0;
456
654
    m_totalNoWorkerTime[layer] = 0;
457
654
    m_countRowBlocks = 0;
458
654
    m_allRowsAvailableTime[layer] = 0;
459
654
    m_stallStartTime[layer] = 0;
460
461
654
    m_completionCount = 0;
462
654
    memset((void*)m_bAllRowsStop, 0, sizeof(bool) * m_param->maxSlices);
463
654
    memset((void*)m_vbvResetTriggerRow, -1, sizeof(int) * m_param->maxSlices);
464
654
    m_rowSliceTotalBits[0] = 0;
465
654
    m_rowSliceTotalBits[1] = 0;
466
467
654
    m_SSDY[layer] = m_SSDU[layer] = m_SSDV[layer] = 0;
468
654
    m_ssim[layer] = 0;
469
654
    m_ssimCnt[layer] = 0;
470
654
    memset(&(m_frame[layer]->m_encData->m_frameStats), 0, sizeof(m_frame[layer]->m_encData->m_frameStats));
471
654
    m_sLayerId = layer;
472
473
654
    if (m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
474
0
    {
475
0
        int height = m_frame[layer]->m_fencPic->m_picHeight;
476
0
        int width = m_frame[layer]->m_fencPic->m_picWidth;
477
0
        intptr_t stride = m_frame[layer]->m_fencPic->m_stride;
478
479
0
        if (!computeEdge(m_frame[layer]->m_edgeBitPic, m_frame[layer]->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))
480
0
        {
481
0
            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");
482
0
        }
483
0
    }
484
485
    /* Emit access unit delimiter unless this is the first frame and the user is
486
     * not repeating headers (since AUD is supposed to be the first NAL in the access
487
     * unit) */
488
654
    Slice* slice = m_frame[layer]->m_encData->m_slice;
489
490
654
    if (m_param->bEnableEndOfSequence && m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_frame[layer]->m_poc)
491
0
    {
492
0
        m_bs.resetBits();
493
0
        m_nalList.serialize(NAL_UNIT_EOS, m_bs);
494
0
    }
495
496
654
    if (m_param->bEnableAccessUnitDelimiters && (m_frame[layer]->m_poc || m_param->bRepeatHeaders))
497
0
    {
498
0
        m_bs.resetBits();
499
0
        m_entropyCoder.setBitstream(&m_bs);
500
0
        m_entropyCoder.codeAUD(*slice);
501
0
        m_bs.writeByteAlignment();
502
0
        m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
503
0
        if (m_param->bSingleSeiNal)
504
0
            m_bs.resetBits();
505
0
    }
506
654
    if (m_frame[layer]->m_lowres.bKeyframe && m_param->bRepeatHeaders)
507
654
    {
508
654
        if (m_param->bOptRefListLengthPPS)
509
0
        {
510
0
            ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
511
0
            m_top->updateRefIdx();
512
0
        }
513
654
        if (m_top->m_param->rc.bStatRead  && m_top->m_param->bMultiPassOptRPS)
514
0
        {
515
0
            ScopedLock refIdxLock(m_top->m_rpsInSpsLock);
516
0
            if (!m_top->computeSPSRPSIndex())
517
0
            {
518
0
                x265_log(m_param, X265_LOG_ERROR, "compute commonly RPS failed!\n");
519
0
                m_top->m_aborted = true;
520
0
            }
521
0
            m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
522
0
        }
523
654
        else
524
654
            m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
525
654
    }
526
527
654
    if (m_top->m_param->rc.bStatRead && m_top->m_param->bMultiPassOptRPS)
528
0
        m_frame[layer]->m_encData->m_slice->m_rpsIdx = (m_top->m_rateControl->m_rce2Pass + m_frame[layer]->m_encodeOrder)->rpsIdx;
529
530
    // Weighted Prediction parameters estimation.
531
654
    bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred && !layer;
532
654
    bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred && !layer;
533
534
654
    WeightParam* reuseWP = NULL;
535
654
    if (m_param->analysisLoad[0] && (bUseWeightP || bUseWeightB))
536
0
        reuseWP = (WeightParam*)m_frame[layer]->m_analysisData.wt;
537
538
654
    if (bUseWeightP || bUseWeightB)
539
0
    {
540
#if DETAILED_CU_STATS
541
        m_cuStats.countWeightAnalyze++;
542
        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
543
#endif
544
0
        if (strlen(m_param->analysisLoad))
545
0
        {
546
0
            for (int list = 0; list < slice->isInterB() + 1; list++) 
547
0
            {
548
0
                for (int plane = 0; plane < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); plane++)
549
0
                {
550
0
                    for (int ref = 1; ref < slice->m_numRefIdx[list]; ref++)
551
0
                        SET_WEIGHT(slice->m_weightPredTable[list][ref][plane], false, 1 << reuseWP->log2WeightDenom, reuseWP->log2WeightDenom, 0);
552
0
                    slice->m_weightPredTable[list][0][plane] = *(reuseWP++);
553
0
                }
554
0
            }
555
0
        }
556
0
        else
557
0
        {
558
0
            WeightAnalysis wa(*this);
559
0
            if (m_pool && wa.tryBondPeers(*this, 1))
560
                /* use an idle worker for weight analysis */
561
0
                wa.waitForExit();
562
0
            else
563
0
                weightAnalyse(*slice, *m_frame[layer], *m_param);
564
0
        }
565
0
    }
566
654
    else
567
654
        slice->disableWeights();
568
569
654
    if (strlen(m_param->analysisSave) && (bUseWeightP || bUseWeightB))
570
0
        reuseWP = (WeightParam*)m_frame[layer]->m_analysisData.wt;
571
    // Generate motion references
572
654
    int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
573
654
    for (int l = 0; l < numPredDir; l++)
574
0
    {
575
0
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
576
0
        {
577
0
            WeightParam *w = NULL;
578
0
            if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].wtPresent)
579
0
                w = slice->m_weightPredTable[l][ref];
580
0
            slice->m_refReconPicList[l][ref] = slice->m_refFrameList[l][ref]->m_reconPic[0];
581
0
            m_mref[l][ref].init(slice->m_refReconPicList[l][ref], w, *m_param);
582
0
        }
583
0
        if (strlen(m_param->analysisSave) && (bUseWeightP || bUseWeightB))
584
0
        {
585
0
            for (int i = 0; i < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); i++)
586
0
                *(reuseWP++) = slice->m_weightPredTable[l][0][i];
587
0
        }
588
589
0
    }
590
591
654
    int numTLD;
592
654
    if (m_pool)
593
654
        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
594
0
    else
595
0
        numTLD = 1;
596
597
    /* Get the QP for this frame from rate control. This call may block until
598
     * frames ahead of it in encode order have called rateControlEnd() */
599
654
    int qp = (layer == 0) ? m_top->m_rateControl->rateControlStart(m_frame[layer], &m_rce, m_top) : (int)m_rce.newQp;
600
601
654
    m_rce.newQp = qp;
602
603
654
    if (!!layer && m_top->m_lookahead->m_bAdaptiveQuant)
604
0
    {
605
0
        int ncu;
606
0
        if (m_param->rc.qgSize == 8)
607
0
            ncu = m_top->m_rateControl->m_ncu * 4;
608
0
        else
609
0
            ncu = m_top->m_rateControl->m_ncu;
610
0
        if (m_param->numViews > 1)
611
0
        {
612
0
            for (int i = 0; i < ncu; i++)
613
0
            {
614
0
                m_frame[layer]->m_lowres.qpCuTreeOffset[i] = m_frame[0]->m_lowres.qpCuTreeOffset[i];
615
0
                m_frame[layer]->m_lowres.qpAqOffset[i] = m_frame[0]->m_lowres.qpAqOffset[i];
616
0
            }
617
0
        }
618
0
        else if (m_param->numScalableLayers > 1)
619
0
        {
620
0
            memset(m_frame[layer]->m_lowres.qpCuTreeOffset, 0, sizeof(double)*ncu);
621
0
            memset(m_frame[layer]->m_lowres.qpAqOffset, 0, sizeof(double)* ncu);
622
0
        }
623
624
0
        m_frame[layer]->m_encData->m_avgQpAq = m_frame[0]->m_encData->m_avgQpAq;
625
0
        m_frame[layer]->m_encData->m_avgQpRc = m_frame[0]->m_encData->m_avgQpRc;
626
0
        if (!!m_param->rc.hevcAq)
627
0
        {
628
0
            for (uint32_t d = 0; d < 4; d++)
629
0
            {
630
0
                int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
631
0
                int aqDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.qgSize];
632
0
                if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
633
0
                    continue;
634
0
                PicQPAdaptationLayer* pcAQLayer0 = &m_frame[0]->m_lowres.pAQLayer[d];
635
0
                PicQPAdaptationLayer* pcAQLayer1 = &m_frame[layer]->m_lowres.pAQLayer[d];
636
0
                const uint32_t aqPartWidth = m_frame[0]->m_lowres.pAQLayer[d].aqPartWidth;
637
0
                const uint32_t aqPartHeight = m_frame[0]->m_lowres.pAQLayer[d].aqPartHeight;
638
0
                double* pcQP0 = pcAQLayer0->dQpOffset;
639
0
                double* pcCuTree0 = pcAQLayer0->dCuTreeOffset;
640
0
                double* pcQP1 = pcAQLayer1->dQpOffset;
641
0
                double* pcCuTree1 = pcAQLayer1->dCuTreeOffset;
642
0
                if (m_param->numViews > 1)
643
0
                {
644
0
                    for (uint32_t y = 0; y < m_frame[0]->m_fencPic->m_picHeight; y += aqPartHeight)
645
0
                    {
646
0
                        for (uint32_t x = 0; x < m_frame[0]->m_fencPic->m_picWidth; x += aqPartWidth, pcQP0++, pcCuTree0++, pcQP1++, pcCuTree1++)
647
0
                        {
648
0
                            *pcQP1 = *pcQP0;
649
0
                            *pcCuTree1 = *pcCuTree0;
650
0
                        }
651
0
                    }
652
0
                }
653
0
                else if (m_param->numScalableLayers > 1)
654
0
                {
655
0
                    int numAQPartInWidth = (m_frame[0]->m_fencPic->m_picWidth + aqPartWidth - 1) / aqPartWidth;
656
0
                    int numAQPartInHeight = (m_frame[0]->m_fencPic->m_picHeight + aqPartHeight - 1) / aqPartHeight;
657
0
                    memset(m_frame[layer]->m_lowres.pAQLayer[d].dQpOffset, 0, sizeof(double)*numAQPartInWidth* numAQPartInHeight);
658
0
                    memset(m_frame[layer]->m_lowres.pAQLayer[d].dCuTreeOffset, 0, sizeof(double)* numAQPartInWidth* numAQPartInHeight);
659
0
                }
660
0
            }
661
0
        }
662
0
    }
663
654
    if (m_param->bEnableTemporalFilter)
664
0
    {
665
0
        m_frame[layer]->m_mcstf->m_QP = qp;
666
0
        m_frame[layer]->m_mcstf->bilateralFilter(m_frame[layer], m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
667
0
    }
668
669
654
    if (m_nr)
670
0
    {
671
0
        if (qp > QP_MAX_SPEC && m_frame[layer]->m_param->rc.vbvBufferSize)
672
0
        {
673
0
            for (int i = 0; i < numTLD; i++)
674
0
            {
675
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];
676
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_top->m_residualSumEmergency;
677
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_top->m_countEmergency;
678
0
            }
679
0
        }
680
0
        else
681
0
        {
682
0
            if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
683
0
            {
684
0
                for (int i = 0; i < numTLD; i++)
685
0
                {
686
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;
687
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;
688
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;
689
0
                }
690
0
            }
691
0
            else
692
0
            {
693
0
                for (int i = 0; i < numTLD; i++)
694
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = NULL;
695
0
            }
696
0
        }
697
0
    }
698
699
    /* Clip slice QP to 0-51 spec range before encoding */
700
654
    slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);
701
654
    if (m_param->bHDR10Opt)
702
0
    {
703
0
        int qpCb = x265_clip3(-12, 0, (int)floor((m_top->m_cB * ((-.46) * qp + 9.26)) + 0.5 ));
704
0
        int qpCr = x265_clip3(-12, 0, (int)floor((m_top->m_cR * ((-.46) * qp + 9.26)) + 0.5 ));
705
0
        slice->m_chromaQpOffset[0] = slice->m_pps->chromaQpOffset[0] + qpCb < -12 ? (qpCb + (-12 - (slice->m_pps->chromaQpOffset[0] + qpCb))) : qpCb;
706
0
        slice->m_chromaQpOffset[1] = slice->m_pps->chromaQpOffset[1] + qpCr < -12 ? (qpCr + (-12 - (slice->m_pps->chromaQpOffset[1] + qpCr))) : qpCr;
707
0
    }
708
709
654
    if (m_param->bOptQpPPS && m_param->bRepeatHeaders)
710
0
    {
711
0
        ScopedLock qpLock(m_top->m_sliceQpLock);
712
0
        for (int i = 0; i < (QP_MAX_MAX + 1); i++)
713
0
        {
714
0
            int delta = slice->m_sliceQp - (i + 1);
715
0
            int codeLength = getBsLength( delta );
716
0
            m_top->m_iBitsCostSum[i] += codeLength;
717
0
        }
718
0
        m_top->m_iFrameNum++;
719
0
    }
720
654
    m_initSliceContext.resetEntropy(*slice);
721
722
654
    m_frameFilter.start(m_frame[layer], m_initSliceContext);
723
724
    /* ensure all rows are blocked prior to initializing row CTU counters */
725
654
    WaveFront::clearEnabledRowMask();
726
727
654
    WaveFront::setLayerId(layer);
728
    /* reset entropy coders and compute slice id */
729
654
    m_entropyCoder.load(m_initSliceContext);
730
1.30k
    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)   
731
3.93k
        for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)
732
3.27k
            m_rows[row].init(m_initSliceContext, sliceId);   
733
734
    // reset slice counter for rate control update
735
654
    m_sliceCnt = 0;
736
737
654
    uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;
738
654
    X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");
739
654
    if (!m_outStreams)
740
654
    {
741
654
        m_outStreams = new Bitstream[numSubstreams];
742
654
        if (!m_param->bEnableWavefront)
743
123
            m_backupStreams = new Bitstream[numSubstreams];
744
654
        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
745
654
        if (!slice->m_bUseSao)
746
0
        {
747
0
            for (uint32_t i = 0; i < numSubstreams; i++)
748
0
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
749
0
        }
750
654
    }
751
0
    else
752
0
    {
753
0
        for (uint32_t i = 0; i < numSubstreams; i++)
754
0
        {
755
0
            m_outStreams[i].resetBits();
756
0
            if (!slice->m_bUseSao)
757
0
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
758
0
            else
759
0
                m_rows[i].rowGoOnCoder.setBitstream(NULL);
760
0
        }
761
0
    }
762
763
654
    m_rce.encodeOrder = m_frame[layer]->m_encodeOrder;
764
654
    int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
765
766
654
    if (m_frame[layer]->m_lowres.bKeyframe)
767
654
    {
768
654
        if (m_param->bEmitHRDSEI)
769
0
        {
770
0
            SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;
771
772
            // since the temporal layer HRD is not ready, we assumed it is fixed
773
0
            bpSei->m_auCpbRemovalDelayDelta = 1;
774
0
            bpSei->m_cpbDelayOffset = 0;
775
0
            bpSei->m_dpbDelayOffset = 0;
776
0
            bpSei->m_concatenationFlag = (m_param->bEnableHRDConcatFlag && !m_frame[layer]->m_poc) ? true : false;
777
778
            // hrdFullness() calculates the initial CPB removal delay and offset
779
0
            m_top->m_rateControl->hrdFullness(bpSei);
780
0
            bpSei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
781
782
0
            m_top->m_lastBPSEI = m_rce.encodeOrder;
783
0
        }
784
785
654
        if (m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_param->bEmitIDRRecoverySEI)
786
0
        {
787
            /* Recovery Point SEI requires the SPS to be "activated" */
788
0
            SEIRecoveryPoint sei;
789
0
            sei.m_recoveryPocCnt = 0;
790
0
            sei.m_exactMatchingFlag = true;
791
0
            sei.m_brokenLinkFlag = false;
792
0
            sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
793
0
        }
794
654
    }
795
796
654
    if ((m_param->bEmitHRDSEI || !!m_param->interlaceMode))
797
0
    {
798
0
        SEIPictureTiming *sei = m_rce.picTimingSEI;
799
0
        const VUI *vui = &slice->m_sps->vuiParameters;
800
0
        const HRDInfo *hrd = &vui->hrdParameters;
801
0
        int poc = slice->m_poc;
802
803
0
        if (vui->frameFieldInfoPresentFlag)
804
0
        {
805
0
            if (m_param->interlaceMode > 0)
806
0
            {
807
0
                if( m_param->interlaceMode == 2 )
808
0
                {   
809
                    // m_picStruct should be set to 3 or 4 when field feature is enabled
810
0
                    if (m_param->bField)
811
                        // 3: Top field, bottom field, in that order; 4: Bottom field, top field, in that order
812
0
                        sei->m_picStruct = (slice->m_fieldNum == 1) ? 4 : 3;
813
0
                    else
814
0
                        sei->m_picStruct = (poc & 1) ? 1 /* top */ : 2 /* bottom */;
815
0
                }     
816
0
                else if (m_param->interlaceMode == 1)
817
0
                {
818
0
                    if (m_param->bField)
819
0
                        sei->m_picStruct = (slice->m_fieldNum == 1) ? 3: 4;
820
0
                    else
821
0
                        sei->m_picStruct = (poc & 1) ? 2 /* bottom */ : 1 /* top */;
822
0
                }
823
0
            }
824
0
            else if (m_param->bEnableFrameDuplication)
825
0
                sei->m_picStruct = m_frame[layer]->m_picStruct;
826
0
            else
827
0
                sei->m_picStruct = m_param->pictureStructure;
828
829
0
            sei->m_sourceScanType = m_param->interlaceMode ? 0 : 1;
830
831
0
            sei->m_duplicateFlag = false;
832
0
        }
833
834
0
        if (vui->hrdParametersPresentFlag)
835
0
        {
836
            // m_auCpbRemovalDelay specifies how many clock ticks the
837
            // access unit associated with the picture timing SEI message has to
838
            // wait after removal of the access unit with the most recent
839
            // buffering period SEI message
840
0
            sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));
841
0
            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics[m_frame[layer]->m_tempLayer] + poc - m_rce.encodeOrder;
842
0
        }
843
844
0
        sei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
845
0
    }
846
847
654
    if (m_param->preferredTransferCharacteristics > -1 && slice->isIRAP())
848
0
    {
849
0
        SEIAlternativeTC m_seiAlternativeTC;
850
0
        m_seiAlternativeTC.m_preferredTransferCharacteristics = m_param->preferredTransferCharacteristics;
851
0
        m_seiAlternativeTC.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
852
0
    }
853
    /* Write Film grain characteristics if present */
854
654
    if (this->m_top->m_filmGrainIn)
855
0
    {
856
0
        FilmGrainCharacteristics m_filmGrain;
857
        /* Read the Film grain model file */
858
0
        readModel(&m_filmGrain, this->m_top->m_filmGrainIn);
859
0
        m_filmGrain.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
860
0
    }
861
    /* Write Aom film grain characteristics if present */
862
654
    if (this->m_top->m_aomFilmGrainIn)
863
0
    {
864
0
        AomFilmGrainCharacteristics m_aomFilmGrain;
865
        /* Read the Film grain model file */
866
0
        readAomModel(&m_aomFilmGrain, this->m_top->m_aomFilmGrainIn);
867
0
        m_aomFilmGrain.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal);
868
0
    }
869
    /* Write user SEI */
870
654
    for (int i = 0; i < m_frame[layer]->m_userSEI.numPayloads; i++)
871
0
    {
872
0
        x265_sei_payload *payload = &m_frame[layer]->m_userSEI.payloads[i];
873
0
        if (payload->payloadType == USER_DATA_UNREGISTERED)
874
0
        {
875
0
            SEIuserDataUnregistered sei;
876
0
            sei.m_userData = payload->payload;
877
0
            sei.setSize(payload->payloadSize);
878
0
            sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
879
0
        }
880
0
        else if (payload->payloadType == USER_DATA_REGISTERED_ITU_T_T35)
881
0
        {
882
0
            bool writeSei = m_param->bDhdr10opt ? writeToneMapInfo(payload) : true;
883
0
            if (writeSei)
884
0
            {
885
0
                SEIuserDataRegistered sei;
886
0
                sei.m_userData = payload->payload;
887
0
                sei.setSize(payload->payloadSize);
888
0
                sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
889
0
            }
890
0
        }
891
0
        else
892
0
            x265_log(m_param, X265_LOG_ERROR, "Unrecognized SEI type\n");
893
0
    }
894
895
654
    bool isSei = ((m_frame[layer]->m_lowres.bKeyframe && m_param->bRepeatHeaders) || m_param->bEmitHRDSEI ||
896
0
                 !!m_param->interlaceMode || (m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_param->bEmitIDRRecoverySEI) ||
897
0
                   m_frame[layer]->m_userSEI.numPayloads);
898
899
654
    if (isSei && m_param->bSingleSeiNal)
900
0
    {
901
0
        m_bs.writeByteAlignment();
902
0
        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
903
0
    }
904
    /* CQP and CRF (without capped VBV) don't use mid-frame statistics to
905
     * tune RateControl parameters for other frames.
906
     * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
907
     * RateControlEnd here, after the slice contexts are initialized. For the rest - ABR
908
     * and VBV, unlock only after rateControlUpdateStats of this frame is called */
909
654
    if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
910
654
    {
911
654
        m_top->m_rateControl->m_startEndOrder.incr();
912
913
654
        if (m_rce.encodeOrder < m_param->frameNumThreads - 1)
914
610
            m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
915
654
    }
916
917
654
    if (m_param->bDynamicRefine)
918
0
        computeAvgTrainingData(layer);
919
920
    /* Analyze CTU rows, most of the hard work is done here.  Frame is
921
     * compressed in a wave-front pattern if WPP is enabled. Row based loop
922
     * filters run behind the CTU compression and reconstruction */
923
924
1.30k
    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
925
654
        m_rows[m_sliceBaseRow[sliceId]].active = true;
926
    
927
654
    if (m_param->bEnableWavefront)
928
531
    {
929
531
        int i = 0;
930
3.37k
        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
931
2.84k
        {
932
5.68k
            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
933
2.84k
            {
934
2.84k
                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
935
2.84k
                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
936
2.84k
                const uint32_t row = sliceStartRow + rowInSlice;
937
2.84k
                if (row > sliceEndRow)
938
0
                    continue;
939
2.84k
                m_row_to_idx[row] = i;
940
2.84k
                m_idx_to_row[i] = row;
941
2.84k
                i += 1;
942
2.84k
            }
943
2.84k
        }
944
531
    }
945
946
654
    if (m_param->bEnableWavefront)
947
531
    {
948
3.37k
        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
949
2.84k
        {
950
5.68k
            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
951
2.84k
            {
952
2.84k
                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
953
2.84k
                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
954
2.84k
                const uint32_t row = sliceStartRow + rowInSlice;
955
956
2.84k
                X265_CHECK(row < m_numRows, "slices row fault was detected");
957
958
2.84k
                if (row > sliceEndRow)
959
0
                    continue;
960
961
                // block until all reference frames have reconstructed the rows we need
962
2.84k
                for (int l = 0; l < numPredDir; l++)
963
0
                {
964
0
                    for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
965
0
                    {
966
0
                        Frame *refpic = slice->m_refFrameList[l][ref];
967
968
#if ENABLE_SCC_EXT
969
                        /*Exempt the current pic as reference*/
970
                        if (m_param->bEnableSCC && refpic->m_poc == m_frame[layer]->m_poc)
971
                            continue;
972
#endif
973
974
                        // NOTE: we unnecessary wait row that beyond current slice boundary
975
0
                        const int rowIdx = X265_MIN(sliceEndRow, (row + m_refLagRows));
976
977
0
                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
978
0
                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
979
980
0
                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
981
0
                            m_mref[l][ref].applyWeight(rowIdx, m_numRows, sliceEndRow, sliceId);
982
0
                    }
983
0
                }
984
                
985
2.84k
                enableRowEncoder(m_row_to_idx[row]); /* clear external dependency for this row */
986
987
2.84k
                if (m_top->m_threadedME && !slice->isIntra())
988
0
                {
989
0
                    ScopedLock lock(m_tmeDepLock);
990
0
                    m_tmeDeps[row].external = true;
991
0
                    m_top->m_threadedME->enqueueReadyRows(row, layer, this);
992
0
                }
993
994
2.84k
                if (!rowInSlice)
995
531
                {
996
531
                    m_row0WaitTime[layer] = x265_mdate();
997
531
                    enqueueRowEncoder(m_row_to_idx[row]); /* clear internal dependency, start wavefront */
998
531
                }
999
2.84k
                tryWakeOne();
1000
2.84k
            } // end of loop sliceId
1001
2.84k
        } // end of loop rowInSlice
1002
1003
531
        m_allRowsAvailableTime[layer] = x265_mdate();
1004
531
        tryWakeOne(); /* ensure one thread is active or help-wanted flag is set prior to blocking */
1005
531
        static const int block_ms = 250;
1006
531
        while (m_completionEvent.timedWait(block_ms))
1007
0
            tryWakeOne();
1008
531
    }
1009
123
    else
1010
123
    {
1011
683
        for (uint32_t i = 0; i < m_numRows + m_filterRowDelay; i++)
1012
560
        {
1013
            // compress
1014
560
            if (i < m_numRows)
1015
437
            {
1016
                // block until all reference frames have reconstructed the rows we need
1017
437
                for (int l = 0; l < numPredDir; l++)
1018
0
                {
1019
0
                    int list = l;
1020
0
                    for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
1021
0
                    {
1022
0
                        Frame *refpic = slice->m_refFrameList[list][ref];
1023
1024
#if ENABLE_SCC_EXT
1025
                        /*Exempt the current pic as reference*/
1026
                        if (m_param->bEnableSCC && refpic->m_poc == m_frame[layer]->m_poc)
1027
                            continue;
1028
#endif
1029
1030
0
                        const int rowIdx = X265_MIN(m_numRows - 1, (i + m_refLagRows));
1031
0
                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
1032
0
                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
1033
1034
0
                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
1035
0
                            m_mref[list][ref].applyWeight(rowIdx, m_numRows, m_numRows, 0);
1036
0
                    }
1037
0
                }
1038
1039
437
                if (!i)
1040
123
                    m_row0WaitTime[layer] = x265_mdate();
1041
314
                else if (i == m_numRows - 1)
1042
112
                    m_allRowsAvailableTime[layer] = x265_mdate();
1043
437
                processRowEncoder(i, m_tld[m_localTldIdx], layer);
1044
437
            }
1045
1046
            // filter
1047
560
            if (i >= m_filterRowDelay)
1048
437
                m_frameFilter.processRow(i - m_filterRowDelay, layer);
1049
560
        }
1050
123
    }
1051
#if ENABLE_LIBVMAF
1052
    vmafFrameLevelScore();
1053
#endif
1054
1055
654
    m_tmeDepLock.acquire();
1056
654
    m_tmeDeps.clear();
1057
654
    m_tmeDeps.resize(m_numRows);
1058
654
    m_tmeDepLock.release();
1059
1060
654
    if (m_param->maxSlices > 1)
1061
0
    {
1062
0
        PicYuv *reconPic = m_frame[layer]->m_reconPic[0];
1063
0
        uint32_t height = reconPic->m_picHeight;
1064
0
        initDecodedPictureHashSEI(0, 0, height, layer);
1065
0
    } 
1066
1067
654
    if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame[layer]->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
1068
0
        collectDynDataFrame(layer);
1069
1070
654
    if (m_param->bEnableTemporalFilter && m_top->isFilterThisframe(m_frame[layer]->m_mcstf->m_sliceTypeConfig, m_frame[layer]->m_lowres.sliceType))
1071
0
    {
1072
        //Reset the MCSTF context in Frame Encoder and Frame
1073
0
        for (int i = 0; i < (m_frame[layer]->m_mcstf->m_range << 1); i++)
1074
0
        {
1075
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs0, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1076
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs1, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1077
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs2, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1078
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs,  0, sizeof(MV) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1079
0
            memset(m_frame[layer]->m_mcstfRefList[i].noise, 0, sizeof(int) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1080
0
            memset(m_frame[layer]->m_mcstfRefList[i].error, 0, sizeof(int) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1081
1082
0
            m_frame[layer]->m_mcstf->m_numRef = 0;
1083
0
        }
1084
0
    }
1085
1086
1087
654
    if (m_param->rc.bStatWrite)
1088
0
    {
1089
0
        int totalI = 0, totalP = 0, totalSkip = 0;
1090
1091
        // accumulate intra,inter,skip cu count per frame for 2 pass
1092
0
        for (uint32_t i = 0; i < m_numRows; i++)
1093
0
        {
1094
0
            m_frame[layer]->m_encData->m_frameStats.mvBits    += m_rows[i].rowStats.mvBits;
1095
0
            m_frame[layer]->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
1096
0
            m_frame[layer]->m_encData->m_frameStats.miscBits  += m_rows[i].rowStats.miscBits;
1097
0
            totalI                                     += m_rows[i].rowStats.intra8x8Cnt;
1098
0
            totalP                                     += m_rows[i].rowStats.inter8x8Cnt;
1099
0
            totalSkip                                  += m_rows[i].rowStats.skip8x8Cnt;
1100
0
        }
1101
0
        int totalCuCount = totalI + totalP + totalSkip;
1102
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;
1103
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
1104
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Skip  = (double)totalSkip / totalCuCount;
1105
0
    }
1106
1107
654
    if (m_param->csvLogLevel >= 1)
1108
0
    {
1109
0
        for (uint32_t i = 0; i < m_numRows; i++)
1110
0
        {
1111
0
            m_frame[layer]->m_encData->m_frameStats.cntIntraNxN += m_rows[i].rowStats.cntIntraNxN;
1112
0
            m_frame[layer]->m_encData->m_frameStats.totalCu += m_rows[i].rowStats.totalCu;
1113
0
            m_frame[layer]->m_encData->m_frameStats.totalCtu += m_rows[i].rowStats.totalCtu;
1114
0
            m_frame[layer]->m_encData->m_frameStats.lumaDistortion += m_rows[i].rowStats.lumaDistortion;
1115
0
            m_frame[layer]->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;
1116
0
            m_frame[layer]->m_encData->m_frameStats.psyEnergy += m_rows[i].rowStats.psyEnergy;
1117
0
            m_frame[layer]->m_encData->m_frameStats.ssimEnergy += m_rows[i].rowStats.ssimEnergy;
1118
0
            m_frame[layer]->m_encData->m_frameStats.resEnergy += m_rows[i].rowStats.resEnergy;
1119
0
            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1120
0
            {
1121
0
                m_frame[layer]->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
1122
0
                m_frame[layer]->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
1123
0
                for (int m = 0; m < INTER_MODES; m++)
1124
0
                    m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
1125
0
                for (int n = 0; n < INTRA_MODES; n++)
1126
0
                    m_frame[layer]->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];
1127
0
            }
1128
0
        }
1129
0
        m_frame[layer]->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame[layer]->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1130
1131
0
        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1132
0
        {
1133
0
            m_frame[layer]->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame[layer]->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1134
0
            m_frame[layer]->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame[layer]->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1135
0
            for (int n = 0; n < INTRA_MODES; n++)
1136
0
                m_frame[layer]->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame[layer]->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1137
0
            uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
1138
0
            cuInterRectCnt += m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][2];
1139
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1140
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1141
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1142
0
        }
1143
0
    }
1144
1145
654
    if (m_param->csvLogLevel >= 2)
1146
0
    {
1147
0
        m_frame[layer]->m_encData->m_frameStats.avgLumaDistortion = (double)(m_frame[layer]->m_encData->m_frameStats.lumaDistortion) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1148
0
        m_frame[layer]->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame[layer]->m_encData->m_frameStats.chromaDistortion) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1149
0
        m_frame[layer]->m_encData->m_frameStats.avgPsyEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.psyEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1150
0
        m_frame[layer]->m_encData->m_frameStats.avgSsimEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.ssimEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1151
0
        m_frame[layer]->m_encData->m_frameStats.avgResEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.resEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1152
0
    }
1153
1154
654
    m_bs.resetBits();
1155
654
    m_entropyCoder.load(m_initSliceContext);
1156
654
    m_entropyCoder.setBitstream(&m_bs);
1157
1158
    // finish encode of each CTU row, only required when SAO is enabled
1159
654
    if (slice->m_bUseSao)
1160
654
        encodeSlice(0, layer);
1161
1162
654
    m_entropyCoder.setBitstream(&m_bs);
1163
1164
654
    if (m_param->maxSlices > 1)
1165
0
    {
1166
0
        uint32_t nextSliceRow = 0;
1167
1168
0
        for(uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
1169
0
        {
1170
0
            m_bs.resetBits();
1171
1172
0
            const uint32_t sliceAddr = nextSliceRow * m_numCols;
1173
0
            if (m_param->bOptRefListLengthPPS)
1174
0
            {
1175
0
                ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
1176
0
                m_top->analyseRefIdx(slice->m_numRefIdx);
1177
0
            }
1178
0
            m_entropyCoder.codeSliceHeader(*slice, *m_frame[layer]->m_encData, sliceAddr, m_sliceAddrBits, slice->m_sliceQp, layer);
1179
1180
            // Find rows of current slice
1181
0
            const uint32_t prevSliceRow = nextSliceRow;
1182
0
            while(nextSliceRow < m_numRows && m_rows[nextSliceRow].sliceId == sliceId)
1183
0
                nextSliceRow++;
1184
1185
            // serialize each row, record final lengths in slice header
1186
0
            uint32_t maxStreamSize = m_nalList.serializeSubstreams(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow), &m_outStreams[prevSliceRow]);
1187
1188
            // complete the slice header by writing WPP row-starts
1189
0
            m_entropyCoder.setBitstream(&m_bs);
1190
0
            if (slice->m_pps->bEntropyCodingSyncEnabled)
1191
0
                m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);
1192
            
1193
0
            m_bs.writeByteAlignment();
1194
1195
0
            m_nalList.serialize(slice->m_nalUnitType, m_bs, layer, (!!m_param->bEnableTemporalSubLayers ? m_frame[layer]->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
1196
0
        }
1197
0
    }
1198
654
    else
1199
654
    {
1200
654
        if (m_param->bOptRefListLengthPPS)
1201
0
        {
1202
0
            ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
1203
0
            m_top->analyseRefIdx(slice->m_numRefIdx);
1204
0
        }
1205
654
        m_entropyCoder.codeSliceHeader(*slice, *m_frame[layer]->m_encData, 0, 0, slice->m_sliceQp, layer);
1206
1207
        // serialize each row, record final lengths in slice header
1208
654
        uint32_t maxStreamSize = m_nalList.serializeSubstreams(m_substreamSizes, numSubstreams, m_outStreams);
1209
1210
        // complete the slice header by writing WPP row-starts
1211
654
        m_entropyCoder.setBitstream(&m_bs);
1212
654
        if (slice->m_pps->bEntropyCodingSyncEnabled)
1213
531
            m_entropyCoder.codeSliceHeaderWPPEntryPoints(m_substreamSizes, (slice->m_sps->numCuInHeight - 1), maxStreamSize);
1214
654
        m_bs.writeByteAlignment();
1215
1216
654
        m_nalList.serialize(slice->m_nalUnitType, m_bs, layer, (!!m_param->bEnableTemporalSubLayers ? m_frame[layer]->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
1217
654
    }
1218
1219
654
    if (m_param->decodedPictureHashSEI)
1220
0
        writeTrailingSEIMessages(layer);
1221
1222
654
    uint64_t bytes = 0;
1223
3.27k
    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
1224
2.61k
    {
1225
2.61k
        int type = m_nalList.m_nal[i].type;
1226
1227
        // exclude SEI
1228
2.61k
        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
1229
2.61k
        {
1230
2.61k
            bytes += m_nalList.m_nal[i].sizeBytes;
1231
            // and exclude start code prefix
1232
2.61k
            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
1233
2.61k
        }
1234
2.61k
    }
1235
654
    m_accessUnitBits[layer] = (layer) ? (bytes - (m_accessUnitBits[0] >> 3)) << 3 : bytes << 3;
1236
1237
654
    int filler = 0;
1238
    /* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */
1239
654
    if (!layer && m_top->m_rateControl->rateControlEnd(m_frame[layer], m_accessUnitBits[layer], &m_rce, &filler) < 0)
1240
0
        m_top->m_aborted = true;
1241
1242
#if ENABLE_ALPHA
1243
    if (layer && m_param->numScalableLayers > 1)
1244
        m_frame[layer]->m_encData->m_avgQpAq = m_frame[layer]->m_encData->m_avgQpRc;
1245
#endif
1246
#if ENABLE_MULTIVIEW
1247
    if (layer && m_param->numViews > 1)
1248
    {
1249
        double avgQpAq = 0;
1250
        for (uint32_t i = 0; i < slice->m_sps->numCuInHeight; i++)
1251
            avgQpAq += m_frame[layer]->m_encData->m_rowStat[i].sumQpAq;
1252
1253
        avgQpAq /= (slice->m_sps->numCUsInFrame * m_param->num4x4Partitions);
1254
        m_frame[layer]->m_encData->m_avgQpAq = avgQpAq;
1255
    }
1256
#endif
1257
1258
654
    if (filler > 0)
1259
0
    {
1260
0
        filler = (filler - FILLER_OVERHEAD * 8) >> 3;
1261
0
        m_bs.resetBits();
1262
0
        while (filler > 0)
1263
0
        {
1264
0
            m_bs.write(0xff, 8);
1265
0
            filler--;
1266
0
        }
1267
0
        m_bs.writeByteAlignment();
1268
0
        m_nalList.serialize(NAL_UNIT_FILLER_DATA, m_bs);
1269
0
        bytes += m_nalList.m_nal[m_nalList.m_numNal - 1].sizeBytes;
1270
0
        bytes -= 3; //exclude start code prefix
1271
0
        m_accessUnitBits[layer] = bytes << 3;
1272
0
    }
1273
1274
654
    if (m_frame[layer]->m_rpu.payloadSize)
1275
0
    {
1276
0
        m_bs.resetBits();
1277
0
        for (int i = 0; i < m_frame[layer]->m_rpu.payloadSize; i++)
1278
0
            m_bs.write(m_frame[layer]->m_rpu.payload[i], 8);
1279
0
        m_nalList.serialize(NAL_UNIT_UNSPECIFIED, m_bs);
1280
0
    }
1281
1282
654
    m_endCompressTime[layer] = x265_mdate();
1283
1284
    /* Decrement referenced frame reference counts, allow them to be recycled */
1285
654
    for (int l = 0; l < numPredDir; l++)
1286
0
    {
1287
0
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
1288
0
        {
1289
0
            Frame *refpic = slice->m_refFrameList[l][ref];
1290
0
            ATOMIC_DEC(&refpic->m_countRefEncoders);
1291
0
        }
1292
0
    }
1293
1294
654
    if (m_nr)
1295
0
    {
1296
0
        bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || m_param->noiseReductionInter);
1297
1298
0
        if (nrEnabled)
1299
0
        {
1300
            /* Accumulate NR statistics from all worker threads */
1301
0
            for (int i = 0; i < numTLD; i++)
1302
0
            {
1303
0
                NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
1304
0
                for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
1305
0
                {
1306
0
                    for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
1307
0
                        m_nr->nrResidualSum[cat][coeff] += nr->nrResidualSum[cat][coeff];
1308
1309
0
                    m_nr->nrCount[cat] += nr->nrCount[cat];
1310
0
                }
1311
0
            }
1312
1313
0
            noiseReductionUpdate();
1314
1315
            /* Copy updated NR coefficients back to all worker threads */
1316
0
            for (int i = 0; i < numTLD; i++)
1317
0
            {
1318
0
                NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
1319
0
                memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
1320
0
                memset(nr->nrCount, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES);
1321
0
                memset(nr->nrResidualSum, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
1322
0
            }
1323
0
        }
1324
0
    }
1325
1326
#if DETAILED_CU_STATS
1327
    /* Accumulate CU statistics from each worker thread, we could report
1328
     * per-frame stats here, but currently we do not. */
1329
    for (int i = 0; i < numTLD; i++)
1330
        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param);
1331
#endif
1332
1333
654
    m_endFrameTime[layer] = x265_mdate();
1334
654
}
1335
1336
void FrameEncoder::initDecodedPictureHashSEI(int row, int cuAddr, int height, int layer)
1337
3.27k
{
1338
3.27k
    PicYuv *reconPic = m_frame[layer]->m_reconPic[0];
1339
3.27k
    uint32_t width = reconPic->m_picWidth;  
1340
3.27k
    intptr_t stride = reconPic->m_stride;
1341
3.27k
    uint32_t maxCUHeight = m_param->maxCUSize;
1342
1343
3.27k
    const uint32_t hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
1344
3.27k
    const uint32_t vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);
1345
1346
3.27k
    if (m_param->decodedPictureHashSEI == 1)
1347
0
    {
1348
0
        if (!row)
1349
0
            MD5Init(&m_seiReconPictureDigest.m_state[0]);
1350
1351
0
        updateMD5Plane(m_seiReconPictureDigest.m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
1352
0
        if (m_param->internalCsp != X265_CSP_I400)
1353
0
        {
1354
0
            if (!row)
1355
0
            {
1356
0
                MD5Init(&m_seiReconPictureDigest.m_state[1]);
1357
0
                MD5Init(&m_seiReconPictureDigest.m_state[2]);
1358
0
            }
1359
1360
0
            width >>= hChromaShift;
1361
0
            height >>= vChromaShift;
1362
0
            stride = reconPic->m_strideC;
1363
1364
0
            updateMD5Plane(m_seiReconPictureDigest.m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
1365
0
            updateMD5Plane(m_seiReconPictureDigest.m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
1366
0
        }
1367
0
    }
1368
3.27k
    else if (m_param->decodedPictureHashSEI == 2)
1369
0
    {
1370
1371
0
        if (!row)
1372
0
            m_seiReconPictureDigest.m_crc[0] = 0xffff;
1373
1374
0
        updateCRC(reconPic->getLumaAddr(cuAddr), m_seiReconPictureDigest.m_crc[0], height, width, stride);
1375
0
        if (m_param->internalCsp != X265_CSP_I400)
1376
0
        {
1377
0
            width >>= hChromaShift;
1378
0
            height >>= vChromaShift;
1379
0
            stride = reconPic->m_strideC;
1380
0
            m_seiReconPictureDigest.m_crc[1] = m_seiReconPictureDigest.m_crc[2] = 0xffff;
1381
1382
0
            updateCRC(reconPic->getCbAddr(cuAddr), m_seiReconPictureDigest.m_crc[1], height, width, stride);
1383
0
            updateCRC(reconPic->getCrAddr(cuAddr), m_seiReconPictureDigest.m_crc[2], height, width, stride);
1384
0
        }
1385
0
    }
1386
3.27k
    else if (m_param->decodedPictureHashSEI == 3)
1387
0
    {
1388
0
        if (!row)
1389
0
            m_seiReconPictureDigest.m_checksum[0] = 0;
1390
1391
0
        updateChecksum(reconPic->m_picOrg[0], m_seiReconPictureDigest.m_checksum[0], height, width, stride, row, maxCUHeight);
1392
0
        if (m_param->internalCsp != X265_CSP_I400)
1393
0
        {
1394
0
            width >>= hChromaShift;
1395
0
            height >>= vChromaShift;
1396
0
            stride = reconPic->m_strideC;
1397
0
            maxCUHeight >>= vChromaShift;
1398
1399
0
            if (!row)
1400
0
                m_seiReconPictureDigest.m_checksum[1] = m_seiReconPictureDigest.m_checksum[2] = 0;
1401
1402
0
            updateChecksum(reconPic->m_picOrg[1], m_seiReconPictureDigest.m_checksum[1], height, width, stride, row, maxCUHeight);
1403
0
            updateChecksum(reconPic->m_picOrg[2], m_seiReconPictureDigest.m_checksum[2], height, width, stride, row, maxCUHeight);
1404
0
        }
1405
0
    }
1406
3.27k
}
1407
1408
void FrameEncoder::encodeSlice(uint32_t sliceAddr, int layer)
1409
654
{
1410
654
    Slice* slice = m_frame[layer]->m_encData->m_slice;
1411
654
    const uint32_t widthInLCUs = slice->m_sps->numCuInWidth;
1412
654
    const uint32_t lastCUAddr = (slice->m_endCUAddr + m_param->num4x4Partitions - 1) / m_param->num4x4Partitions;
1413
654
    const uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : 1;
1414
1415
654
    SAOParam* saoParam = slice->m_sps->bUseSAO && slice->m_bUseSao ? m_frame[layer]->m_encData->m_saoParam : NULL;
1416
14.3k
    for (uint32_t cuAddr = sliceAddr; cuAddr < lastCUAddr; cuAddr++)
1417
13.7k
    {
1418
13.7k
        uint32_t col = cuAddr % widthInLCUs;
1419
13.7k
        uint32_t row = cuAddr / widthInLCUs;
1420
13.7k
        uint32_t subStrm = row % numSubstreams;
1421
13.7k
        CUData* ctu = m_frame[layer]->m_encData->getPicCTU(cuAddr);
1422
1423
13.7k
        m_entropyCoder.setBitstream(&m_outStreams[subStrm]);
1424
1425
        // Synchronize cabac probabilities with upper-right CTU if it's available and we're at the start of a line.
1426
13.7k
        if (m_param->bEnableWavefront && !col && row)
1427
2.31k
        {
1428
2.31k
            m_entropyCoder.copyState(m_initSliceContext);
1429
2.31k
            m_entropyCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
1430
2.31k
        }
1431
1432
        // Initialize slice context
1433
13.7k
        if (ctu->m_bFirstRowInSlice && !col)
1434
654
            m_entropyCoder.load(m_initSliceContext);
1435
1436
13.7k
        if (saoParam)
1437
13.7k
        {
1438
13.7k
            if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
1439
13.7k
            {
1440
13.7k
                int mergeLeft = col && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_LEFT;
1441
13.7k
                int mergeUp = !ctu->m_bFirstRowInSlice && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_UP;
1442
13.7k
                if (col)
1443
10.4k
                    m_entropyCoder.codeSaoMerge(mergeLeft);
1444
13.7k
                if (!ctu->m_bFirstRowInSlice && !mergeLeft)
1445
4.83k
                    m_entropyCoder.codeSaoMerge(mergeUp);
1446
13.7k
                if (!mergeLeft && !mergeUp)
1447
4.36k
                {
1448
4.36k
                    if (saoParam->bSaoFlag[0])
1449
4.36k
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[0][cuAddr], 0);
1450
4.36k
                    if (saoParam->bSaoFlag[1])
1451
4.36k
                    {
1452
4.36k
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[1][cuAddr], 1);
1453
4.36k
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[2][cuAddr], 2);
1454
4.36k
                    }
1455
4.36k
                }
1456
13.7k
            }
1457
0
            else
1458
0
            {
1459
0
                for (int i = 0; i < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); i++)
1460
0
                    saoParam->ctuParam[i][cuAddr].reset();
1461
0
            }
1462
13.7k
        }
1463
1464
        // final coding (bitstream generation) for this CU
1465
13.7k
        m_entropyCoder.encodeCTU(*ctu, m_cuGeoms[m_ctuGeomMap[cuAddr]]);
1466
1467
13.7k
        if (m_param->bEnableWavefront)
1468
12.8k
        {
1469
12.8k
            if (col == 1)
1470
                // Store probabilities of second CTU in line into buffer
1471
2.84k
                m_rows[row].bufferedEntropy.loadContexts(m_entropyCoder);
1472
1473
12.8k
            if (col == widthInLCUs - 1)
1474
2.84k
                m_entropyCoder.finishSlice();
1475
12.8k
        }
1476
13.7k
    }
1477
1478
654
    if (!m_param->bEnableWavefront)
1479
123
        m_entropyCoder.finishSlice();
1480
654
}
1481
1482
void FrameEncoder::processRow(int row, int threadId, int layer)
1483
6.42k
{
1484
6.42k
    int64_t startTime = x265_mdate();
1485
6.42k
    if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime[layer])
1486
1.10k
        m_totalNoWorkerTime[layer] += x265_mdate() - m_stallStartTime[layer];
1487
1488
6.42k
    const uint32_t realRow = m_idx_to_row[row >> 1];
1489
6.42k
    const uint32_t typeNum = m_idx_to_row[row & 1];
1490
1491
6.42k
    if (!typeNum)
1492
3.58k
    {
1493
3.58k
        processRowEncoder(realRow, m_tld[threadId], layer);
1494
3.58k
    }
1495
2.84k
    else
1496
2.84k
    {
1497
2.84k
        m_frameFilter.processRow(realRow, layer);
1498
1499
        // NOTE: Active next row
1500
2.84k
        if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
1501
2.31k
            enqueueRowFilter(m_row_to_idx[realRow + 1]);
1502
2.84k
    }
1503
1504
6.42k
    if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
1505
1.63k
        m_stallStartTime[layer] = x265_mdate();
1506
1507
6.42k
    m_totalWorkerElapsedTime[layer] += x265_mdate() - startTime; // not thread safe, but good enough
1508
6.42k
}
1509
1510
// Called by worker threads
1511
void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer)
1512
4.01k
{
1513
4.01k
    const uint32_t row = (uint32_t)intRow;
1514
4.01k
    CTURow& curRow = m_rows[row];
1515
1516
4.01k
    if (m_param->bEnableWavefront)
1517
3.58k
    {
1518
3.58k
        ScopedLock self(curRow.lock);
1519
3.58k
        if (!curRow.active)
1520
            /* VBV restart is in progress, exit out */
1521
0
            return;
1522
3.58k
        if (curRow.busy)
1523
0
        {
1524
            /* On multi-socket Windows servers, we have seen problems with
1525
             * ATOMIC_CAS which resulted in multiple worker threads processing
1526
             * the same CU row, which often resulted in bad pointer accesses. We
1527
             * believe the problem is fixed, but are leaving this check in place
1528
             * to prevent crashes in case it is not */
1529
0
            x265_log(m_param, X265_LOG_WARNING,
1530
0
                     "internal error - simultaneous row access detected. Please report HW to x265-devel@videolan.org\n");
1531
0
            return;
1532
0
        }
1533
3.58k
        curRow.busy = true;
1534
3.58k
    }
1535
1536
    /* When WPP is enabled, every row has its own row coder instance. Otherwise
1537
     * they share row 0 */
1538
4.01k
    Entropy& rowCoder = m_param->bEnableWavefront ? curRow.rowGoOnCoder : m_rows[0].rowGoOnCoder;
1539
4.01k
    FrameData& curEncData = *m_frame[layer]->m_encData;
1540
4.01k
    Slice *slice = curEncData.m_slice;
1541
1542
4.01k
    const uint32_t numCols = m_numCols;
1543
4.01k
    const uint32_t lineStartCUAddr = row * numCols;
1544
4.01k
    bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
1545
1546
4.01k
    const uint32_t sliceId = curRow.sliceId;
1547
4.01k
    uint32_t maxBlockCols = (m_frame[layer]->m_fencPic->m_picWidth + (16 - 1)) / 16;
1548
4.01k
    uint32_t noOfBlocks = m_param->maxCUSize / 16;
1549
4.01k
    const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;
1550
4.01k
    const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
1551
4.01k
    const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
1552
4.01k
    const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
1553
1554
    // Load SBAC coder context from previous row and initialize row state.
1555
4.01k
    if (bFirstRowInSlice && !curRow.completed)        
1556
654
        rowCoder.load(m_initSliceContext);     
1557
1558
    // calculate mean QP for consistent deltaQP signalling calculation
1559
4.01k
    if (m_param->bOptCUDeltaQP)
1560
0
    {
1561
0
        ScopedLock self(curRow.lock);
1562
0
        if (!curRow.avgQPComputed)
1563
0
        {
1564
0
            if (m_param->bEnableWavefront || !row)
1565
0
            {
1566
0
                double meanQPOff = 0;
1567
0
                bool isReferenced = IS_REFERENCED(m_frame[layer]);
1568
0
                double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame[layer]->m_lowres.qpCuTreeOffset : m_frame[layer]->m_lowres.qpAqOffset;
1569
0
                if (qpoffs)
1570
0
                {
1571
0
                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
1572
1573
0
                    uint32_t cuYStart = 0, height = m_frame[layer]->m_fencPic->m_picHeight;
1574
0
                    if (m_param->bEnableWavefront)
1575
0
                    {
1576
0
                        cuYStart = intRow * m_param->maxCUSize;
1577
0
                        height = cuYStart + m_param->maxCUSize;
1578
0
                    }
1579
1580
0
                    uint32_t qgSize = m_param->rc.qgSize, width = m_frame[layer]->m_fencPic->m_picWidth;
1581
0
                    uint32_t maxOffsetCols = (m_frame[layer]->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
1582
0
                    uint32_t count = 0;
1583
0
                    for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame[layer]->m_fencPic->m_picHeight); cuY += qgSize)
1584
0
                    {
1585
0
                        for (uint32_t cuX = 0; cuX < width; cuX += qgSize)
1586
0
                        {
1587
0
                            double qp_offset = 0;
1588
0
                            uint32_t cnt = 0;
1589
1590
0
                            for (uint32_t block_yy = cuY; block_yy < cuY + qgSize && block_yy < m_frame[layer]->m_fencPic->m_picHeight; block_yy += loopIncr)
1591
0
                            {
1592
0
                                for (uint32_t block_xx = cuX; block_xx < cuX + qgSize && block_xx < width; block_xx += loopIncr)
1593
0
                                {
1594
0
                                    int idx = ((block_yy / loopIncr) * (maxOffsetCols)) + (block_xx / loopIncr);
1595
0
                                    qp_offset += qpoffs[idx];
1596
0
                                    cnt++;
1597
0
                                }
1598
0
                            }
1599
0
                            qp_offset /= cnt;
1600
0
                            meanQPOff += qp_offset;
1601
0
                            count++;
1602
0
                        }
1603
0
                    }
1604
0
                    meanQPOff /= count;
1605
0
                }
1606
0
                rowCoder.m_meanQP = slice->m_sliceQp + meanQPOff;
1607
0
            }
1608
0
            else
1609
0
            {
1610
0
                rowCoder.m_meanQP = m_rows[0].rowGoOnCoder.m_meanQP;
1611
0
            }
1612
0
            curRow.avgQPComputed = 1;
1613
0
        }
1614
0
    }
1615
1616
    // Initialize restrict on MV range in slices
1617
4.01k
    tld.analysis.m_sliceMinY = -(int32_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
1618
4.01k
    tld.analysis.m_sliceMaxY = (int32_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4);
1619
1620
    // Handle single row slice
1621
4.01k
    if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
1622
11
        tld.analysis.m_sliceMaxY = tld.analysis.m_sliceMinY = 0;
1623
1624
4.01k
    if (m_top->m_threadedME && !slice->isIntra())
1625
0
    {
1626
0
        ScopedLock lock(m_tmeDepLock);
1627
0
        m_tmeDeps[row].internal = true;
1628
0
        m_top->m_threadedME->enqueueReadyRows(row, layer, this);
1629
0
    }
1630
1631
16.9k
    while (curRow.completed < numCols)
1632
13.7k
    {
1633
13.7k
        ProfileScopeEvent(encodeCTU);
1634
1635
13.7k
        const uint32_t col = curRow.completed;
1636
13.7k
        const uint32_t cuAddr = lineStartCUAddr + col;
1637
13.7k
        CUData* ctu = curEncData.getPicCTU(cuAddr);
1638
13.7k
        const uint32_t bLastCuInSlice = (bLastRowInSlice & (col == numCols - 1)) ? 1 : 0;
1639
13.7k
        ctu->initCTU(*m_frame[layer], cuAddr, slice->m_sliceQp, bFirstRowInSlice, bLastRowInSlice, bLastCuInSlice);
1640
1641
13.7k
        if (!layer && bIsVbv)
1642
0
        {
1643
0
            if (col == 0 && !m_param->bEnableWavefront)
1644
0
            {
1645
0
                m_backupStreams[0].copyBits(&m_outStreams[0]);
1646
0
                curRow.bufferedEntropy.copyState(rowCoder);
1647
0
                curRow.bufferedEntropy.loadContexts(rowCoder);
1648
0
            }
1649
0
            if (bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1650
0
            {
1651
0
                curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
1652
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
1653
0
            }
1654
1655
0
            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
1656
0
            if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1657
0
                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
1658
0
            else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1659
0
                cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
1660
0
            else
1661
0
                cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
1662
1663
            /* TODO: use defines from slicetype.h for lowres block size */
1664
0
            uint32_t block_y = (ctu->m_cuPelY >> m_param->maxLog2CUSize) * noOfBlocks;
1665
0
            uint32_t block_x = (ctu->m_cuPelX >> m_param->maxLog2CUSize) * noOfBlocks;
1666
0
            if (!strlen(m_param->analysisLoad) || !m_param->bDisableLookahead)
1667
0
            {
1668
0
                cuStat.vbvCost = 0;
1669
0
                cuStat.intraVbvCost = 0;
1670
1671
0
                for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
1672
0
                {
1673
0
                    uint32_t idx = block_x + (block_y * maxBlockCols);
1674
1675
0
                    for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)
1676
0
                    {
1677
0
                        cuStat.vbvCost += m_frame[layer]->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
1678
0
                        cuStat.intraVbvCost += m_frame[layer]->m_lowres.intraCost[idx];
1679
0
                    }
1680
0
                }
1681
0
            }
1682
0
        }
1683
13.7k
        else
1684
13.7k
            curEncData.m_cuStat[cuAddr].baseQp = curEncData.m_avgQpRc;
1685
1686
13.7k
        if (m_param->bEnableWavefront && !col && !bFirstRowInSlice)
1687
2.31k
        {
1688
            // Load SBAC coder context from previous row and initialize row state.
1689
2.31k
            rowCoder.copyState(m_initSliceContext);
1690
2.31k
            rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
1691
2.31k
        }
1692
13.7k
        if (m_param->dynamicRd && (int32_t)(m_rce.qpaRc - m_rce.qpNoVbv) > 0)
1693
0
            ctu->m_vbvAffected = true;
1694
1695
13.7k
        if (m_top->m_threadedME && slice->m_sliceType != I_SLICE)
1696
0
        {
1697
0
            int64_t waitStart = x265_mdate();
1698
0
            bool waited = false;
1699
1700
            // Wait for threadedME to complete ME upto this CTU
1701
0
            while (m_frame[layer]->m_ctuMEFlags[cuAddr].get() == 0)
1702
0
            {
1703
#ifdef DETAILED_CU_STATS
1704
                tld.analysis.m_stats[m_jpId].countTmeBlockedCTUs++;
1705
#endif
1706
0
                m_frame[layer]->m_ctuMEFlags[cuAddr].waitForChange(0);
1707
0
                waited = true;
1708
0
            }
1709
1710
0
            int64_t waitEnd = x265_mdate();
1711
0
            if (waited)
1712
0
                ATOMIC_ADD(&m_totalThreadedMEWait[layer], waitEnd - waitStart);
1713
0
        }
1714
            
1715
        // Does all the CU analysis, returns best top level mode decision
1716
13.7k
        Mode& best = tld.analysis.compressCTU(*ctu, *m_frame[layer], m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
1717
1718
        /* startPoint > encodeOrder is true when the start point changes for
1719
        a new GOP but few frames from the previous GOP is still incomplete.
1720
        The data of frames in this interval will not be used by any future frames. */
1721
13.7k
        if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame[layer]->m_encodeOrder)
1722
0
            collectDynDataRow(*ctu, &curRow.rowStats);
1723
1724
        // take a sample of the current active worker count
1725
13.7k
        ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
1726
13.7k
        ATOMIC_INC(&m_activeWorkerCountSamples);
1727
1728
        /* advance top-level row coder to include the context of this CTU.
1729
         * if SAO is disabled, rowCoder writes the final CTU bitstream */
1730
13.7k
        rowCoder.encodeCTU(*ctu, m_cuGeoms[m_ctuGeomMap[cuAddr]]);
1731
1732
13.7k
        if (m_param->bEnableWavefront && col == 1)
1733
            // Save CABAC state for next row
1734
2.84k
            curRow.bufferedEntropy.loadContexts(rowCoder);
1735
1736
        /* SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas */
1737
13.7k
        if (slice->m_bUseSao && m_param->bSaoNonDeblocked)
1738
0
            m_frameFilter.m_parallelFilter[row].m_sao.calcSaoStatsCu_BeforeDblk(m_frame[layer], col, row);
1739
1740
        /* Deblock with idle threading */
1741
13.7k
        if (m_param->bEnableLoopFilter | slice->m_bUseSao)
1742
13.7k
        {
1743
            // NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO
1744
13.7k
            if (!bIsVbv)
1745
13.7k
            {
1746
                // Delay one row to avoid intra prediction conflict
1747
13.7k
                if (m_pool && !bFirstRowInSlice)
1748
10.9k
                {                    
1749
10.9k
                    int allowCol = col;
1750
1751
                    // avoid race condition on last column
1752
10.9k
                    if (rowInSlice >= 2)
1753
8.23k
                    {
1754
8.23k
                        allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get()
1755
8.23k
                                                                  : m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col);
1756
8.23k
                    }
1757
10.9k
                    m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
1758
10.9k
                }
1759
1760
                // Last Row may start early
1761
13.7k
                if (m_pool && bLastRowInSlice)
1762
2.74k
                {
1763
                    // Deblocking last row
1764
2.74k
                    int allowCol = col;
1765
1766
                    // avoid race condition on last column
1767
2.74k
                    if (rowInSlice >= 2)
1768
2.26k
                    {
1769
2.26k
                        allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get()
1770
2.26k
                                                                  : m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col);
1771
2.26k
                    }
1772
2.74k
                    m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
1773
2.74k
                }
1774
13.7k
            } // end of !bIsVbv
1775
13.7k
        }
1776
        // Both Loopfilter and SAO Disabled
1777
2
        else
1778
2
        {
1779
2
            m_frameFilter.m_parallelFilter[row].processPostCu(col);
1780
2
        }
1781
1782
        // Completed CU processing
1783
13.7k
        curRow.completed++;
1784
1785
13.7k
        FrameStats frameLog;
1786
13.7k
        curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
1787
1788
        // copy number of intra, inter cu per row into frame stats for 2 pass
1789
13.7k
        if (m_param->rc.bStatWrite)
1790
0
        {
1791
0
            curRow.rowStats.mvBits    += best.mvBits;
1792
0
            curRow.rowStats.coeffBits += best.coeffBits;
1793
0
            curRow.rowStats.miscBits  += best.totalBits - (best.mvBits + best.coeffBits);
1794
1795
0
            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1796
0
            {
1797
                /* 1 << shift == number of 8x8 blocks at current depth */
1798
0
                int shift = 2 * (m_param->maxCUDepth - depth);
1799
0
                int cuSize = m_param->maxCUSize >> depth;
1800
1801
0
                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
1802
0
                                                               (int)(frameLog.cntIntra[depth] << shift);
1803
1804
0
                curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
1805
0
                curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
1806
0
            }
1807
0
        }
1808
13.7k
        curRow.rowStats.totalCtu++;
1809
13.7k
        curRow.rowStats.lumaDistortion   += best.lumaDistortion;
1810
13.7k
        curRow.rowStats.chromaDistortion += best.chromaDistortion;
1811
13.7k
        curRow.rowStats.psyEnergy        += best.psyEnergy;
1812
13.7k
        curRow.rowStats.ssimEnergy       += best.ssimEnergy;
1813
13.7k
        curRow.rowStats.resEnergy        += best.resEnergy;
1814
13.7k
        curRow.rowStats.cntIntraNxN      += frameLog.cntIntraNxN;
1815
13.7k
        curRow.rowStats.totalCu          += frameLog.totalCu;
1816
52.7k
        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1817
39.0k
        {
1818
39.0k
            curRow.rowStats.cntSkipCu[depth] += frameLog.cntSkipCu[depth];
1819
39.0k
            curRow.rowStats.cntMergeCu[depth] += frameLog.cntMergeCu[depth];
1820
195k
            for (int m = 0; m < INTER_MODES; m++)
1821
156k
                curRow.rowStats.cuInterDistribution[depth][m] += frameLog.cuInterDistribution[depth][m];
1822
156k
            for (int n = 0; n < INTRA_MODES; n++)
1823
117k
                curRow.rowStats.cuIntraDistribution[depth][n] += frameLog.cuIntraDistribution[depth][n];
1824
39.0k
        }
1825
1826
13.7k
        curEncData.m_cuStat[cuAddr].totalBits = best.totalBits;
1827
13.7k
        x265_emms();
1828
1829
13.7k
        if (!layer && bIsVbv)
1830
0
        {   
1831
            // Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled
1832
0
            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];    
1833
0
            if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
1834
0
            {
1835
0
                curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
1836
0
                curEncData.m_rowStat[row].rowIntraSatd += cuStat.intraVbvCost;
1837
0
                curEncData.m_rowStat[row].encodedBits += cuStat.totalBits;
1838
0
                curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
1839
0
                curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
1840
0
            }
1841
            
1842
            // If current block is at row end checkpoint, call vbv ratecontrol.
1843
0
            if (!m_param->bEnableWavefront && col == numCols - 1)
1844
0
            {
1845
0
                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
1846
0
                curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame[layer], row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
1847
0
                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
1848
0
                curEncData.m_rowStat[row].rowQp = qpBase;
1849
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
1850
0
                if (curRow.reEncode < 0)
1851
0
                {
1852
0
                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
1853
0
                        m_frame[layer]->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
1854
1855
0
                    m_vbvResetTriggerRow[curRow.sliceId] = row;
1856
0
                    m_outStreams[0].copyBits(&m_backupStreams[0]);
1857
1858
0
                    rowCoder.copyState(curRow.bufferedEntropy);
1859
0
                    rowCoder.loadContexts(curRow.bufferedEntropy);
1860
1861
0
                    curRow.completed = 0;
1862
0
                    memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));
1863
0
                    curEncData.m_rowStat[row].numEncodedCUs = 0;
1864
0
                    curEncData.m_rowStat[row].encodedBits = 0;
1865
0
                    curEncData.m_rowStat[row].rowSatd = 0;
1866
0
                    curEncData.m_rowStat[row].rowIntraSatd = 0;
1867
0
                    curEncData.m_rowStat[row].sumQpRc = 0;
1868
0
                    curEncData.m_rowStat[row].sumQpAq = 0;
1869
0
                }
1870
0
            }
1871
            // If current block is at row diagonal checkpoint, call vbv ratecontrol.
1872
0
            else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)
1873
0
            {
1874
0
                if (m_param->rc.bEnableConstVbv)
1875
0
                {
1876
0
                    uint32_t startCuAddr = numCols * row;
1877
0
                    uint32_t EndCuAddr = startCuAddr + col;
1878
1879
0
                    for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId]; r--)
1880
0
                    {
1881
0
                        for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)
1882
0
                        {
1883
0
                            curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
1884
0
                            curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
1885
0
                            curEncData.m_rowStat[r].encodedBits += curEncData.m_cuStat[c].totalBits;
1886
0
                            curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp;
1887
0
                            curEncData.m_rowStat[r].numEncodedCUs = c;
1888
0
                        }
1889
0
                        if (curRow.reEncode < 0)
1890
0
                            break;
1891
0
                        startCuAddr = EndCuAddr - numCols;
1892
0
                        EndCuAddr = startCuAddr + 1;
1893
0
                    }
1894
0
                }
1895
0
                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
1896
0
                curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame[layer], row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
1897
0
                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
1898
0
                curEncData.m_rowStat[row].rowQp = qpBase;
1899
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
1900
1901
0
                if (curRow.reEncode < 0)
1902
0
                {
1903
0
                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
1904
0
                             m_frame[layer]->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
1905
1906
                    // prevent the WaveFront::findJob() method from providing new jobs
1907
0
                    m_vbvResetTriggerRow[curRow.sliceId] = row;
1908
0
                    m_bAllRowsStop[curRow.sliceId] = true;
1909
1910
0
                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)
1911
0
                    {
1912
0
                        CTURow& stopRow = m_rows[r];
1913
1914
0
                        if (r != row)
1915
0
                        {
1916
                            /* if row was active (ready to be run) clear active bit and bitmap bit for this row */
1917
0
                            stopRow.lock.acquire();
1918
0
                            while (stopRow.active)
1919
0
                            {
1920
0
                                if (dequeueRow(m_row_to_idx[r] * 2))
1921
0
                                    stopRow.active = false;
1922
0
                                else
1923
0
                                {
1924
                                    /* we must release the row lock to allow the thread to exit */
1925
0
                                    stopRow.lock.release();
1926
0
                                    GIVE_UP_TIME();
1927
0
                                    stopRow.lock.acquire();
1928
0
                                }
1929
0
                            }
1930
0
                            stopRow.lock.release();
1931
1932
0
                            bool bRowBusy = true;
1933
0
                            do
1934
0
                            {
1935
0
                                stopRow.lock.acquire();
1936
0
                                bRowBusy = stopRow.busy;
1937
0
                                stopRow.lock.release();
1938
1939
0
                                if (bRowBusy)
1940
0
                                {
1941
0
                                    GIVE_UP_TIME();
1942
0
                                }
1943
0
                            }
1944
0
                            while (bRowBusy);
1945
0
                        }
1946
1947
0
                        m_outStreams[r].resetBits();
1948
0
                        stopRow.completed = 0;
1949
0
                        memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));
1950
0
                        curEncData.m_rowStat[r].numEncodedCUs = 0;
1951
0
                        curEncData.m_rowStat[r].encodedBits = 0;
1952
0
                        curEncData.m_rowStat[r].rowSatd = 0;
1953
0
                        curEncData.m_rowStat[r].rowIntraSatd = 0;
1954
0
                        curEncData.m_rowStat[r].sumQpRc = 0;
1955
0
                        curEncData.m_rowStat[r].sumQpAq = 0;
1956
0
                    }
1957
1958
0
                    m_bAllRowsStop[curRow.sliceId] = false;
1959
0
                }
1960
0
            }
1961
0
        }
1962
1963
13.7k
        if (m_param->bEnableWavefront && curRow.completed >= 2 && !bLastRowInSlice &&
1964
8.04k
            (!m_bAllRowsStop[curRow.sliceId] || intRow + 1 < m_vbvResetTriggerRow[curRow.sliceId]))
1965
8.04k
        {
1966
            /* activate next row */
1967
8.04k
            ScopedLock below(m_rows[row + 1].lock);
1968
1969
8.04k
            if (m_rows[row + 1].active == false &&
1970
3.05k
                m_rows[row + 1].completed + 2 <= curRow.completed)
1971
3.05k
            {
1972
3.05k
                m_rows[row + 1].active = true;
1973
3.05k
                enqueueRowEncoder(m_row_to_idx[row + 1]);
1974
3.05k
                tryWakeOne(); /* wake up a sleeping thread or set the help wanted flag */
1975
3.05k
            }
1976
8.04k
        }
1977
1978
13.7k
        ScopedLock self(curRow.lock);
1979
13.7k
        if ((m_bAllRowsStop[curRow.sliceId] && intRow > m_vbvResetTriggerRow[curRow.sliceId]) ||
1980
13.7k
            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < curRow.completed + 2))
1981
740
        {
1982
740
            curRow.active = false;
1983
740
            curRow.busy = false;
1984
740
            ATOMIC_INC(&m_countRowBlocks);
1985
740
            return;
1986
740
        }
1987
13.7k
    }
1988
1989
    /* this row of CTUs has been compressed */
1990
3.27k
    if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
1991
0
    {
1992
0
        if (bLastRowInSlice)       
1993
0
        {
1994
0
            for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++)
1995
0
            {
1996
0
                for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs + 1; c < numCols * (r + 1); c++)
1997
0
                {
1998
0
                    curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
1999
0
                    curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
2000
0
                    curEncData.m_rowStat[r].encodedBits += curEncData.m_cuStat[c].totalBits;
2001
0
                    curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp;
2002
0
                    curEncData.m_rowStat[r].numEncodedCUs = c;
2003
0
                }
2004
0
            }
2005
0
        }
2006
0
    }
2007
2008
    /* If encoding with ABR, update bits and complexity in rate control
2009
     * after a number of rows so the next frame's rateControlStart has more
2010
     * accurate data for estimation. At the start of the encode we update stats
2011
     * after half the frame is encoded, but after this initial period we update
2012
     * after refLagRows (the number of rows reference frames must have completed
2013
     * before the frames referencing them may begin encoding) */
2014
3.27k
    if ((!layer) && (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv))
2015
0
    {
2016
0
        uint32_t rowCount = 0;
2017
0
        uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId];
2018
2019
0
        if (!m_rce.encodeOrder)
2020
0
            rowCount = maxRows - 1; 
2021
0
        else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))
2022
0
            rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
2023
0
        else
2024
0
            rowCount = X265_MIN(m_refLagRows / m_param->maxSlices, maxRows - 1);
2025
2026
0
        if (rowInSlice == rowCount)
2027
0
        {
2028
0
            m_rowSliceTotalBits[sliceId] = 0;
2029
0
            if (bIsVbv && !(m_param->rc.bEnableConstVbv && m_param->bEnableWavefront))
2030
0
            {
2031
0
                for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount + m_sliceBaseRow[sliceId]; i++)
2032
0
                    m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].encodedBits;
2033
0
            }
2034
0
            else
2035
0
            {
2036
0
                uint32_t startAddr = m_sliceBaseRow[sliceId] * numCols;
2037
0
                uint32_t finishAddr = startAddr + rowCount * numCols;
2038
                
2039
0
                for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++)
2040
0
                    m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].totalBits;
2041
0
            }
2042
2043
0
            if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
2044
0
            {
2045
0
                m_rce.rowTotalBits = 0;
2046
0
                for (uint32_t i = 0; i < m_param->maxSlices; i++)
2047
0
                    m_rce.rowTotalBits += m_rowSliceTotalBits[i];
2048
0
                m_top->m_rateControl->rateControlUpdateStats(&m_rce);
2049
0
            }
2050
0
        }
2051
0
    }
2052
2053
    /* flush row bitstream (if WPP and no SAO) or flush frame if no WPP and no SAO */
2054
    /* end_of_sub_stream_one_bit / end_of_slice_segment_flag */
2055
3.27k
       if (!slice->m_bUseSao && (m_param->bEnableWavefront || bLastRowInSlice))
2056
0
               rowCoder.finishSlice();
2057
2058
2059
    /* Processing left Deblock block with current threading */
2060
3.27k
    if ((m_param->bEnableLoopFilter | slice->m_bUseSao) & (rowInSlice >= 2))
2061
1.98k
    {
2062
        /* Check conditional to start previous row process with current threading */
2063
1.98k
        if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
2064
1.98k
        {
2065
            /* stop threading on current row and restart it */
2066
1.98k
            m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
2067
1.98k
            m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
2068
1.98k
        }
2069
1.98k
    }
2070
2071
    /* trigger row-wise loop filters */
2072
3.27k
    if (m_param->bEnableWavefront)
2073
2.84k
    {
2074
2.84k
        if (rowInSlice >= m_filterRowDelay)
2075
2.31k
        {
2076
2.31k
            enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
2077
2078
            /* NOTE: Activate filter if first row (row 0) */
2079
2.31k
            if (rowInSlice == m_filterRowDelay)
2080
531
                enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
2081
2.31k
            tryWakeOne();
2082
2.31k
        }
2083
2084
2.84k
        if (bLastRowInSlice)
2085
531
        {
2086
1.06k
            for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++)
2087
531
            {
2088
531
                enableRowFilter(m_row_to_idx[i]);
2089
531
            }
2090
531
            tryWakeOne();
2091
531
        }
2092
2093
        // handle special case - single-row slice
2094
2.84k
        if  (bFirstRowInSlice & bLastRowInSlice)
2095
0
        {
2096
0
            enqueueRowFilter(m_row_to_idx[row]);
2097
0
            tryWakeOne();
2098
0
        }
2099
2.84k
    }
2100
2101
3.27k
    curRow.busy = false;
2102
2103
    // CHECK_ME: Does it always FALSE condition?
2104
3.27k
    if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
2105
0
        m_completionEvent.trigger();
2106
3.27k
}
2107
2108
/* Add one CTU's dynamic-refinement counters to the running row totals.
 *
 * ctu      - supplies m_collectCUVariance / m_collectCURd / m_collectCUCount
 * rowStats - receives the sums in rowVarDyn / rowRdDyn / rowCntDyn
 *
 * Slots are indexed as depth * X265_REFINE_INTER_LEVELS + refine level; a
 * slot whose CU count is zero contributes nothing and is skipped. */
void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats)
{
    for (uint32_t refineLevel = 0; refineLevel < X265_REFINE_INTER_LEVELS; refineLevel++)
    {
        for (uint32_t cuDepth = 0; cuDepth < m_param->maxCUDepth; cuDepth++)
        {
            int slot = (cuDepth * X265_REFINE_INTER_LEVELS) + refineLevel;

            if (!ctu.m_collectCUCount[slot])
                continue;

            rowStats->rowCntDyn[slot] += ctu.m_collectCUCount[slot];
            rowStats->rowRdDyn[slot] += ctu.m_collectCURd[slot];
            rowStats->rowVarDyn[slot] += ctu.m_collectCUVariance[slot];
        }
    }
}
2124
2125
void FrameEncoder::collectDynDataFrame(int layer)
2126
0
{
2127
0
    for (uint32_t row = 0; row < m_numRows; row++)
2128
0
    {
2129
0
        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
2130
0
        {
2131
0
            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2132
0
            {
2133
0
                int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;
2134
0
                int curFrameIndex = m_frame[layer]->m_encodeOrder - m_top->m_startPoint;
2135
0
                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
2136
0
                if (m_rows[row].rowStats.rowCntDyn[offset])
2137
0
                {
2138
0
                    m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
2139
0
                    m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
2140
0
                    m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
2141
0
                }
2142
0
            }
2143
0
        }
2144
0
    }
2145
0
}
2146
2147
void FrameEncoder::computeAvgTrainingData(int layer)
2148
0
{
2149
0
    if (m_frame[layer]->m_lowres.bScenecut || m_frame[layer]->m_lowres.bKeyframe)
2150
0
    {
2151
0
        m_top->m_startPoint = m_frame[layer]->m_encodeOrder;
2152
0
        int size = (m_param->keyframeMax + m_param->lookaheadDepth) * m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
2153
0
        memset(m_top->m_variance, 0, size * sizeof(uint64_t));
2154
0
        memset(m_top->m_rdCost, 0, size * sizeof(uint64_t));
2155
0
        memset(m_top->m_trainingCount, 0, size * sizeof(uint32_t));
2156
0
    }
2157
0
    if (m_frame[layer]->m_encodeOrder - m_top->m_startPoint < 2 * m_param->frameNumThreads)
2158
0
        m_frame[layer]->m_classifyFrame = false;
2159
0
    else
2160
0
        m_frame[layer]->m_classifyFrame = true;
2161
2162
0
    int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
2163
0
    memset(m_frame[layer]->m_classifyRd, 0, size * sizeof(uint64_t));
2164
0
    memset(m_frame[layer]->m_classifyVariance, 0, size * sizeof(uint64_t));
2165
0
    memset(m_frame[layer]->m_classifyCount, 0, size * sizeof(uint32_t));
2166
0
    if (m_frame[layer]->m_classifyFrame)
2167
0
    {
2168
0
        uint32_t limit = m_frame[layer]->m_encodeOrder - m_top->m_startPoint - m_param->frameNumThreads;
2169
0
        for (uint32_t i = 1; i < limit; i++)
2170
0
        {
2171
0
            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
2172
0
            {
2173
0
                for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2174
0
                {
2175
0
                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
2176
0
                    int index = (i* X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
2177
0
                    if (m_top->m_trainingCount[index])
2178
0
                    {
2179
0
                        m_frame[layer]->m_classifyRd[offset] += m_top->m_rdCost[index] / m_top->m_trainingCount[index];
2180
0
                        m_frame[layer]->m_classifyVariance[offset] += m_top->m_variance[index] / m_top->m_trainingCount[index];
2181
0
                        m_frame[layer]->m_classifyCount[offset] += m_top->m_trainingCount[index];
2182
0
                    }
2183
0
                }
2184
0
            }
2185
0
        }
2186
        /* Calculates the average feature values of historic frames that are being considered for the current frame */
2187
0
        int historyCount = m_frame[layer]->m_encodeOrder - m_param->frameNumThreads - m_top->m_startPoint - 1;
2188
0
        if (historyCount)
2189
0
        {
2190
0
            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
2191
0
            {
2192
0
                for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2193
0
                {
2194
0
                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
2195
0
                    m_frame[layer]->m_classifyRd[offset] /= historyCount;
2196
0
                    m_frame[layer]->m_classifyVariance[offset] /= historyCount;
2197
0
                }
2198
0
            }
2199
0
        }
2200
0
    }
2201
0
}
2202
2203
/* collect statistics about CU coding decisions, return total QP */
2204
int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* log)
2205
13.7k
{
2206
13.7k
    int totQP = 0;
2207
13.7k
    uint32_t depth = 0;
2208
75.1k
    for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2209
61.4k
    {
2210
61.4k
        depth = ctu.m_cuDepth[absPartIdx];
2211
61.4k
        totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
2212
61.4k
    }
2213
2214
13.7k
    if (m_param->csvLogLevel >= 1 || m_param->rc.bStatWrite)
2215
0
    {
2216
0
        if (ctu.m_slice->m_sliceType == I_SLICE)
2217
0
        {
2218
0
            depth = 0;
2219
0
            for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2220
0
            {
2221
0
                depth = ctu.m_cuDepth[absPartIdx];
2222
2223
0
                log->totalCu++;
2224
0
                log->cntIntra[depth]++;
2225
2226
0
                if (ctu.m_predMode[absPartIdx] == MODE_NONE)
2227
0
                {
2228
0
                    log->totalCu--;
2229
0
                    log->cntIntra[depth]--;
2230
0
                }
2231
0
                else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
2232
0
                {
2233
                    /* TODO: log intra modes at absPartIdx +0 to +3 */
2234
0
                    X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
2235
0
                    log->cntIntraNxN++;
2236
0
                    log->cntIntra[depth]--;
2237
0
                }
2238
0
                else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
2239
0
                    log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
2240
0
                else
2241
0
                    log->cuIntraDistribution[depth][ctu.m_lumaIntraDir[absPartIdx]]++;
2242
0
            }
2243
0
        }
2244
0
        else
2245
0
        {
2246
0
            depth = 0;
2247
0
            for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2248
0
            {
2249
0
                depth = ctu.m_cuDepth[absPartIdx];
2250
2251
0
                log->totalCu++;
2252
2253
0
                if (ctu.m_predMode[absPartIdx] == MODE_NONE)
2254
0
                    log->totalCu--;
2255
0
                else if (ctu.isSkipped(absPartIdx))
2256
0
                {
2257
0
                    if (ctu.m_mergeFlag[0])
2258
0
                        log->cntMergeCu[depth]++;
2259
0
                    else
2260
0
                        log->cntSkipCu[depth]++;
2261
0
                }
2262
0
                else if (ctu.isInter(absPartIdx))
2263
0
                {
2264
0
                    log->cntInter[depth]++;
2265
2266
0
                    if (ctu.m_partSize[absPartIdx] < AMP_ID)
2267
0
                        log->cuInterDistribution[depth][ctu.m_partSize[absPartIdx]]++;
2268
0
                    else
2269
0
                        log->cuInterDistribution[depth][AMP_ID]++;
2270
0
                }
2271
0
                else if (ctu.isIntra(absPartIdx))
2272
0
                {
2273
0
                    log->cntIntra[depth]++;
2274
2275
0
                    if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
2276
0
                    {
2277
0
                        X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
2278
0
                        log->cntIntraNxN++;
2279
0
                        log->cntIntra[depth]--;
2280
                        /* TODO: log intra modes at absPartIdx +0 to +3 */
2281
0
                    }
2282
0
                    else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
2283
0
                        log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
2284
0
                    else
2285
0
                        log->cuIntraDistribution[depth][ctu.m_lumaIntraDir[absPartIdx]]++;
2286
0
                }
2287
0
            }
2288
0
        }
2289
0
    }
2290
2291
13.7k
    return totQP;
2292
13.7k
}
2293
2294
/* DCT-domain noise reduction / adaptive deadzone from libavcodec */
2295
void FrameEncoder::noiseReductionUpdate()
2296
0
{
2297
0
    static const uint32_t maxBlocksPerTrSize[4] = {1 << 18, 1 << 16, 1 << 14, 1 << 12};
2298
2299
0
    for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
2300
0
    {
2301
0
        int trSize = cat & 3;
2302
0
        int coefCount = 1 << ((trSize + 2) * 2);
2303
2304
0
        if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])
2305
0
        {
2306
0
            for (int i = 0; i < coefCount; i++)
2307
0
                m_nr->nrResidualSum[cat][i] >>= 1;
2308
0
            m_nr->nrCount[cat] >>= 1;
2309
0
        }
2310
2311
0
        int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;
2312
0
        uint64_t scaledCount = (uint64_t)nrStrength * m_nr->nrCount[cat];
2313
2314
0
        for (int i = 0; i < coefCount; i++)
2315
0
        {
2316
0
            uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] / 2;
2317
0
            uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;
2318
0
            m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);
2319
0
        }
2320
2321
        // Don't denoise DC coefficients
2322
0
        m_nr->nrOffsetDenoise[cat][0] = 0;
2323
0
    }
2324
0
}
2325
2326
/* Read a film grain characteristics model from an open binary file.
 *
 * m_filmGrain - destination structure; header fields are copied in and the
 *               per-component interval tables are allocated with malloc()
 * filmgrain   - input stream positioned at the start of the model
 *
 * Layout consumed, in order: a FilmGrain header (3 bools + 1 byte), an
 * optional ColourDescription (1 bool + 5 bytes), an FGPresent record
 * (3 bools + 2 bytes), then for every present component the interval count,
 * the model value count and, per interval, lower bound, upper bound and the
 * model values.
 *
 * NOTE(review): x265_fread is defined outside this file section; it is
 * presumed to report errorMessage and leave this function on a short read -
 * confirm against its definition. It is therefore only ever used here as a
 * stand-alone statement.
 *
 * Allocation failures are reported, the partially built table of the current
 * component is released, and that component plus all later ones (whose tables
 * were never read) are marked absent before returning. */
void FrameEncoder::readModel(FilmGrainCharacteristics* m_filmGrain, FILE* filmgrain)
{
    char const* errorMessage = "Error reading FilmGrain characteristics\n";
    FilmGrain m_fg;
    x265_fread((char* )&m_fg, sizeof(bool) * 3 + sizeof(uint8_t), 1, filmgrain, errorMessage);
    m_filmGrain->m_filmGrainCharacteristicsCancelFlag = m_fg.m_filmGrainCharacteristicsCancelFlag;
    m_filmGrain->m_filmGrainCharacteristicsPersistenceFlag = m_fg.m_filmGrainCharacteristicsPersistenceFlag;
    m_filmGrain->m_filmGrainModelId = m_fg.m_filmGrainModelId;
    m_filmGrain->m_separateColourDescriptionPresentFlag = m_fg.m_separateColourDescriptionPresentFlag;
    if (m_filmGrain->m_separateColourDescriptionPresentFlag)
    {
        ColourDescription m_clr;
        x265_fread((char* )&m_clr, sizeof(bool) + sizeof(uint8_t) * 5, 1, filmgrain, errorMessage);
        m_filmGrain->m_filmGrainBitDepthLumaMinus8 = m_clr.m_filmGrainBitDepthLumaMinus8;
        m_filmGrain->m_filmGrainBitDepthChromaMinus8 = m_clr.m_filmGrainBitDepthChromaMinus8;
        m_filmGrain->m_filmGrainFullRangeFlag = m_clr.m_filmGrainFullRangeFlag;
        m_filmGrain->m_filmGrainColourPrimaries = m_clr.m_filmGrainColourPrimaries;
        m_filmGrain->m_filmGrainTransferCharacteristics = m_clr.m_filmGrainTransferCharacteristics;
        m_filmGrain->m_filmGrainMatrixCoeffs = m_clr.m_filmGrainMatrixCoeffs;
    }
    FGPresent m_present;
    x265_fread((char* )&m_present, sizeof(bool) * 3 + sizeof(uint8_t) * 2, 1, filmgrain, errorMessage);
    m_filmGrain->m_blendingModeId = m_present.m_blendingModeId;
    m_filmGrain->m_log2ScaleFactor = m_present.m_log2ScaleFactor;
    m_filmGrain->m_compModel[0].bPresentFlag = m_present.m_presentFlag[0];
    m_filmGrain->m_compModel[1].bPresentFlag = m_present.m_presentFlag[1];
    m_filmGrain->m_compModel[2].bPresentFlag = m_present.m_presentFlag[2];
    for (int i = 0; i < MAX_NUM_COMPONENT; i++)
    {
        if (m_filmGrain->m_compModel[i].bPresentFlag)
        {
            x265_fread((char* )(&m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1), sizeof(uint8_t), 1, filmgrain, errorMessage);
            x265_fread((char* )(&m_filmGrain->m_compModel[i].numModelValues), sizeof(uint8_t), 1, filmgrain, errorMessage);

            FilmGrainCharacteristics::CompModelIntensityValues* intervals =
                (FilmGrainCharacteristics::CompModelIntensityValues* ) malloc(sizeof(FilmGrainCharacteristics::CompModelIntensityValues) * (m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1+1));
            m_filmGrain->m_compModel[i].intensityValues = intervals;
            if (!intervals)
            {
                x265_log(m_param, X265_LOG_ERROR, "FilmGrain intensity interval alloc failed\n");
                /* nothing was read for this component or the ones after it */
                for (int c = i; c < MAX_NUM_COMPONENT; c++)
                    m_filmGrain->m_compModel[c].bPresentFlag = false;
                return;
            }

            for (int j = 0; j <= m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1; j++)
            {
                x265_fread((char* )(&intervals[j].intensityIntervalLowerBound), sizeof(uint8_t), 1, filmgrain, errorMessage);
                x265_fread((char* )(&intervals[j].intensityIntervalUpperBound), sizeof(uint8_t), 1, filmgrain, errorMessage);
                intervals[j].compModelValue = (int* ) malloc(sizeof(int) * (m_filmGrain->m_compModel[i].numModelValues));

                /* a null result is only a failure for a non-empty request;
                 * malloc(0) is allowed to return a null pointer */
                if (!intervals[j].compModelValue && m_filmGrain->m_compModel[i].numModelValues > 0)
                {
                    x265_log(m_param, X265_LOG_ERROR, "FilmGrain model value alloc failed\n");
                    for (int done = 0; done < j; done++)
                        free(intervals[done].compModelValue);
                    free(intervals);
                    m_filmGrain->m_compModel[i].intensityValues = NULL;
                    for (int c = i; c < MAX_NUM_COMPONENT; c++)
                        m_filmGrain->m_compModel[c].bPresentFlag = false;
                    return;
                }

                for (int k = 0; k < m_filmGrain->m_compModel[i].numModelValues; k++)
                {
                    x265_fread((char* )(&intervals[j].compModelValue[k]), sizeof(int), 1, filmgrain, errorMessage);
                }
            }
        }
    }
}
2373
2374
void compute_film_grain_resolution(int width, int height,
2375
                                   int& apply_units_resolution_log2,
2376
                                   int& apply_horz_resolution,
2377
                                   int& apply_vert_resolution)
2378
0
{
2379
0
    unsigned long log2_width, log2_height;
2380
0
    BSF(log2_width, (unsigned long) width);
2381
0
    BSF(log2_height, (unsigned long) height);
2382
2383
0
    int log2 = (log2_width < log2_height) ? log2_width : log2_height;
2384
0
    apply_units_resolution_log2 = log2;
2385
2386
0
    int unit = 1 << log2;
2387
0
    apply_horz_resolution = width / unit;
2388
0
    apply_vert_resolution = height / unit;
2389
2390
0
    return;
2391
0
}
2392
2393
/* Read an AOM film grain model from an open binary file and derive the
 * payload size of the resulting message.
 *
 * m_aomFilmGrain - destination structure, filled field by field
 * Aomfilmgrain   - input stream positioned at the start of the model
 *
 * Values are read from the file in a fixed order; fields that are not stored
 * in the file are set to constants or derived from m_param. Alongside, a
 * running bit count tracks how many bits each field occupies in the payload
 * (widths as given by the per-field increments below); it is converted to
 * bytes at the end.
 *
 * NOTE(review): x265_fread is defined outside this file section; it is
 * presumed to report errorMessage and leave this function on a short read -
 * confirm. It is only ever used here as a stand-alone statement, in the
 * original order.
 * NOTE(review): the point counts come straight from the file and are used as
 * loop bounds with no range check - confirm the scaling point arrays can hold
 * any count a file may supply. */
void FrameEncoder::readAomModel(AomFilmGrainCharacteristics* m_aomFilmGrain, FILE* Aomfilmgrain)
{
    char const* errorMessage = "Error reading Aom FilmGrain characteristics\n";
    AomFilmGrain m_afg;
    m_afg.m_chroma_scaling_from_luma = 0;
    const bool bChromaFromLuma = m_afg.m_chroma_scaling_from_luma != 0;

    // payload_less_than_4byte_flag(1) + film_grain_param_set_idx(3)
    int bitCount = 4;

    x265_fread((char*)&m_aomFilmGrain->m_apply_grain, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_grain_seed, sizeof(uint16_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_update_grain, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_num_y_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    bitCount += 1 + 16 + 1 + 4; // apply_grain, grain_seed, update_grain, num_y_points

    if (m_aomFilmGrain->m_num_y_points)
    {
        m_aomFilmGrain->point_y_value_increment_bits = 8;
        m_aomFilmGrain->point_y_scaling_bits = 8;
        bitCount += 3 + 2;
        for (int pt = 0; pt < m_aomFilmGrain->m_num_y_points; pt++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_y[pt][0], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_y[pt][1], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            bitCount += 2 * 8;
        }
    }

    x265_fread((char*)&m_aomFilmGrain->m_num_cb_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    bitCount += 4;
    if (m_aomFilmGrain->m_num_cb_points)
    {
        m_aomFilmGrain->point_cb_value_increment_bits = 8;
        m_aomFilmGrain->point_cb_scaling_bits = 8;
        m_aomFilmGrain->cb_scaling_offset = 0;
        bitCount += 3 + 2 + 8;
        for (int pt = 0; pt < m_aomFilmGrain->m_num_cb_points; pt++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cb[pt][0], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cb[pt][1], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            bitCount += 2 * 8;
        }
    }

    x265_fread((char*)&m_aomFilmGrain->m_num_cr_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    bitCount += 4;
    if (m_aomFilmGrain->m_num_cr_points)
    {
        m_aomFilmGrain->point_cr_value_increment_bits = 8;
        m_aomFilmGrain->point_cr_scaling_bits = 8;
        m_aomFilmGrain->cr_scaling_offset = 0;
        bitCount += 3 + 2 + 8;
        for (int pt = 0; pt < m_aomFilmGrain->m_num_cr_points; pt++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cr[pt][0], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cr[pt][1], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            bitCount += 2 * 8;
        }
    }

    x265_fread((char*)&m_aomFilmGrain->m_scaling_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_ar_coeff_lag, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    bitCount += 2 + 2;

    /* auto-regression coefficients: 24 for luma, 25 for each chroma plane */
    if (m_aomFilmGrain->m_num_y_points)
    {
        bitCount += 2;
        for (int coef = 0; coef < 24; coef++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_y[coef], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            bitCount += 8;
        }
    }
    if (m_aomFilmGrain->m_num_cb_points || bChromaFromLuma)
    {
        bitCount += 2;
        for (int coef = 0; coef < 25; coef++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_cb[coef], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            bitCount += 8;
        }
    }
    if (m_aomFilmGrain->m_num_cr_points || bChromaFromLuma)
    {
        bitCount += 2;
        for (int coef = 0; coef < 25; coef++)
        {
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_cr[coef], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
            bitCount += 8;
        }
    }

    x265_fread((char*)&m_aomFilmGrain->m_ar_coeff_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_grain_scale_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    bitCount += 2 + 2;

    if (m_aomFilmGrain->m_num_cb_points)
    {
        x265_fread((char*)&m_aomFilmGrain->m_cb_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cb_luma_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cb_offset, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        bitCount += 8 + 8 + 9;
    }
    if (m_aomFilmGrain->m_num_cr_points)
    {
        x265_fread((char*)&m_aomFilmGrain->m_cr_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cr_luma_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        x265_fread((char*)&m_aomFilmGrain->m_cr_offset, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
        bitCount += 8 + 8 + 9;
    }

    x265_fread((char*)&m_aomFilmGrain->m_overlap_flag, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    x265_fread((char*)&m_aomFilmGrain->m_clip_to_restricted_range, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
    bitCount += 1 + 1;

    /* everything below is derived, nothing more is read from the file */
    m_aomFilmGrain->luma_only_flag = m_aomFilmGrain->m_num_cb_points == 0 && m_aomFilmGrain->m_num_cr_points == 0;
    bitCount++;

    m_aomFilmGrain->subsamplingX = CHROMA_H_SHIFT(m_param->internalCsp);
    m_aomFilmGrain->subsamplingY = CHROMA_V_SHIFT(m_param->internalCsp);

    compute_film_grain_resolution(m_param->sourceWidth, m_param->sourceHeight, m_aomFilmGrain->units_resolution_log2,
        m_aomFilmGrain->horz_resolution, m_aomFilmGrain->vert_resolution);
    bitCount += 28; // apply_units_resolution_log2(4) + apply_horz_resolution(12) + apply_vert_resolution(12)

    m_aomFilmGrain->predict_scaling_flag = 0;
    bitCount++;
    m_aomFilmGrain->predict_y_scaling_flag = 0;
    m_aomFilmGrain->predict_cb_scaling_flag = 0;
    m_aomFilmGrain->predict_cr_scaling_flag = 0;
    m_aomFilmGrain->m_bitDepth = m_param->internalBitDepth;

    bitCount++; // video signal type present flag
    if (m_frame[0]->m_encData->m_slice->m_sps->vuiParameters.videoSignalTypePresentFlag)
        bitCount += 4; // bit_depth_minus8(3) + cicp_info_present_flag(1)
    if (m_frame[0]->m_encData->m_slice->m_sps->vuiParameters.colourDescriptionPresentFlag)
        bitCount += 25; // colourPrimaries(8) + transferCharacteristics(8) + matrixCoefficients(8)+ videoFullRangeFlag(1)

    if (!m_aomFilmGrain->luma_only_flag)
    {
        m_aomFilmGrain->m_chroma_scaling_from_luma = 0;
        bitCount += 2 + 1; // subsampling_x(1) + subsampling_y(1) + chroma_scaling_from_luma(1)
    }

    /* round up to bytes, pick the width of the size field from that, then
     * round up again with the size field included */
    m_aomFilmGrain->payload_size = (bitCount + 8 - 1) / 8;
    if (m_aomFilmGrain->payload_size < 4)
        m_aomFilmGrain->payload_bits = 2;
    else
        m_aomFilmGrain->payload_bits = 8;
    bitCount += m_aomFilmGrain->payload_bits;
    m_aomFilmGrain->payload_size = (bitCount + 8 - 1) / 8;
}
2548
2549
#if ENABLE_LIBVMAF
2550
void FrameEncoder::vmafFrameLevelScore()
2551
{
2552
    PicYuv *fenc = m_frame[0]->m_fencPic;
2553
    PicYuv *recon = m_frame[0]->m_reconPic[0];
2554
2555
    x265_vmaf_framedata *vmafframedata = (x265_vmaf_framedata*)x265_malloc(sizeof(x265_vmaf_framedata));
2556
    if (!vmafframedata)
2557
    {
2558
        x265_log(NULL, X265_LOG_ERROR, "vmaf frame data alloc failed\n");
2559
    }
2560
2561
    vmafframedata->height = fenc->m_picHeight;
2562
    vmafframedata->width = fenc->m_picWidth;
2563
    vmafframedata->frame_set = 0;
2564
    vmafframedata->internalBitDepth = m_param->internalBitDepth;
2565
    vmafframedata->reference_frame = fenc;
2566
    vmafframedata->distorted_frame = recon;
2567
    fenc->m_vmafScore = x265_calculate_vmaf_framelevelscore(m_param,vmafframedata);
2568
2569
    if (vmafframedata)
2570
    x265_free(vmafframedata);
2571
}
2572
#endif
2573
2574
/* Hand the finished frame(s) and their NAL units to the caller.
 *
 * output - receives the contents of m_nalList
 *
 * Returns NULL when layer 0 has no frame or, with more than one layer
 * configured, when layer 1 has none. Otherwise waits on m_done, moves each
 * layer's frame into m_retFrameBuffer (clearing m_frame and recording the
 * output time per layer) and returns m_retFrameBuffer. */
Frame** FrameEncoder::getEncodedPicture(NALList& output)
{
    const bool bFramesPending = m_frame[0] && (m_param->numLayers <= 1 || (MAX_LAYERS > 1 && m_frame[1]));
    if (!bFramesPending)
        return NULL;

    /* block here until worker thread completes */
    m_done.wait();

    for (int layer = 0; layer < m_param->numLayers; layer++)
    {
        m_retFrameBuffer[layer] = m_frame[layer];
        m_frame[layer] = NULL;
        m_prevOutputTime[layer] = x265_mdate();
    }

    output.takeContents(m_nalList);
    return m_retFrameBuffer;
}
2593
}