Coverage Report

Created: 2026-05-16 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/frameencoder.cpp
Line
Count
Source
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Chung Shin Yee <shinyee@multicorewareinc.com>
5
 *          Min Chen <chenm003@163.com>
6
 *          Steve Borho <steve@borho.org>
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with this program; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21
 *
22
 * This program is also available under a commercial proprietary license.
23
 * For more information, contact us at license @ x265.com.
24
 *****************************************************************************/
25
26
#include "common.h"
27
#include "frame.h"
28
#include "framedata.h"
29
#include "wavefront.h"
30
#include "param.h"
31
32
#include "encoder.h"
33
#include "frameencoder.h"
34
#include "common.h"
35
#include "slicetype.h"
36
#include "nal.h"
37
#include "temporalfilter.h"
38
39
#include <iostream>
40
41
namespace X265_NS {
42
void weightAnalyse(Slice& slice, Frame& frame, x265_param& param);
43
44
/* Construct an idle frame encoder. Every pointer that destroy() later releases
 * starts out NULL and the per-layer timing state is reset; the buffers
 * themselves are allocated later (see init() and threadMain()). */
FrameEncoder::FrameEncoder()
{
    m_reconfigure = false;
    m_isFrameEncoder = true;
    m_threadActive = true;
    m_activeWorkerCount = 0;
    m_completionCount = 0;
    m_outStreams = NULL;
    m_backupStreams = NULL;
    m_substreamSizes = NULL;
    m_nr = NULL;
    m_tld = NULL;
    m_rows = NULL;
    m_top = NULL;
    m_param = NULL;
    m_cuGeoms = NULL;
    m_ctuGeomMap = NULL;

    /* These are only assigned in init() but are released unconditionally by
     * destroy(); clear them so an encoder that is destroyed before (or after a
     * failed) init() never hands an uninitialized pointer to X265_FREE */
    m_sliceBaseRow = NULL;
    m_sliceMaxBlockRow = NULL;
    m_bAllRowsStop = NULL;
    m_vbvResetTriggerRow = NULL;
    m_retFrameBuffer = NULL;

    m_localTldIdx = 0;

    /* zeroing the rate control entry also leaves its picTimingSEI / hrdTiming
     * pointers NULL until init() allocates them */
    memset(&m_rce, 0, sizeof(RateControlEntry));

    for (int layer = 0; layer < MAX_LAYERS; layer++)
    {
        m_prevOutputTime[layer] = x265_mdate();
        m_slicetypeWaitTime[layer] = 0;
        m_frame[layer] = NULL;
    }
}
70
71
void FrameEncoder::destroy()
72
0
{
73
0
    if (m_pool)
74
0
    {
75
0
        if (!m_jpId)
76
0
        {
77
0
            int numTLD = m_pool->m_numWorkers;
78
0
            if (!m_param->bEnableWavefront)
79
0
                numTLD += m_pool->m_numProviders;
80
0
            for (int i = 0; i < numTLD; i++)
81
0
                m_tld[i].destroy();
82
0
            delete [] m_tld;
83
0
        }
84
0
    }
85
0
    else
86
0
    {
87
0
        m_tld->destroy();
88
0
        delete m_tld;
89
0
    }
90
91
0
    delete[] m_rows;
92
0
    delete[] m_outStreams;
93
0
    delete[] m_backupStreams;
94
0
    X265_FREE(m_sliceBaseRow);
95
0
    X265_FREE((void*)m_bAllRowsStop);
96
0
    X265_FREE((void*)m_vbvResetTriggerRow);
97
0
    X265_FREE(m_sliceMaxBlockRow);
98
0
    X265_FREE(m_cuGeoms);
99
0
    X265_FREE(m_ctuGeomMap);
100
0
    X265_FREE(m_substreamSizes);
101
0
    X265_FREE(m_nr);
102
0
    X265_FREE(m_retFrameBuffer);
103
104
0
    m_frameFilter.destroy();
105
106
0
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
107
0
    {
108
0
        delete m_rce.picTimingSEI;
109
0
        delete m_rce.hrdTiming;
110
0
    }
111
0
}
112
113
/* One-time setup of a frame encoder for a picture of numRows x numCols CTUs.
 *
 * Allocates the CTU row array, the per-slice row tables and flags, sizes the
 * reference lag from the motion search range, initializes the wavefront queue
 * and frame filter, and allocates optional SEI / noise reduction state.
 *
 * Returns false when numRows is zero or one of the checked allocations failed.
 * All setup steps are still attempted in that case; tables that could not be
 * allocated are simply left untouched. */
bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
{
    m_top = top;
    m_param = top->m_param;
    m_numRows = numRows;
    m_numCols = numCols;
    m_reconfigure = false;
    m_filterRowDelay = ((m_param->bEnableSAO && m_param->bSaoNonDeblocked)
                        || (!m_param->bEnableLoopFilter && m_param->bEnableSAO)) ?
                        2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
    m_filterRowDelayCus = m_filterRowDelay * numCols;
    m_rows = new CTURow[m_numRows];
    bool ok = !!m_numRows;

    m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
    m_bAllRowsStop = X265_MALLOC(bool, m_param->maxSlices);
    m_vbvResetTriggerRow = X265_MALLOC(int, m_param->maxSlices);
    ok &= !!m_sliceBaseRow;
    /* compressFrame() memsets both of these every frame, so they are required */
    ok &= m_bAllRowsStop && m_vbvResetTriggerRow;

    m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;

    /* Distribute CTU rows over the slices using 8-bit fixed point accumulation:
     * m_sliceBaseRow[s] is the first CTU row of slice s, with a terminating
     * entry equal to m_numRows */
    uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
    uint32_t rowSum = sliceGroupSizeAccu;
    uint32_t sidx = 0;
    if (m_sliceBaseRow) /* never write through a failed allocation */
    {
        for (uint32_t i = 0; i < m_numRows; i++)
        {
            const uint32_t rowRange = (rowSum >> 8);
            if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
            {
                rowSum += sliceGroupSizeAccu;
                m_sliceBaseRow[++sidx] = i;
            }
        }
        X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
        m_sliceBaseRow[0] = 0;
        m_sliceBaseRow[m_param->maxSlices] = m_numRows;
    }

    /* Same partitioning, but in units of 16-pixel block rows */
    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
    ok &= !!m_sliceMaxBlockRow;
    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
    rowSum = sliceGroupSizeAccu;
    sidx = 0;
    if (m_sliceMaxBlockRow) /* never write through a failed allocation */
    {
        for (uint32_t i = 0; i < maxBlockRows; i++)
        {
            const uint32_t rowRange = (rowSum >> 8);
            if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
            {
                rowSum += sliceGroupSizeAccu;
                m_sliceMaxBlockRow[++sidx] = i;
            }
        }
        m_sliceMaxBlockRow[0] = 0;
        m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
    }

    /* determine full motion search range */
    int range  = m_param->searchRange;       /* fpel search */
    range += !!(m_param->searchMethod < 2);  /* diamond/hex range check lag */
    range += NTAPS_LUMA / 2;                 /* subpel filter half-length */
    range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine) + 1) / 2; /* subpel refine steps */
    m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range + m_param->maxCUSize - 1) / m_param->maxCUSize);

    // NOTE: 2 times of numRows because both Encoder and Filter in same queue
    if (!WaveFront::init(m_numRows * 2))
    {
        /* NOTE(review): the failure is logged and the pool is dropped, but `ok`
         * is not cleared - confirm running without a pool is the intended fallback */
        x265_log(m_param, X265_LOG_ERROR, "unable to initialize wavefront queue\n");
        m_pool = NULL;
    }

    m_frameFilter.init(top, this, numRows, numCols);

    // initialize HRD parameters of SPS
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
    {
        m_rce.picTimingSEI = new SEIPictureTiming;
        m_rce.hrdTiming = new HRDTiming;

        ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
    }

    /* noise reduction state is optional: if it cannot be allocated (or was not
     * requested) both noise reduction strengths are forced to zero */
    if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
        m_nr = X265_MALLOC(NoiseReduction, 1);
    if (m_nr)
        memset(m_nr, 0, sizeof(NoiseReduction));
    else
        m_param->noiseReductionIntra = m_param->noiseReductionInter = 0;

    // 7.4.7.1 - Ceil( Log2( PicSizeInCtbsY ) ) bits
    {
        const int lastCtuAddr = numRows * numCols - 1;
        if (lastCtuAddr)
        {
            unsigned long tmp;
            BSR(tmp, lastCtuAddr);
            m_sliceAddrBits = (uint16_t)(tmp + 1);
        }
        else
        {
            /* single-CTU picture: Ceil(Log2(1)) is 0. BSR presumably maps to a
             * bit-scan-reverse intrinsic, whose result is undefined for an
             * input of zero, so it must not be invoked here */
            m_sliceAddrBits = 0;
        }
    }

    m_tmeDeps.resize(m_numRows);

    m_retFrameBuffer = X265_MALLOC(Frame*, m_param->numLayers);
    ok &= !!m_retFrameBuffer;
    if (m_retFrameBuffer)
    {
        for (int layer = 0; layer < m_param->numLayers; layer++)
            m_retFrameBuffer[layer] = NULL;
    }
    return ok;
}
212
213
/* Generate a complete list of unique geom sets for the current picture dimensions */
214
bool FrameEncoder::initializeGeoms()
215
0
{
216
    /* Geoms only vary between CTUs in the presence of picture edges */
217
0
    int maxCUSize = m_param->maxCUSize;
218
0
    int minCUSize = m_param->minCUSize;
219
0
    int heightRem = m_param->sourceHeight & (maxCUSize - 1);
220
0
    int widthRem = m_param->sourceWidth & (maxCUSize - 1);
221
0
    int allocGeoms = 1; // body
222
0
    if (heightRem && widthRem)
223
0
        allocGeoms = 4; // body, right, bottom, corner
224
0
    else if (heightRem || widthRem)
225
0
        allocGeoms = 2; // body, right or bottom
226
227
0
    m_ctuGeomMap = X265_MALLOC(uint32_t, m_numRows * m_numCols);
228
0
    m_cuGeoms = X265_MALLOC(CUGeom, allocGeoms * CUGeom::MAX_GEOMS);
229
0
    if (!m_cuGeoms || !m_ctuGeomMap)
230
0
        return false;
231
232
    // body
233
0
    CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, minCUSize, m_cuGeoms);
234
0
    memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols);
235
0
    if (allocGeoms == 1)
236
0
        return true;
237
238
0
    int countGeoms = 1;
239
0
    if (widthRem)
240
0
    {
241
        // right
242
0
        CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
243
0
        for (uint32_t i = 0; i < m_numRows; i++)
244
0
        {
245
0
            uint32_t ctuAddr = m_numCols * (i + 1) - 1;
246
0
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
247
0
        }
248
0
        countGeoms++;
249
0
    }
250
0
    if (heightRem)
251
0
    {
252
        // bottom
253
0
        CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
254
0
        for (uint32_t i = 0; i < m_numCols; i++)
255
0
        {
256
0
            uint32_t ctuAddr = m_numCols * (m_numRows - 1) + i;
257
0
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
258
0
        }
259
0
        countGeoms++;
260
261
0
        if (widthRem)
262
0
        {
263
            // corner
264
0
            CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, minCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS);
265
266
0
            uint32_t ctuAddr = m_numCols * m_numRows - 1;
267
0
            m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
268
0
            countGeoms++;
269
0
        }
270
0
        X265_CHECK(countGeoms == allocGeoms, "geometry match check failure\n");
271
0
    }
272
273
0
    return true;
274
0
}
275
276
/* Hand a new set of per-layer frames to this frame encoder and wake its
 * thread. Each frame is tagged with this encoder's id, job provider and motion
 * reference table. Returns false (without triggering m_enable) when the
 * geometry tables have not been built yet and cannot be built now. */
bool FrameEncoder::startCompressFrame(Frame* curFrame[MAX_LAYERS])
{
    for (int id = 0; id < m_param->numLayers; id++)
    {
        Frame* incoming = curFrame[id];

        /* time elapsed since this layer's previous output time stamp */
        m_slicetypeWaitTime[id] = x265_mdate() - m_prevOutputTime[id];
        m_frame[id] = incoming;
        incoming->m_encData->m_frameEncoderID = m_jpId;
        incoming->m_encData->m_jobProvider = this;
        incoming->m_encData->m_slice->m_mref = m_mref;
    }
    m_sliceType = curFrame[0]->m_lowres.sliceType;

    /* geometry tables are created on first use */
    if (!m_cuGeoms && !initializeGeoms())
        return false;

    m_enable.trigger();
    return true;
}
297
298
void FrameEncoder::threadMain()
299
0
{
300
0
    THREAD_NAME("Frame", m_jpId);
301
302
0
    if (m_pool)
303
0
    {
304
0
        m_pool->setCurrentThreadAffinity();
305
306
        /* the first FE on each NUMA node is responsible for allocating thread
307
         * local data for all worker threads in that pool. If WPP is disabled, then
308
         * each FE also needs a TLD instance */
309
0
        if (!m_jpId)
310
0
        {
311
0
            int numTLD = m_pool->m_numWorkers;
312
0
            if (!m_param->bEnableWavefront)
313
0
                numTLD += m_pool->m_numProviders;
314
315
0
            m_tld = new ThreadLocalData[numTLD];
316
0
            for (int i = 0; i < numTLD; i++)
317
0
            {
318
0
                m_tld[i].analysis.initSearch(*m_param, m_top->m_scalingList);
319
0
                m_tld[i].analysis.create(m_tld);
320
0
            }
321
322
0
            for (int i = 0; i < m_pool->m_numProviders; i++)
323
0
            {
324
0
                if (m_pool->m_jpTable[i]->m_isFrameEncoder) /* ugh; over-allocation and other issues here */
325
0
                {
326
0
                    FrameEncoder *peer = dynamic_cast<FrameEncoder*>(m_pool->m_jpTable[i]);
327
0
                    peer->m_tld = m_tld;
328
0
                }
329
0
            }
330
0
        }
331
332
0
        if (m_param->bEnableWavefront)
333
0
            m_localTldIdx = -1; // cause exception if used
334
0
        else
335
0
            m_localTldIdx = m_pool->m_numWorkers + m_jpId;
336
0
    }
337
0
    else
338
0
    {
339
0
        m_tld = new ThreadLocalData;
340
0
        m_tld->analysis.initSearch(*m_param, m_top->m_scalingList);
341
0
        m_tld->analysis.create(NULL);
342
0
        m_localTldIdx = 0;
343
0
    }
344
345
0
    m_done.trigger();     /* signal that thread is initialized */
346
0
    m_enable.wait();      /* Encoder::encode() triggers this event */
347
348
0
    while (m_threadActive)
349
0
    {
350
0
        if (m_param->bCTUInfo)
351
0
        {
352
0
            while (!m_frame[0]->m_ctuInfo)
353
0
                m_frame[0]->m_copied.wait();
354
0
        }
355
0
        if ((m_param->bAnalysisType == AVC_INFO) && !strlen(m_param->analysisSave) && !strlen(m_param->analysisLoad) && !(IS_X265_TYPE_I(m_frame[0]->m_lowres.sliceType)))
356
0
        {
357
0
            while (((m_frame[0]->m_analysisData.interData == NULL && m_frame[0]->m_analysisData.intraData == NULL) || (uint32_t)m_frame[0]->m_poc != m_frame[0]->m_analysisData.poc))
358
0
                m_frame[0]->m_copyMVType.wait();
359
0
        }
360
361
0
        for (int layer = 0; layer < m_param->numLayers; layer++)
362
0
            compressFrame(layer);
363
0
        m_done.trigger(); /* FrameEncoder::getEncodedPicture() blocks for this event */
364
0
        m_enable.wait();
365
0
    }
366
0
}
367
368
void FrameEncoder::WeightAnalysis::processTasks(int /* workerThreadId */)
369
0
{
370
0
    Frame* frame = master.m_frame[master.m_sLayerId];
371
0
    weightAnalyse(*frame->m_encData->m_slice, *frame, *master.m_param);
372
0
}
373
374
375
/* Number of bits needed to code `code` as a signed Exp-Golomb value.
 *
 * The signed value is first mapped to an unsigned index (0 -> 0, 1 -> 1,
 * -1 -> 2, 2 -> 3, -2 -> 4, ...); an index k then costs
 * 2 * floor(log2(k + 1)) + 1 bits.
 *
 * The arithmetic is carried out in 64-bit unsigned so that no input, including
 * INT32_MIN, triggers signed overflow, and the bit length is found with a plain
 * shift loop so the function does not depend on a platform bit-scan macro. */
uint32_t getBsLength( int32_t code )
{
    const int64_t wide = code;

    /* non-positive values map to even indices, positive ones to odd indices */
    uint64_t ucode = (wide <= 0) ? ((uint64_t)(-wide) << 1) : (((uint64_t)wide << 1) - 1);

    ++ucode; /* k + 1, always >= 1 */

    /* position of the highest set bit == floor(log2(k + 1)) */
    uint32_t idx = 0;
    while (ucode >>= 1)
        ++idx;

    return idx * 2 + 1;
}
386
387
/* Decide whether a tone-map SEI payload has to be written for the current
 * frame, and keep the encoder-wide cached copy (m_top->m_prevTonemapPayload)
 * up to date.
 *
 * Returns true when the payload differs from the cached one (or nothing usable
 * is cached), or when the base layer frame is an IDR. If the cache buffer
 * cannot be allocated the cache is left empty and true is returned, so the
 * next call will try again. */
bool FrameEncoder::writeToneMapInfo(x265_sei_payload *payload)
{
    bool payloadChange = false;
    if (m_top->m_prevTonemapPayload.payload != NULL && payload->payloadSize == m_top->m_prevTonemapPayload.payloadSize)
    {
        /* same size as the cached payload: changed only if the bytes differ */
        if (memcmp(m_top->m_prevTonemapPayload.payload, payload->payload, payload->payloadSize) != 0)
            payloadChange = true;
    }
    else
    {
        /* nothing cached, or the size changed: replace the cache buffer */
        payloadChange = true;
        if (m_top->m_prevTonemapPayload.payload != NULL)
            x265_free(m_top->m_prevTonemapPayload.payload);
        m_top->m_prevTonemapPayload.payload = (uint8_t*)x265_malloc(sizeof(uint8_t)* payload->payloadSize);
        if (m_top->m_prevTonemapPayload.payload == NULL)
        {
            /* never memcpy into a failed allocation; with a NULL payload the
             * cache counts as empty and is rebuilt on the next call */
            x265_log(m_param, X265_LOG_ERROR, "unable to allocate tone-map SEI payload cache\n");
            m_top->m_prevTonemapPayload.payloadSize = 0;
            return true;
        }
    }

    if (payloadChange)
    {
        m_top->m_prevTonemapPayload.payloadType = payload->payloadType;
        m_top->m_prevTonemapPayload.payloadSize = payload->payloadSize;
        memcpy(m_top->m_prevTonemapPayload.payload, payload->payload, payload->payloadSize);
    }

    bool isIDR = m_frame[0]->m_lowres.sliceType == X265_TYPE_IDR;
    return (payloadChange || isIDR);
}
413
414
void FrameEncoder::writeTrailingSEIMessages(int layer)
415
0
{
416
0
    Slice* slice = m_frame[layer]->m_encData->m_slice;
417
0
    int planes = (m_param->internalCsp != X265_CSP_I400) ? 3 : 1;
418
0
    int32_t payloadSize = 0;
419
420
0
    if (m_param->decodedPictureHashSEI == 1)
421
0
    {
422
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::MD5;
423
0
        for (int i = 0; i < planes; i++)
424
0
            MD5Final(&m_seiReconPictureDigest.m_state[i], m_seiReconPictureDigest.m_digest[i]);
425
0
        payloadSize = 1 + 16 * planes;
426
0
    }
427
0
    else if (m_param->decodedPictureHashSEI == 2)
428
0
    {
429
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CRC;
430
0
        for (int i = 0; i < planes; i++)
431
0
            crcFinish(m_seiReconPictureDigest.m_crc[i], m_seiReconPictureDigest.m_digest[i]);
432
0
        payloadSize = 1 + 2 * planes;
433
0
    }
434
0
    else if (m_param->decodedPictureHashSEI == 3)
435
0
    {
436
0
        m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CHECKSUM;
437
0
        for (int i = 0; i < planes; i++)
438
0
            checksumFinish(m_seiReconPictureDigest.m_checksum[i], m_seiReconPictureDigest.m_digest[i]);
439
0
        payloadSize = 1 + 4 * planes;
440
0
    }
441
442
0
    m_seiReconPictureDigest.setSize(payloadSize);
443
0
    m_seiReconPictureDigest.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_SUFFIX_SEI, m_nalList, false, layer);
444
0
}
445
446
void FrameEncoder::compressFrame(int layer)
447
0
{
448
0
    ProfileScopeEvent(frameThread);
449
450
0
    m_startCompressTime[layer] = x265_mdate();
451
0
    m_totalActiveWorkerCount = 0;
452
0
    m_activeWorkerCountSamples = 0;
453
0
    m_totalWorkerElapsedTime[layer] = 0;
454
0
    m_totalThreadedMETime[layer] = 0;
455
0
    m_totalThreadedMEWait[layer] = 0;
456
0
    m_totalNoWorkerTime[layer] = 0;
457
0
    m_countRowBlocks = 0;
458
0
    m_allRowsAvailableTime[layer] = 0;
459
0
    m_stallStartTime[layer] = 0;
460
461
0
    m_completionCount = 0;
462
0
    memset((void*)m_bAllRowsStop, 0, sizeof(bool) * m_param->maxSlices);
463
0
    memset((void*)m_vbvResetTriggerRow, -1, sizeof(int) * m_param->maxSlices);
464
0
    m_rowSliceTotalBits[0] = 0;
465
0
    m_rowSliceTotalBits[1] = 0;
466
467
0
    m_SSDY[layer] = m_SSDU[layer] = m_SSDV[layer] = 0;
468
0
    m_ssim[layer] = 0;
469
0
    m_ssimCnt[layer] = 0;
470
0
    memset(&(m_frame[layer]->m_encData->m_frameStats), 0, sizeof(m_frame[layer]->m_encData->m_frameStats));
471
0
    m_sLayerId = layer;
472
473
0
    if (m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
474
0
    {
475
0
        int height = m_frame[layer]->m_fencPic->m_picHeight;
476
0
        int width = m_frame[layer]->m_fencPic->m_picWidth;
477
0
        intptr_t stride = m_frame[layer]->m_fencPic->m_stride;
478
479
0
        if (!computeEdge(m_frame[layer]->m_edgeBitPic, m_frame[layer]->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))
480
0
        {
481
0
            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");
482
0
        }
483
0
    }
484
485
    /* Emit access unit delimiter unless this is the first frame and the user is
486
     * not repeating headers (since AUD is supposed to be the first NAL in the access
487
     * unit) */
488
0
    Slice* slice = m_frame[layer]->m_encData->m_slice;
489
490
0
    if (m_param->bEnableEndOfSequence && m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_frame[layer]->m_poc)
491
0
    {
492
0
        m_bs.resetBits();
493
0
        m_nalList.serialize(NAL_UNIT_EOS, m_bs);
494
0
    }
495
496
0
    if (m_param->bEnableAccessUnitDelimiters && (m_frame[layer]->m_poc || m_param->bRepeatHeaders))
497
0
    {
498
0
        m_bs.resetBits();
499
0
        m_entropyCoder.setBitstream(&m_bs);
500
0
        m_entropyCoder.codeAUD(*slice);
501
0
        m_bs.writeByteAlignment();
502
0
        m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
503
0
        if (m_param->bSingleSeiNal)
504
0
            m_bs.resetBits();
505
0
    }
506
0
    if (m_frame[layer]->m_lowres.bKeyframe && m_param->bRepeatHeaders)
507
0
    {
508
0
        if (m_param->bOptRefListLengthPPS)
509
0
        {
510
0
            ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
511
0
            m_top->updateRefIdx();
512
0
        }
513
0
        if (m_top->m_param->rc.bStatRead  && m_top->m_param->bMultiPassOptRPS)
514
0
        {
515
0
            ScopedLock refIdxLock(m_top->m_rpsInSpsLock);
516
0
            if (!m_top->computeSPSRPSIndex())
517
0
            {
518
0
                x265_log(m_param, X265_LOG_ERROR, "compute commonly RPS failed!\n");
519
0
                m_top->m_aborted = true;
520
0
            }
521
0
            m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
522
0
        }
523
0
        else
524
0
            m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
525
0
    }
526
527
0
    if (m_top->m_param->rc.bStatRead && m_top->m_param->bMultiPassOptRPS)
528
0
        m_frame[layer]->m_encData->m_slice->m_rpsIdx = (m_top->m_rateControl->m_rce2Pass + m_frame[layer]->m_encodeOrder)->rpsIdx;
529
530
    // Weighted Prediction parameters estimation.
531
0
    bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred && !layer;
532
0
    bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred && !layer;
533
534
0
    WeightParam* reuseWP = NULL;
535
0
    if (m_param->analysisLoad[0] && (bUseWeightP || bUseWeightB))
536
0
        reuseWP = (WeightParam*)m_frame[layer]->m_analysisData.wt;
537
538
0
    if (bUseWeightP || bUseWeightB)
539
0
    {
540
#if DETAILED_CU_STATS
541
        m_cuStats.countWeightAnalyze++;
542
        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
543
#endif
544
0
        if (strlen(m_param->analysisLoad))
545
0
        {
546
0
            for (int list = 0; list < slice->isInterB() + 1; list++) 
547
0
            {
548
0
                for (int plane = 0; plane < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); plane++)
549
0
                {
550
0
                    for (int ref = 1; ref < slice->m_numRefIdx[list]; ref++)
551
0
                        SET_WEIGHT(slice->m_weightPredTable[list][ref][plane], false, 1 << reuseWP->log2WeightDenom, reuseWP->log2WeightDenom, 0);
552
0
                    slice->m_weightPredTable[list][0][plane] = *(reuseWP++);
553
0
                }
554
0
            }
555
0
        }
556
0
        else
557
0
        {
558
0
            WeightAnalysis wa(*this);
559
0
            if (m_pool && wa.tryBondPeers(*this, 1))
560
                /* use an idle worker for weight analysis */
561
0
                wa.waitForExit();
562
0
            else
563
0
                weightAnalyse(*slice, *m_frame[layer], *m_param);
564
0
        }
565
0
    }
566
0
    else
567
0
        slice->disableWeights();
568
569
0
    if (strlen(m_param->analysisSave) && (bUseWeightP || bUseWeightB))
570
0
        reuseWP = (WeightParam*)m_frame[layer]->m_analysisData.wt;
571
    // Generate motion references
572
0
    int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
573
0
    for (int l = 0; l < numPredDir; l++)
574
0
    {
575
0
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
576
0
        {
577
0
            WeightParam *w = NULL;
578
0
            if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].wtPresent)
579
0
                w = slice->m_weightPredTable[l][ref];
580
0
            slice->m_refReconPicList[l][ref] = slice->m_refFrameList[l][ref]->m_reconPic[0];
581
0
            m_mref[l][ref].init(slice->m_refReconPicList[l][ref], w, *m_param);
582
0
        }
583
0
        if (strlen(m_param->analysisSave) && (bUseWeightP || bUseWeightB))
584
0
        {
585
0
            for (int i = 0; i < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); i++)
586
0
                *(reuseWP++) = slice->m_weightPredTable[l][0][i];
587
0
        }
588
589
0
    }
590
591
0
    int numTLD;
592
0
    if (m_pool)
593
0
        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
594
0
    else
595
0
        numTLD = 1;
596
597
    /* Get the QP for this frame from rate control. This call may block until
598
     * frames ahead of it in encode order have called rateControlEnd() */
599
0
    int qp = (layer == 0) ? m_top->m_rateControl->rateControlStart(m_frame[layer], &m_rce, m_top) : (int)m_rce.newQp;
600
601
0
    m_rce.newQp = qp;
602
603
0
    if (!!layer && m_top->m_lookahead->m_bAdaptiveQuant)
604
0
    {
605
0
        int ncu;
606
0
        if (m_param->rc.qgSize == 8)
607
0
            ncu = m_top->m_rateControl->m_ncu * 4;
608
0
        else
609
0
            ncu = m_top->m_rateControl->m_ncu;
610
0
        if (m_param->numViews > 1)
611
0
        {
612
0
            for (int i = 0; i < ncu; i++)
613
0
            {
614
0
                m_frame[layer]->m_lowres.qpCuTreeOffset[i] = m_frame[0]->m_lowres.qpCuTreeOffset[i];
615
0
                m_frame[layer]->m_lowres.qpAqOffset[i] = m_frame[0]->m_lowres.qpAqOffset[i];
616
0
            }
617
0
        }
618
0
        else if (m_param->numScalableLayers > 1)
619
0
        {
620
0
            memset(m_frame[layer]->m_lowres.qpCuTreeOffset, 0, sizeof(double)*ncu);
621
0
            memset(m_frame[layer]->m_lowres.qpAqOffset, 0, sizeof(double)* ncu);
622
0
        }
623
624
0
        m_frame[layer]->m_encData->m_avgQpAq = m_frame[0]->m_encData->m_avgQpAq;
625
0
        m_frame[layer]->m_encData->m_avgQpRc = m_frame[0]->m_encData->m_avgQpRc;
626
0
        if (!!m_param->rc.hevcAq)
627
0
        {
628
0
            for (uint32_t d = 0; d < 4; d++)
629
0
            {
630
0
                int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
631
0
                int aqDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.qgSize];
632
0
                if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
633
0
                    continue;
634
0
                PicQPAdaptationLayer* pcAQLayer0 = &m_frame[0]->m_lowres.pAQLayer[d];
635
0
                PicQPAdaptationLayer* pcAQLayer1 = &m_frame[layer]->m_lowres.pAQLayer[d];
636
0
                const uint32_t aqPartWidth = m_frame[0]->m_lowres.pAQLayer[d].aqPartWidth;
637
0
                const uint32_t aqPartHeight = m_frame[0]->m_lowres.pAQLayer[d].aqPartHeight;
638
0
                double* pcQP0 = pcAQLayer0->dQpOffset;
639
0
                double* pcCuTree0 = pcAQLayer0->dCuTreeOffset;
640
0
                double* pcQP1 = pcAQLayer1->dQpOffset;
641
0
                double* pcCuTree1 = pcAQLayer1->dCuTreeOffset;
642
0
                if (m_param->numViews > 1)
643
0
                {
644
0
                    for (uint32_t y = 0; y < m_frame[0]->m_fencPic->m_picHeight; y += aqPartHeight)
645
0
                    {
646
0
                        for (uint32_t x = 0; x < m_frame[0]->m_fencPic->m_picWidth; x += aqPartWidth, pcQP0++, pcCuTree0++, pcQP1++, pcCuTree1++)
647
0
                        {
648
0
                            *pcQP1 = *pcQP0;
649
0
                            *pcCuTree1 = *pcCuTree0;
650
0
                        }
651
0
                    }
652
0
                }
653
0
                else if (m_param->numScalableLayers > 1)
654
0
                {
655
0
                    int numAQPartInWidth = (m_frame[0]->m_fencPic->m_picWidth + aqPartWidth - 1) / aqPartWidth;
656
0
                    int numAQPartInHeight = (m_frame[0]->m_fencPic->m_picHeight + aqPartHeight - 1) / aqPartHeight;
657
0
                    memset(m_frame[layer]->m_lowres.pAQLayer[d].dQpOffset, 0, sizeof(double)*numAQPartInWidth* numAQPartInHeight);
658
0
                    memset(m_frame[layer]->m_lowres.pAQLayer[d].dCuTreeOffset, 0, sizeof(double)* numAQPartInWidth* numAQPartInHeight);
659
0
                }
660
0
            }
661
0
        }
662
0
    }
663
0
    if (m_param->bEnableTemporalFilter)
664
0
    {
665
0
        m_frame[layer]->m_mcstf->m_QP = qp;
666
0
        m_frame[layer]->m_mcstf->bilateralFilter(m_frame[layer], m_frame[layer]->m_mcstfRefList, m_param->temporalFilterStrength);
667
0
    }
668
669
0
    if (m_nr)
670
0
    {
671
0
        if (qp > QP_MAX_SPEC && m_frame[layer]->m_param->rc.vbvBufferSize)
672
0
        {
673
0
            for (int i = 0; i < numTLD; i++)
674
0
            {
675
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];
676
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_top->m_residualSumEmergency;
677
0
                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_top->m_countEmergency;
678
0
            }
679
0
        }
680
0
        else
681
0
        {
682
0
            if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
683
0
            {
684
0
                for (int i = 0; i < numTLD; i++)
685
0
                {
686
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;
687
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;
688
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;
689
0
                }
690
0
            }
691
0
            else
692
0
            {
693
0
                for (int i = 0; i < numTLD; i++)
694
0
                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = NULL;
695
0
            }
696
0
        }
697
0
    }
698
699
    /* Clip slice QP to 0-51 spec range before encoding */
700
0
    slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);
701
0
    if (m_param->bHDR10Opt)
702
0
    {
703
0
        int qpCb = x265_clip3(-12, 0, (int)floor((m_top->m_cB * ((-.46) * qp + 9.26)) + 0.5 ));
704
0
        int qpCr = x265_clip3(-12, 0, (int)floor((m_top->m_cR * ((-.46) * qp + 9.26)) + 0.5 ));
705
0
        slice->m_chromaQpOffset[0] = slice->m_pps->chromaQpOffset[0] + qpCb < -12 ? (qpCb + (-12 - (slice->m_pps->chromaQpOffset[0] + qpCb))) : qpCb;
706
0
        slice->m_chromaQpOffset[1] = slice->m_pps->chromaQpOffset[1] + qpCr < -12 ? (qpCr + (-12 - (slice->m_pps->chromaQpOffset[1] + qpCr))) : qpCr;
707
0
    }
708
709
0
    if (m_param->bOptQpPPS && m_param->bRepeatHeaders)
710
0
    {
711
0
        ScopedLock qpLock(m_top->m_sliceQpLock);
712
0
        for (int i = 0; i < (QP_MAX_MAX + 1); i++)
713
0
        {
714
0
            int delta = slice->m_sliceQp - (i + 1);
715
0
            int codeLength = getBsLength( delta );
716
0
            m_top->m_iBitsCostSum[i] += codeLength;
717
0
        }
718
0
        m_top->m_iFrameNum++;
719
0
    }
720
0
    m_initSliceContext.resetEntropy(*slice);
721
722
0
    m_frameFilter.start(m_frame[layer], m_initSliceContext);
723
724
    /* ensure all rows are blocked prior to initializing row CTU counters */
725
0
    WaveFront::clearEnabledRowMask();
726
727
0
    WaveFront::setLayerId(layer);
728
    /* reset entropy coders and compute slice id */
729
0
    m_entropyCoder.load(m_initSliceContext);
730
0
    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)   
731
0
        for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)
732
0
            m_rows[row].init(m_initSliceContext, sliceId);   
733
734
    // reset slice counter for rate control update
735
0
    m_sliceCnt = 0;
736
737
0
    uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;
738
0
    X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");
739
0
    if (!m_outStreams)
740
0
    {
741
0
        m_outStreams = new Bitstream[numSubstreams];
742
0
        if (!m_param->bEnableWavefront)
743
0
            m_backupStreams = new Bitstream[numSubstreams];
744
0
        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
745
0
        if (!slice->m_bUseSao)
746
0
        {
747
0
            for (uint32_t i = 0; i < numSubstreams; i++)
748
0
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
749
0
        }
750
0
    }
751
0
    else
752
0
    {
753
0
        for (uint32_t i = 0; i < numSubstreams; i++)
754
0
        {
755
0
            m_outStreams[i].resetBits();
756
0
            if (!slice->m_bUseSao)
757
0
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
758
0
            else
759
0
                m_rows[i].rowGoOnCoder.setBitstream(NULL);
760
0
        }
761
0
    }
762
763
0
    m_rce.encodeOrder = m_frame[layer]->m_encodeOrder;
764
0
    int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
765
766
0
    if (m_frame[layer]->m_lowres.bKeyframe)
767
0
    {
768
0
        if (m_param->bEmitHRDSEI)
769
0
        {
770
0
            SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;
771
772
            // since the temporal layer HRD is not ready, we assume it is fixed
773
0
            bpSei->m_auCpbRemovalDelayDelta = 1;
774
0
            bpSei->m_cpbDelayOffset = 0;
775
0
            bpSei->m_dpbDelayOffset = 0;
776
0
            bpSei->m_concatenationFlag = (m_param->bEnableHRDConcatFlag && !m_frame[layer]->m_poc) ? true : false;
777
778
            // hrdFullness() calculates the initial CPB removal delay and offset
779
0
            m_top->m_rateControl->hrdFullness(bpSei);
780
0
            bpSei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
781
782
0
            m_top->m_lastBPSEI = m_rce.encodeOrder;
783
0
        }
784
785
0
        if (m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_param->bEmitIDRRecoverySEI)
786
0
        {
787
            /* Recovery Point SEI requires the SPS to be "activated" */
788
0
            SEIRecoveryPoint sei;
789
0
            sei.m_recoveryPocCnt = 0;
790
0
            sei.m_exactMatchingFlag = true;
791
0
            sei.m_brokenLinkFlag = false;
792
0
            sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
793
0
        }
794
0
    }
795
796
0
    if ((m_param->bEmitHRDSEI || !!m_param->interlaceMode))
797
0
    {
798
0
        SEIPictureTiming *sei = m_rce.picTimingSEI;
799
0
        const VUI *vui = &slice->m_sps->vuiParameters;
800
0
        const HRDInfo *hrd = &vui->hrdParameters;
801
0
        int poc = slice->m_poc;
802
803
0
        if (vui->frameFieldInfoPresentFlag)
804
0
        {
805
0
            if (m_param->interlaceMode > 0)
806
0
            {
807
0
                if( m_param->interlaceMode == 2 )
808
0
                {   
809
                    // m_picStruct should be set to 3 or 4 when field feature is enabled
810
0
                    if (m_param->bField)
811
                        // 3: Top field, bottom field, in that order; 4: Bottom field, top field, in that order
812
0
                        sei->m_picStruct = (slice->m_fieldNum == 1) ? 4 : 3;
813
0
                    else
814
0
                        sei->m_picStruct = (poc & 1) ? 1 /* top */ : 2 /* bottom */;
815
0
                }     
816
0
                else if (m_param->interlaceMode == 1)
817
0
                {
818
0
                    if (m_param->bField)
819
0
                        sei->m_picStruct = (slice->m_fieldNum == 1) ? 3: 4;
820
0
                    else
821
0
                        sei->m_picStruct = (poc & 1) ? 2 /* bottom */ : 1 /* top */;
822
0
                }
823
0
            }
824
0
            else if (m_param->bEnableFrameDuplication)
825
0
                sei->m_picStruct = m_frame[layer]->m_picStruct;
826
0
            else
827
0
                sei->m_picStruct = m_param->pictureStructure;
828
829
0
            sei->m_sourceScanType = m_param->interlaceMode ? 0 : 1;
830
831
0
            sei->m_duplicateFlag = false;
832
0
        }
833
834
0
        if (vui->hrdParametersPresentFlag)
835
0
        {
836
            // m_auCpbRemovalDelay specifies how many clock ticks the
837
            // access unit associated with the picture timing SEI message has to
838
            // wait after removal of the access unit with the most recent
839
            // buffering period SEI message
840
0
            sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));
841
0
            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics[m_frame[layer]->m_tempLayer] + poc - m_rce.encodeOrder;
842
0
        }
843
844
0
        sei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
845
0
    }
846
847
0
    if (m_param->preferredTransferCharacteristics > -1 && slice->isIRAP())
848
0
    {
849
0
        SEIAlternativeTC m_seiAlternativeTC;
850
0
        m_seiAlternativeTC.m_preferredTransferCharacteristics = m_param->preferredTransferCharacteristics;
851
0
        m_seiAlternativeTC.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
852
0
    }
853
    /* Write Film grain characteristics if present */
854
0
    if (this->m_top->m_filmGrainIn)
855
0
    {
856
0
        FilmGrainCharacteristics m_filmGrain;
857
        /* Read the Film grain model file */
858
0
        readModel(&m_filmGrain, this->m_top->m_filmGrainIn);
859
0
        m_filmGrain.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
860
0
    }
861
    /* Write Aom film grain characteristics if present */
862
0
    if (this->m_top->m_aomFilmGrainIn)
863
0
    {
864
0
        AomFilmGrainCharacteristics m_aomFilmGrain;
865
        /* Read the Film grain model file */
866
0
        readAomModel(&m_aomFilmGrain, this->m_top->m_aomFilmGrainIn);
867
0
        m_aomFilmGrain.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal);
868
0
    }
869
    /* Write user SEI */
870
0
    for (int i = 0; i < m_frame[layer]->m_userSEI.numPayloads; i++)
871
0
    {
872
0
        x265_sei_payload *payload = &m_frame[layer]->m_userSEI.payloads[i];
873
0
        if (payload->payloadType == USER_DATA_UNREGISTERED)
874
0
        {
875
0
            SEIuserDataUnregistered sei;
876
0
            sei.m_userData = payload->payload;
877
0
            sei.setSize(payload->payloadSize);
878
0
            sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
879
0
        }
880
0
        else if (payload->payloadType == USER_DATA_REGISTERED_ITU_T_T35)
881
0
        {
882
0
            bool writeSei = m_param->bDhdr10opt ? writeToneMapInfo(payload) : true;
883
0
            if (writeSei)
884
0
            {
885
0
                SEIuserDataRegistered sei;
886
0
                sei.m_userData = payload->payload;
887
0
                sei.setSize(payload->payloadSize);
888
0
                sei.writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal, layer);
889
0
            }
890
0
        }
891
0
        else
892
0
            x265_log(m_param, X265_LOG_ERROR, "Unrecognized SEI type\n");
893
0
    }
894
895
0
    bool isSei = ((m_frame[layer]->m_lowres.bKeyframe && m_param->bRepeatHeaders) || m_param->bEmitHRDSEI ||
896
0
                 !!m_param->interlaceMode || (m_frame[layer]->m_lowres.sliceType == X265_TYPE_IDR && m_param->bEmitIDRRecoverySEI) ||
897
0
                   m_frame[layer]->m_userSEI.numPayloads);
898
899
0
    if (isSei && m_param->bSingleSeiNal)
900
0
    {
901
0
        m_bs.writeByteAlignment();
902
0
        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
903
0
    }
904
    /* CQP and CRF (without capped VBV) don't use mid-frame statistics to
905
     * tune RateControl parameters for other frames.
906
     * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
907
     * RateControlEnd here, after the slice contexts are initialized. For the rest - ABR
908
     * and VBV, unlock only after rateControlUpdateStats of this frame is called */
909
0
    if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
910
0
    {
911
0
        m_top->m_rateControl->m_startEndOrder.incr();
912
913
0
        if (m_rce.encodeOrder < m_param->frameNumThreads - 1)
914
0
            m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
915
0
    }
916
917
0
    if (m_param->bDynamicRefine)
918
0
        computeAvgTrainingData(layer);
919
920
    /* Analyze CTU rows, most of the hard work is done here.  Frame is
921
     * compressed in a wave-front pattern if WPP is enabled. Row based loop
922
     * filters run behind the CTU compression and reconstruction */
923
924
0
    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
925
0
        m_rows[m_sliceBaseRow[sliceId]].active = true;
926
    
927
0
    if (m_param->bEnableWavefront)
928
0
    {
929
0
        int i = 0;
930
0
        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
931
0
        {
932
0
            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
933
0
            {
934
0
                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
935
0
                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
936
0
                const uint32_t row = sliceStartRow + rowInSlice;
937
0
                if (row > sliceEndRow)
938
0
                    continue;
939
0
                m_row_to_idx[row] = i;
940
0
                m_idx_to_row[i] = row;
941
0
                i += 1;
942
0
            }
943
0
        }
944
0
    }
945
946
0
    if (m_param->bEnableWavefront)
947
0
    {
948
0
        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
949
0
        {
950
0
            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
951
0
            {
952
0
                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
953
0
                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
954
0
                const uint32_t row = sliceStartRow + rowInSlice;
955
956
0
                X265_CHECK(row < m_numRows, "slices row fault was detected");
957
958
0
                if (row > sliceEndRow)
959
0
                    continue;
960
961
                // block until all reference frames have reconstructed the rows we need
962
0
                for (int l = 0; l < numPredDir; l++)
963
0
                {
964
0
                    for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
965
0
                    {
966
0
                        Frame *refpic = slice->m_refFrameList[l][ref];
967
968
#if ENABLE_SCC_EXT
969
                        /*Exempt the current pic as reference*/
970
                        if (m_param->bEnableSCC && refpic->m_poc == m_frame[layer]->m_poc)
971
                            continue;
972
#endif
973
974
                        // NOTE: we unnecessarily wait for a row that is beyond the current slice boundary
975
0
                        const int rowIdx = X265_MIN(sliceEndRow, (row + m_refLagRows));
976
977
0
                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
978
0
                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
979
980
0
                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
981
0
                            m_mref[l][ref].applyWeight(rowIdx, m_numRows, sliceEndRow, sliceId);
982
0
                    }
983
0
                }
984
                
985
0
                enableRowEncoder(m_row_to_idx[row]); /* clear external dependency for this row */
986
987
0
                if (m_top->m_threadedME && !slice->isIntra())
988
0
                {
989
0
                    ScopedLock lock(m_tmeDepLock);
990
0
                    m_tmeDeps[row].external = true;
991
0
                    m_top->m_threadedME->enqueueReadyRows(row, layer, this);
992
0
                }
993
994
0
                if (!rowInSlice)
995
0
                {
996
0
                    m_row0WaitTime[layer] = x265_mdate();
997
0
                    enqueueRowEncoder(m_row_to_idx[row]); /* clear internal dependency, start wavefront */
998
0
                }
999
0
                tryWakeOne();
1000
0
            } // end of loop sliceId
1001
0
        } // end of loop rowInSlice
1002
1003
0
        m_allRowsAvailableTime[layer] = x265_mdate();
1004
0
        tryWakeOne(); /* ensure one thread is active or help-wanted flag is set prior to blocking */
1005
0
        static const int block_ms = 250;
1006
0
        while (m_completionEvent.timedWait(block_ms))
1007
0
            tryWakeOne();
1008
0
    }
1009
0
    else
1010
0
    {
1011
0
        for (uint32_t i = 0; i < m_numRows + m_filterRowDelay; i++)
1012
0
        {
1013
            // compress
1014
0
            if (i < m_numRows)
1015
0
            {
1016
                // block until all reference frames have reconstructed the rows we need
1017
0
                for (int l = 0; l < numPredDir; l++)
1018
0
                {
1019
0
                    int list = l;
1020
0
                    for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
1021
0
                    {
1022
0
                        Frame *refpic = slice->m_refFrameList[list][ref];
1023
1024
#if ENABLE_SCC_EXT
1025
                        /*Exempt the current pic as reference*/
1026
                        if (m_param->bEnableSCC && refpic->m_poc == m_frame[layer]->m_poc)
1027
                            continue;
1028
#endif
1029
1030
0
                        const int rowIdx = X265_MIN(m_numRows - 1, (i + m_refLagRows));
1031
0
                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
1032
0
                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
1033
1034
0
                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
1035
0
                            m_mref[list][ref].applyWeight(rowIdx, m_numRows, m_numRows, 0);
1036
0
                    }
1037
0
                }
1038
1039
0
                if (m_top->m_threadedME && !slice->isIntra())
1040
0
                {
1041
0
                    ScopedLock lock(m_tmeDepLock);
1042
0
                    m_tmeDeps[i].external = true;
1043
0
                    m_top->m_threadedME->enqueueReadyRows(i, layer, this);
1044
0
                }
1045
1046
0
                if (!i)
1047
0
                    m_row0WaitTime[layer] = x265_mdate();
1048
0
                else if (i == m_numRows - 1)
1049
0
                    m_allRowsAvailableTime[layer] = x265_mdate();
1050
0
                processRowEncoder(i, m_tld[m_localTldIdx], layer);
1051
0
            }
1052
1053
            // filter
1054
0
            if (i >= m_filterRowDelay)
1055
0
                m_frameFilter.processRow(i - m_filterRowDelay, layer);
1056
0
        }
1057
0
    }
1058
#if ENABLE_LIBVMAF
1059
    vmafFrameLevelScore();
1060
#endif
1061
1062
0
    m_tmeDepLock.acquire();
1063
0
    m_tmeDeps.clear();
1064
0
    m_tmeDeps.resize(m_numRows);
1065
0
    m_tmeDepLock.release();
1066
1067
0
    if (m_param->maxSlices > 1)
1068
0
    {
1069
0
        PicYuv *reconPic = m_frame[layer]->m_reconPic[0];
1070
0
        uint32_t height = reconPic->m_picHeight;
1071
0
        initDecodedPictureHashSEI(0, 0, height, layer);
1072
0
    } 
1073
1074
0
    if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame[layer]->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
1075
0
        collectDynDataFrame(layer);
1076
1077
0
    if (m_param->bEnableTemporalFilter && m_top->isFilterThisframe(m_frame[layer]->m_mcstf->m_sliceTypeConfig, m_frame[layer]->m_lowres.sliceType))
1078
0
    {
1079
        //Reset the MCSTF context in Frame Encoder and Frame
1080
0
        for (int i = 0; i < (m_frame[layer]->m_mcstf->m_range << 1); i++)
1081
0
        {
1082
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs0, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1083
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs1, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1084
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs2, 0, sizeof(MV) * ((m_param->sourceWidth / 16) * (m_param->sourceHeight / 16)));
1085
0
            memset(m_frame[layer]->m_mcstfRefList[i].mvs,  0, sizeof(MV) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1086
0
            memset(m_frame[layer]->m_mcstfRefList[i].noise, 0, sizeof(int) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1087
0
            memset(m_frame[layer]->m_mcstfRefList[i].error, 0, sizeof(int) * ((m_param->sourceWidth / 4) * (m_param->sourceHeight / 4)));
1088
1089
0
            m_frame[layer]->m_mcstf->m_numRef = 0;
1090
0
        }
1091
0
    }
1092
1093
1094
0
    if (m_param->rc.bStatWrite)
1095
0
    {
1096
0
        int totalI = 0, totalP = 0, totalSkip = 0;
1097
1098
        // accumulate intra,inter,skip cu count per frame for 2 pass
1099
0
        for (uint32_t i = 0; i < m_numRows; i++)
1100
0
        {
1101
0
            m_frame[layer]->m_encData->m_frameStats.mvBits    += m_rows[i].rowStats.mvBits;
1102
0
            m_frame[layer]->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
1103
0
            m_frame[layer]->m_encData->m_frameStats.miscBits  += m_rows[i].rowStats.miscBits;
1104
0
            totalI                                     += m_rows[i].rowStats.intra8x8Cnt;
1105
0
            totalP                                     += m_rows[i].rowStats.inter8x8Cnt;
1106
0
            totalSkip                                  += m_rows[i].rowStats.skip8x8Cnt;
1107
0
        }
1108
0
        int totalCuCount = totalI + totalP + totalSkip;
1109
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;
1110
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
1111
0
        m_frame[layer]->m_encData->m_frameStats.percent8x8Skip  = (double)totalSkip / totalCuCount;
1112
0
    }
1113
1114
0
    if (m_param->csvLogLevel >= 1)
1115
0
    {
1116
0
        for (uint32_t i = 0; i < m_numRows; i++)
1117
0
        {
1118
0
            m_frame[layer]->m_encData->m_frameStats.cntIntraNxN += m_rows[i].rowStats.cntIntraNxN;
1119
0
            m_frame[layer]->m_encData->m_frameStats.totalCu += m_rows[i].rowStats.totalCu;
1120
0
            m_frame[layer]->m_encData->m_frameStats.totalCtu += m_rows[i].rowStats.totalCtu;
1121
0
            m_frame[layer]->m_encData->m_frameStats.lumaDistortion += m_rows[i].rowStats.lumaDistortion;
1122
0
            m_frame[layer]->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;
1123
0
            m_frame[layer]->m_encData->m_frameStats.psyEnergy += m_rows[i].rowStats.psyEnergy;
1124
0
            m_frame[layer]->m_encData->m_frameStats.ssimEnergy += m_rows[i].rowStats.ssimEnergy;
1125
0
            m_frame[layer]->m_encData->m_frameStats.resEnergy += m_rows[i].rowStats.resEnergy;
1126
0
            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1127
0
            {
1128
0
                m_frame[layer]->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
1129
0
                m_frame[layer]->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
1130
0
                for (int m = 0; m < INTER_MODES; m++)
1131
0
                    m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
1132
0
                for (int n = 0; n < INTRA_MODES; n++)
1133
0
                    m_frame[layer]->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];
1134
0
            }
1135
0
        }
1136
0
        m_frame[layer]->m_encData->m_frameStats.percentIntraNxN = (double)(m_frame[layer]->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1137
1138
0
        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1139
0
        {
1140
0
            m_frame[layer]->m_encData->m_frameStats.percentSkipCu[depth] = (double)(m_frame[layer]->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1141
0
            m_frame[layer]->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame[layer]->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1142
0
            for (int n = 0; n < INTRA_MODES; n++)
1143
0
                m_frame[layer]->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame[layer]->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1144
0
            uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
1145
0
            cuInterRectCnt += m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][2];
1146
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1147
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1148
0
            m_frame[layer]->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame[layer]->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame[layer]->m_encData->m_frameStats.totalCu;
1149
0
        }
1150
0
    }
1151
1152
0
    if (m_param->csvLogLevel >= 2)
1153
0
    {
1154
0
        m_frame[layer]->m_encData->m_frameStats.avgLumaDistortion = (double)(m_frame[layer]->m_encData->m_frameStats.lumaDistortion) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1155
0
        m_frame[layer]->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame[layer]->m_encData->m_frameStats.chromaDistortion) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1156
0
        m_frame[layer]->m_encData->m_frameStats.avgPsyEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.psyEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1157
0
        m_frame[layer]->m_encData->m_frameStats.avgSsimEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.ssimEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1158
0
        m_frame[layer]->m_encData->m_frameStats.avgResEnergy = (double)(m_frame[layer]->m_encData->m_frameStats.resEnergy) / m_frame[layer]->m_encData->m_frameStats.totalCtu;
1159
0
    }
1160
1161
0
    m_bs.resetBits();
1162
0
    m_entropyCoder.load(m_initSliceContext);
1163
0
    m_entropyCoder.setBitstream(&m_bs);
1164
1165
    // finish encode of each CTU row, only required when SAO is enabled
1166
0
    if (slice->m_bUseSao)
1167
0
        encodeSlice(0, layer);
1168
1169
0
    m_entropyCoder.setBitstream(&m_bs);
1170
1171
0
    if (m_param->maxSlices > 1)
1172
0
    {
1173
0
        uint32_t nextSliceRow = 0;
1174
1175
0
        for(uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
1176
0
        {
1177
0
            m_bs.resetBits();
1178
1179
0
            const uint32_t sliceAddr = nextSliceRow * m_numCols;
1180
0
            if (m_param->bOptRefListLengthPPS)
1181
0
            {
1182
0
                ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
1183
0
                m_top->analyseRefIdx(slice->m_numRefIdx);
1184
0
            }
1185
0
            m_entropyCoder.codeSliceHeader(*slice, *m_frame[layer]->m_encData, sliceAddr, m_sliceAddrBits, slice->m_sliceQp, layer);
1186
1187
            // Find rows of current slice
1188
0
            const uint32_t prevSliceRow = nextSliceRow;
1189
0
            while(nextSliceRow < m_numRows && m_rows[nextSliceRow].sliceId == sliceId)
1190
0
                nextSliceRow++;
1191
1192
            // serialize each row, record final lengths in slice header
1193
0
            uint32_t maxStreamSize = m_nalList.serializeSubstreams(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow), &m_outStreams[prevSliceRow]);
1194
1195
            // complete the slice header by writing WPP row-starts
1196
0
            m_entropyCoder.setBitstream(&m_bs);
1197
0
            if (slice->m_pps->bEntropyCodingSyncEnabled)
1198
0
                m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);
1199
            
1200
0
            m_bs.writeByteAlignment();
1201
1202
0
            m_nalList.serialize(slice->m_nalUnitType, m_bs, layer, (!!m_param->bEnableTemporalSubLayers ? m_frame[layer]->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
1203
0
        }
1204
0
    }
1205
0
    else
1206
0
    {
1207
0
        if (m_param->bOptRefListLengthPPS)
1208
0
        {
1209
0
            ScopedLock refIdxLock(m_top->m_sliceRefIdxLock);
1210
0
            m_top->analyseRefIdx(slice->m_numRefIdx);
1211
0
        }
1212
0
        m_entropyCoder.codeSliceHeader(*slice, *m_frame[layer]->m_encData, 0, 0, slice->m_sliceQp, layer);
1213
1214
        // serialize each row, record final lengths in slice header
1215
0
        uint32_t maxStreamSize = m_nalList.serializeSubstreams(m_substreamSizes, numSubstreams, m_outStreams);
1216
1217
        // complete the slice header by writing WPP row-starts
1218
0
        m_entropyCoder.setBitstream(&m_bs);
1219
0
        if (slice->m_pps->bEntropyCodingSyncEnabled)
1220
0
            m_entropyCoder.codeSliceHeaderWPPEntryPoints(m_substreamSizes, (slice->m_sps->numCuInHeight - 1), maxStreamSize);
1221
0
        m_bs.writeByteAlignment();
1222
1223
0
        m_nalList.serialize(slice->m_nalUnitType, m_bs, layer, (!!m_param->bEnableTemporalSubLayers ? m_frame[layer]->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
1224
0
    }
1225
1226
0
    if (m_param->decodedPictureHashSEI)
1227
0
        writeTrailingSEIMessages(layer);
1228
1229
0
    uint64_t bytes = 0;
1230
0
    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
1231
0
    {
1232
0
        int type = m_nalList.m_nal[i].type;
1233
1234
        // exclude SEI
1235
0
        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
1236
0
        {
1237
0
            bytes += m_nalList.m_nal[i].sizeBytes;
1238
            // and exclude start code prefix
1239
0
            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
1240
0
        }
1241
0
    }
1242
0
    m_accessUnitBits[layer] = (layer) ? (bytes - (m_accessUnitBits[0] >> 3)) << 3 : bytes << 3;
1243
1244
0
    int filler = 0;
1245
    /* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */
1246
0
    if (!layer && m_top->m_rateControl->rateControlEnd(m_frame[layer], m_accessUnitBits[layer], &m_rce, &filler) < 0)
1247
0
        m_top->m_aborted = true;
1248
1249
#if ENABLE_ALPHA
1250
    if (layer && m_param->numScalableLayers > 1)
1251
        m_frame[layer]->m_encData->m_avgQpAq = m_frame[layer]->m_encData->m_avgQpRc;
1252
#endif
1253
#if ENABLE_MULTIVIEW
1254
    if (layer && m_param->numViews > 1)
1255
    {
1256
        double avgQpAq = 0;
1257
        for (uint32_t i = 0; i < slice->m_sps->numCuInHeight; i++)
1258
            avgQpAq += m_frame[layer]->m_encData->m_rowStat[i].sumQpAq;
1259
1260
        avgQpAq /= (slice->m_sps->numCUsInFrame * m_param->num4x4Partitions);
1261
        m_frame[layer]->m_encData->m_avgQpAq = avgQpAq;
1262
    }
1263
#endif
1264
1265
0
    if (filler > 0)
1266
0
    {
1267
0
        filler = (filler - FILLER_OVERHEAD * 8) >> 3;
1268
0
        m_bs.resetBits();
1269
0
        while (filler > 0)
1270
0
        {
1271
0
            m_bs.write(0xff, 8);
1272
0
            filler--;
1273
0
        }
1274
0
        m_bs.writeByteAlignment();
1275
0
        m_nalList.serialize(NAL_UNIT_FILLER_DATA, m_bs);
1276
0
        bytes += m_nalList.m_nal[m_nalList.m_numNal - 1].sizeBytes;
1277
0
        bytes -= 3; //exclude start code prefix
1278
0
        m_accessUnitBits[layer] = bytes << 3;
1279
0
    }
1280
1281
0
    if (m_frame[layer]->m_rpu.payloadSize)
1282
0
    {
1283
0
        m_bs.resetBits();
1284
0
        for (int i = 0; i < m_frame[layer]->m_rpu.payloadSize; i++)
1285
0
            m_bs.write(m_frame[layer]->m_rpu.payload[i], 8);
1286
0
        m_nalList.serialize(NAL_UNIT_UNSPECIFIED, m_bs);
1287
0
    }
1288
1289
0
    m_endCompressTime[layer] = x265_mdate();
1290
1291
    /* Decrement referenced frame reference counts, allow them to be recycled */
1292
0
    for (int l = 0; l < numPredDir; l++)
1293
0
    {
1294
0
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
1295
0
        {
1296
0
            Frame *refpic = slice->m_refFrameList[l][ref];
1297
0
            ATOMIC_DEC(&refpic->m_countRefEncoders);
1298
0
        }
1299
0
    }
1300
1301
0
    if (m_nr)
1302
0
    {
1303
0
        bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || m_param->noiseReductionInter);
1304
1305
0
        if (nrEnabled)
1306
0
        {
1307
            /* Accumulate NR statistics from all worker threads */
1308
0
            for (int i = 0; i < numTLD; i++)
1309
0
            {
1310
0
                NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
1311
0
                for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
1312
0
                {
1313
0
                    for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
1314
0
                        m_nr->nrResidualSum[cat][coeff] += nr->nrResidualSum[cat][coeff];
1315
1316
0
                    m_nr->nrCount[cat] += nr->nrCount[cat];
1317
0
                }
1318
0
            }
1319
1320
0
            noiseReductionUpdate();
1321
1322
            /* Copy updated NR coefficients back to all worker threads */
1323
0
            for (int i = 0; i < numTLD; i++)
1324
0
            {
1325
0
                NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
1326
0
                memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
1327
0
                memset(nr->nrCount, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES);
1328
0
                memset(nr->nrResidualSum, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
1329
0
            }
1330
0
        }
1331
0
    }
1332
1333
#if DETAILED_CU_STATS
1334
    /* Accumulate CU statistics from each worker thread, we could report
1335
     * per-frame stats here, but currently we do not. */
1336
    for (int i = 0; i < numTLD; i++)
1337
        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param);
1338
#endif
1339
1340
0
    m_endFrameTime[layer] = x265_mdate();
1341
0
}
1342
1343
void FrameEncoder::initDecodedPictureHashSEI(int row, int cuAddr, int height, int layer)
1344
0
{
1345
0
    PicYuv *reconPic = m_frame[layer]->m_reconPic[0];
1346
0
    uint32_t width = reconPic->m_picWidth;  
1347
0
    intptr_t stride = reconPic->m_stride;
1348
0
    uint32_t maxCUHeight = m_param->maxCUSize;
1349
1350
0
    const uint32_t hChromaShift = CHROMA_H_SHIFT(m_param->internalCsp);
1351
0
    const uint32_t vChromaShift = CHROMA_V_SHIFT(m_param->internalCsp);
1352
1353
0
    if (m_param->decodedPictureHashSEI == 1)
1354
0
    {
1355
0
        if (!row)
1356
0
            MD5Init(&m_seiReconPictureDigest.m_state[0]);
1357
1358
0
        updateMD5Plane(m_seiReconPictureDigest.m_state[0], reconPic->getLumaAddr(cuAddr), width, height, stride);
1359
0
        if (m_param->internalCsp != X265_CSP_I400)
1360
0
        {
1361
0
            if (!row)
1362
0
            {
1363
0
                MD5Init(&m_seiReconPictureDigest.m_state[1]);
1364
0
                MD5Init(&m_seiReconPictureDigest.m_state[2]);
1365
0
            }
1366
1367
0
            width >>= hChromaShift;
1368
0
            height >>= vChromaShift;
1369
0
            stride = reconPic->m_strideC;
1370
1371
0
            updateMD5Plane(m_seiReconPictureDigest.m_state[1], reconPic->getCbAddr(cuAddr), width, height, stride);
1372
0
            updateMD5Plane(m_seiReconPictureDigest.m_state[2], reconPic->getCrAddr(cuAddr), width, height, stride);
1373
0
        }
1374
0
    }
1375
0
    else if (m_param->decodedPictureHashSEI == 2)
1376
0
    {
1377
1378
0
        if (!row)
1379
0
            m_seiReconPictureDigest.m_crc[0] = 0xffff;
1380
1381
0
        updateCRC(reconPic->getLumaAddr(cuAddr), m_seiReconPictureDigest.m_crc[0], height, width, stride);
1382
0
        if (m_param->internalCsp != X265_CSP_I400)
1383
0
        {
1384
0
            width >>= hChromaShift;
1385
0
            height >>= vChromaShift;
1386
0
            stride = reconPic->m_strideC;
1387
0
            m_seiReconPictureDigest.m_crc[1] = m_seiReconPictureDigest.m_crc[2] = 0xffff;
1388
1389
0
            updateCRC(reconPic->getCbAddr(cuAddr), m_seiReconPictureDigest.m_crc[1], height, width, stride);
1390
0
            updateCRC(reconPic->getCrAddr(cuAddr), m_seiReconPictureDigest.m_crc[2], height, width, stride);
1391
0
        }
1392
0
    }
1393
0
    else if (m_param->decodedPictureHashSEI == 3)
1394
0
    {
1395
0
        if (!row)
1396
0
            m_seiReconPictureDigest.m_checksum[0] = 0;
1397
1398
0
        updateChecksum(reconPic->m_picOrg[0], m_seiReconPictureDigest.m_checksum[0], height, width, stride, row, maxCUHeight);
1399
0
        if (m_param->internalCsp != X265_CSP_I400)
1400
0
        {
1401
0
            width >>= hChromaShift;
1402
0
            height >>= vChromaShift;
1403
0
            stride = reconPic->m_strideC;
1404
0
            maxCUHeight >>= vChromaShift;
1405
1406
0
            if (!row)
1407
0
                m_seiReconPictureDigest.m_checksum[1] = m_seiReconPictureDigest.m_checksum[2] = 0;
1408
1409
0
            updateChecksum(reconPic->m_picOrg[1], m_seiReconPictureDigest.m_checksum[1], height, width, stride, row, maxCUHeight);
1410
0
            updateChecksum(reconPic->m_picOrg[2], m_seiReconPictureDigest.m_checksum[2], height, width, stride, row, maxCUHeight);
1411
0
        }
1412
0
    }
1413
0
}
1414
1415
void FrameEncoder::encodeSlice(uint32_t sliceAddr, int layer)
1416
0
{
1417
0
    Slice* slice = m_frame[layer]->m_encData->m_slice;
1418
0
    const uint32_t widthInLCUs = slice->m_sps->numCuInWidth;
1419
0
    const uint32_t lastCUAddr = (slice->m_endCUAddr + m_param->num4x4Partitions - 1) / m_param->num4x4Partitions;
1420
0
    const uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : 1;
1421
1422
0
    SAOParam* saoParam = slice->m_sps->bUseSAO && slice->m_bUseSao ? m_frame[layer]->m_encData->m_saoParam : NULL;
1423
0
    for (uint32_t cuAddr = sliceAddr; cuAddr < lastCUAddr; cuAddr++)
1424
0
    {
1425
0
        uint32_t col = cuAddr % widthInLCUs;
1426
0
        uint32_t row = cuAddr / widthInLCUs;
1427
0
        uint32_t subStrm = row % numSubstreams;
1428
0
        CUData* ctu = m_frame[layer]->m_encData->getPicCTU(cuAddr);
1429
1430
0
        m_entropyCoder.setBitstream(&m_outStreams[subStrm]);
1431
1432
        // Synchronize cabac probabilities with upper-right CTU if it's available and we're at the start of a line.
1433
0
        if (m_param->bEnableWavefront && !col && row)
1434
0
        {
1435
0
            m_entropyCoder.copyState(m_initSliceContext);
1436
0
            m_entropyCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
1437
0
        }
1438
1439
        // Initialize slice context
1440
0
        if (ctu->m_bFirstRowInSlice && !col)
1441
0
            m_entropyCoder.load(m_initSliceContext);
1442
1443
0
        if (saoParam)
1444
0
        {
1445
0
            if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
1446
0
            {
1447
0
                int mergeLeft = col && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_LEFT;
1448
0
                int mergeUp = !ctu->m_bFirstRowInSlice && saoParam->ctuParam[0][cuAddr].mergeMode == SAO_MERGE_UP;
1449
0
                if (col)
1450
0
                    m_entropyCoder.codeSaoMerge(mergeLeft);
1451
0
                if (!ctu->m_bFirstRowInSlice && !mergeLeft)
1452
0
                    m_entropyCoder.codeSaoMerge(mergeUp);
1453
0
                if (!mergeLeft && !mergeUp)
1454
0
                {
1455
0
                    if (saoParam->bSaoFlag[0])
1456
0
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[0][cuAddr], 0);
1457
0
                    if (saoParam->bSaoFlag[1])
1458
0
                    {
1459
0
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[1][cuAddr], 1);
1460
0
                        m_entropyCoder.codeSaoOffset(saoParam->ctuParam[2][cuAddr], 2);
1461
0
                    }
1462
0
                }
1463
0
            }
1464
0
            else
1465
0
            {
1466
0
                for (int i = 0; i < (m_param->internalCsp != X265_CSP_I400 ? 3 : 1); i++)
1467
0
                    saoParam->ctuParam[i][cuAddr].reset();
1468
0
            }
1469
0
        }
1470
1471
        // final coding (bitstream generation) for this CU
1472
0
        m_entropyCoder.encodeCTU(*ctu, m_cuGeoms[m_ctuGeomMap[cuAddr]]);
1473
1474
0
        if (m_param->bEnableWavefront)
1475
0
        {
1476
0
            if (col == 1)
1477
                // Store probabilities of second CTU in line into buffer
1478
0
                m_rows[row].bufferedEntropy.loadContexts(m_entropyCoder);
1479
1480
0
            if (col == widthInLCUs - 1)
1481
0
                m_entropyCoder.finishSlice();
1482
0
        }
1483
0
    }
1484
1485
0
    if (!m_param->bEnableWavefront)
1486
0
        m_entropyCoder.finishSlice();
1487
0
}
1488
1489
void FrameEncoder::processRow(int row, int threadId, int layer)
1490
0
{
1491
0
    int64_t startTime = x265_mdate();
1492
0
    if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime[layer])
1493
0
        m_totalNoWorkerTime[layer] += x265_mdate() - m_stallStartTime[layer];
1494
1495
0
    const uint32_t realRow = m_idx_to_row[row >> 1];
1496
0
    const uint32_t typeNum = m_idx_to_row[row & 1];
1497
1498
0
    if (!typeNum)
1499
0
    {
1500
0
        processRowEncoder(realRow, m_tld[threadId], layer);
1501
0
    }
1502
0
    else
1503
0
    {
1504
0
        m_frameFilter.processRow(realRow, layer);
1505
1506
        // NOTE: Active next row
1507
0
        if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
1508
0
            enqueueRowFilter(m_row_to_idx[realRow + 1]);
1509
0
    }
1510
1511
0
    if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
1512
0
        m_stallStartTime[layer] = x265_mdate();
1513
1514
0
    m_totalWorkerElapsedTime[layer] += x265_mdate() - startTime; // not thread safe, but good enough
1515
0
}
1516
1517
// Called by worker threads
1518
void FrameEncoder::processRowEncoder(int intRow, ThreadLocalData& tld, int layer)
1519
0
{
1520
0
    const uint32_t row = (uint32_t)intRow;
1521
0
    CTURow& curRow = m_rows[row];
1522
1523
0
    if (m_param->bEnableWavefront)
1524
0
    {
1525
0
        ScopedLock self(curRow.lock);
1526
0
        if (!curRow.active)
1527
            /* VBV restart is in progress, exit out */
1528
0
            return;
1529
0
        if (curRow.busy)
1530
0
        {
1531
            /* On multi-socket Windows servers, we have seen problems with
1532
             * ATOMIC_CAS which resulted in multiple worker threads processing
1533
             * the same CU row, which often resulted in bad pointer accesses. We
1534
             * believe the problem is fixed, but are leaving this check in place
1535
             * to prevent crashes in case it is not */
1536
0
            x265_log(m_param, X265_LOG_WARNING,
1537
0
                     "internal error - simultaneous row access detected. Please report HW to x265-devel@videolan.org\n");
1538
0
            return;
1539
0
        }
1540
0
        curRow.busy = true;
1541
0
    }
1542
1543
    /* When WPP is enabled, every row has its own row coder instance. Otherwise
1544
     * they share row 0 */
1545
0
    Entropy& rowCoder = m_param->bEnableWavefront ? curRow.rowGoOnCoder : m_rows[0].rowGoOnCoder;
1546
0
    FrameData& curEncData = *m_frame[layer]->m_encData;
1547
0
    Slice *slice = curEncData.m_slice;
1548
1549
0
    const uint32_t numCols = m_numCols;
1550
0
    const uint32_t lineStartCUAddr = row * numCols;
1551
0
    bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
1552
1553
0
    const uint32_t sliceId = curRow.sliceId;
1554
0
    uint32_t maxBlockCols = (m_frame[layer]->m_fencPic->m_picWidth + (16 - 1)) / 16;
1555
0
    uint32_t noOfBlocks = m_param->maxCUSize / 16;
1556
0
    const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;
1557
0
    const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
1558
0
    const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
1559
0
    const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
1560
1561
    // Load SBAC coder context from previous row and initialize row state.
1562
0
    if (bFirstRowInSlice && !curRow.completed)        
1563
0
        rowCoder.load(m_initSliceContext);     
1564
1565
    // calculate mean QP for consistent deltaQP signalling calculation
1566
0
    if (m_param->bOptCUDeltaQP)
1567
0
    {
1568
0
        ScopedLock self(curRow.lock);
1569
0
        if (!curRow.avgQPComputed)
1570
0
        {
1571
0
            if (m_param->bEnableWavefront || !row)
1572
0
            {
1573
0
                double meanQPOff = 0;
1574
0
                bool isReferenced = IS_REFERENCED(m_frame[layer]);
1575
0
                double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame[layer]->m_lowres.qpCuTreeOffset : m_frame[layer]->m_lowres.qpAqOffset;
1576
0
                if (qpoffs)
1577
0
                {
1578
0
                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
1579
1580
0
                    uint32_t cuYStart = 0, height = m_frame[layer]->m_fencPic->m_picHeight;
1581
0
                    if (m_param->bEnableWavefront)
1582
0
                    {
1583
0
                        cuYStart = intRow * m_param->maxCUSize;
1584
0
                        height = cuYStart + m_param->maxCUSize;
1585
0
                    }
1586
1587
0
                    uint32_t qgSize = m_param->rc.qgSize, width = m_frame[layer]->m_fencPic->m_picWidth;
1588
0
                    uint32_t maxOffsetCols = (m_frame[layer]->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
1589
0
                    uint32_t count = 0;
1590
0
                    for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame[layer]->m_fencPic->m_picHeight); cuY += qgSize)
1591
0
                    {
1592
0
                        for (uint32_t cuX = 0; cuX < width; cuX += qgSize)
1593
0
                        {
1594
0
                            double qp_offset = 0;
1595
0
                            uint32_t cnt = 0;
1596
1597
0
                            for (uint32_t block_yy = cuY; block_yy < cuY + qgSize && block_yy < m_frame[layer]->m_fencPic->m_picHeight; block_yy += loopIncr)
1598
0
                            {
1599
0
                                for (uint32_t block_xx = cuX; block_xx < cuX + qgSize && block_xx < width; block_xx += loopIncr)
1600
0
                                {
1601
0
                                    int idx = ((block_yy / loopIncr) * (maxOffsetCols)) + (block_xx / loopIncr);
1602
0
                                    qp_offset += qpoffs[idx];
1603
0
                                    cnt++;
1604
0
                                }
1605
0
                            }
1606
0
                            qp_offset /= cnt;
1607
0
                            meanQPOff += qp_offset;
1608
0
                            count++;
1609
0
                        }
1610
0
                    }
1611
0
                    meanQPOff /= count;
1612
0
                }
1613
0
                rowCoder.m_meanQP = slice->m_sliceQp + meanQPOff;
1614
0
            }
1615
0
            else
1616
0
            {
1617
0
                rowCoder.m_meanQP = m_rows[0].rowGoOnCoder.m_meanQP;
1618
0
            }
1619
0
            curRow.avgQPComputed = 1;
1620
0
        }
1621
0
    }
1622
1623
    // Initialize restrict on MV range in slices
1624
0
    tld.analysis.m_sliceMinY = -(int32_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
1625
0
    tld.analysis.m_sliceMaxY = (int32_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4);
1626
1627
    // Handle single row slice
1628
0
    if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY)
1629
0
        tld.analysis.m_sliceMaxY = tld.analysis.m_sliceMinY = 0;
1630
1631
0
    if (m_top->m_threadedME && !slice->isIntra())
1632
0
    {
1633
0
        ScopedLock lock(m_tmeDepLock);
1634
0
        m_tmeDeps[row].internal = true;
1635
0
        m_top->m_threadedME->enqueueReadyRows(row, layer, this);
1636
0
    }
1637
1638
0
    while (curRow.completed < numCols)
1639
0
    {
1640
0
        ProfileScopeEvent(encodeCTU);
1641
1642
0
        const uint32_t col = curRow.completed;
1643
0
        const uint32_t cuAddr = lineStartCUAddr + col;
1644
0
        CUData* ctu = curEncData.getPicCTU(cuAddr);
1645
0
        const uint32_t bLastCuInSlice = (bLastRowInSlice & (col == numCols - 1)) ? 1 : 0;
1646
1647
        /* Must wait for TME to finish before initCTU because both threads
1648
         * operate on the same CUData — the encoder's initCTU would corrupt
1649
         * data that deriveMVsForCTU is still reading. */
1650
0
        if (m_top->m_threadedME && slice->m_sliceType != I_SLICE)
1651
0
        {
1652
0
            int64_t waitStart = x265_mdate();
1653
0
            bool waited = false;
1654
1655
0
            while (m_frame[layer]->m_ctuMEFlags[cuAddr].get() == 0)
1656
0
            {
1657
#ifdef DETAILED_CU_STATS
1658
                tld.analysis.m_stats[m_jpId].countTmeBlockedCTUs++;
1659
#endif
1660
0
                m_frame[layer]->m_ctuMEFlags[cuAddr].waitForChange(0);
1661
0
                waited = true;
1662
0
            }
1663
1664
0
            int64_t waitEnd = x265_mdate();
1665
0
            if (waited)
1666
0
                ATOMIC_ADD(&m_totalThreadedMEWait[layer], waitEnd - waitStart);
1667
0
        }
1668
1669
0
        ctu->initCTU(*m_frame[layer], cuAddr, slice->m_sliceQp, bFirstRowInSlice, bLastRowInSlice, bLastCuInSlice);
1670
1671
0
        if (!layer && bIsVbv)
1672
0
        {
1673
0
            if (col == 0 && !m_param->bEnableWavefront)
1674
0
            {
1675
0
                m_backupStreams[0].copyBits(&m_outStreams[0]);
1676
0
                curRow.bufferedEntropy.copyState(rowCoder);
1677
0
                curRow.bufferedEntropy.loadContexts(rowCoder);
1678
0
            }
1679
0
            if (bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1680
0
            {
1681
0
                curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
1682
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
1683
0
            }
1684
1685
0
            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
1686
0
            if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1687
0
                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
1688
0
            else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow[curRow.sliceId] != intRow)
1689
0
                cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
1690
0
            else
1691
0
                cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
1692
1693
            /* TODO: use defines from slicetype.h for lowres block size */
1694
0
            uint32_t block_y = (ctu->m_cuPelY >> m_param->maxLog2CUSize) * noOfBlocks;
1695
0
            uint32_t block_x = (ctu->m_cuPelX >> m_param->maxLog2CUSize) * noOfBlocks;
1696
0
            if (!strlen(m_param->analysisLoad) || !m_param->bDisableLookahead)
1697
0
            {
1698
0
                cuStat.vbvCost = 0;
1699
0
                cuStat.intraVbvCost = 0;
1700
1701
0
                for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
1702
0
                {
1703
0
                    uint32_t idx = block_x + (block_y * maxBlockCols);
1704
1705
0
                    for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)
1706
0
                    {
1707
0
                        cuStat.vbvCost += m_frame[layer]->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
1708
0
                        cuStat.intraVbvCost += m_frame[layer]->m_lowres.intraCost[idx];
1709
0
                    }
1710
0
                }
1711
0
            }
1712
0
        }
1713
0
        else
1714
0
            curEncData.m_cuStat[cuAddr].baseQp = curEncData.m_avgQpRc;
1715
1716
0
        if (m_param->bEnableWavefront && !col && !bFirstRowInSlice)
1717
0
        {
1718
            // Load SBAC coder context from previous row and initialize row state.
1719
0
            rowCoder.copyState(m_initSliceContext);
1720
0
            rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
1721
0
        }
1722
0
        if (m_param->dynamicRd && (int32_t)(m_rce.qpaRc - m_rce.qpNoVbv) > 0)
1723
0
            ctu->m_vbvAffected = true;
1724
1725
        // Does all the CU analysis, returns best top level mode decision
1726
0
        Mode& best = tld.analysis.compressCTU(*ctu, *m_frame[layer], m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
1727
1728
        /* startPoint > encodeOrder is true when the start point changes for
1729
        a new GOP but few frames from the previous GOP is still incomplete.
1730
        The data of frames in this interval will not be used by any future frames. */
1731
0
        if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame[layer]->m_encodeOrder)
1732
0
            collectDynDataRow(*ctu, &curRow.rowStats);
1733
1734
        // take a sample of the current active worker count
1735
0
        ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
1736
0
        ATOMIC_INC(&m_activeWorkerCountSamples);
1737
1738
        /* advance top-level row coder to include the context of this CTU.
1739
         * if SAO is disabled, rowCoder writes the final CTU bitstream */
1740
0
        rowCoder.encodeCTU(*ctu, m_cuGeoms[m_ctuGeomMap[cuAddr]]);
1741
1742
0
        if (m_param->bEnableWavefront && col == 1)
1743
            // Save CABAC state for next row
1744
0
            curRow.bufferedEntropy.loadContexts(rowCoder);
1745
1746
        /* SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas */
1747
0
        if (slice->m_bUseSao && m_param->bSaoNonDeblocked)
1748
0
            m_frameFilter.m_parallelFilter[row].m_sao.calcSaoStatsCu_BeforeDblk(m_frame[layer], col, row);
1749
1750
        /* Deblock with idle threading */
1751
0
        if (m_param->bEnableLoopFilter | slice->m_bUseSao)
1752
0
        {
1753
            // NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO
1754
0
            if (!bIsVbv)
1755
0
            {
1756
                // Delay one row to avoid intra prediction conflict
1757
0
                if (m_pool && !bFirstRowInSlice)
1758
0
                {                    
1759
0
                    int allowCol = col;
1760
1761
                    // avoid race condition on last column
1762
0
                    if (rowInSlice >= 2)
1763
0
                    {
1764
0
                        allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get()
1765
0
                                                                  : m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col);
1766
0
                    }
1767
0
                    m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
1768
0
                }
1769
1770
                // Last Row may start early
1771
0
                if (m_pool && bLastRowInSlice)
1772
0
                {
1773
                    // Deblocking last row
1774
0
                    int allowCol = col;
1775
1776
                    // avoid race condition on last column
1777
0
                    if (rowInSlice >= 2)
1778
0
                    {
1779
0
                        allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get()
1780
0
                                                                  : m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col);
1781
0
                    }
1782
0
                    m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
1783
0
                }
1784
0
            } // end of !bIsVbv
1785
0
        }
1786
        // Both Loopfilter and SAO Disabled
1787
0
        else
1788
0
        {
1789
0
            m_frameFilter.m_parallelFilter[row].processPostCu(col);
1790
0
        }
1791
1792
        // Completed CU processing
1793
0
        curRow.completed++;
1794
1795
0
        FrameStats frameLog;
1796
0
        curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
1797
1798
        // copy number of intra, inter cu per row into frame stats for 2 pass
1799
0
        if (m_param->rc.bStatWrite)
1800
0
        {
1801
0
            curRow.rowStats.mvBits    += best.mvBits;
1802
0
            curRow.rowStats.coeffBits += best.coeffBits;
1803
0
            curRow.rowStats.miscBits  += best.totalBits - (best.mvBits + best.coeffBits);
1804
1805
0
            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1806
0
            {
1807
                /* 1 << shift == number of 8x8 blocks at current depth */
1808
0
                int shift = 2 * (m_param->maxCUDepth - depth);
1809
0
                int cuSize = m_param->maxCUSize >> depth;
1810
1811
0
                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
1812
0
                                                               (int)(frameLog.cntIntra[depth] << shift);
1813
1814
0
                curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
1815
0
                curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
1816
0
            }
1817
0
        }
1818
0
        curRow.rowStats.totalCtu++;
1819
0
        curRow.rowStats.lumaDistortion   += best.lumaDistortion;
1820
0
        curRow.rowStats.chromaDistortion += best.chromaDistortion;
1821
0
        curRow.rowStats.psyEnergy        += best.psyEnergy;
1822
0
        curRow.rowStats.ssimEnergy       += best.ssimEnergy;
1823
0
        curRow.rowStats.resEnergy        += best.resEnergy;
1824
0
        curRow.rowStats.cntIntraNxN      += frameLog.cntIntraNxN;
1825
0
        curRow.rowStats.totalCu          += frameLog.totalCu;
1826
0
        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
1827
0
        {
1828
0
            curRow.rowStats.cntSkipCu[depth] += frameLog.cntSkipCu[depth];
1829
0
            curRow.rowStats.cntMergeCu[depth] += frameLog.cntMergeCu[depth];
1830
0
            for (int m = 0; m < INTER_MODES; m++)
1831
0
                curRow.rowStats.cuInterDistribution[depth][m] += frameLog.cuInterDistribution[depth][m];
1832
0
            for (int n = 0; n < INTRA_MODES; n++)
1833
0
                curRow.rowStats.cuIntraDistribution[depth][n] += frameLog.cuIntraDistribution[depth][n];
1834
0
        }
1835
1836
0
        curEncData.m_cuStat[cuAddr].totalBits = best.totalBits;
1837
0
        x265_emms();
1838
1839
0
        if (!layer && bIsVbv)
1840
0
        {   
1841
            // Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled
1842
0
            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];    
1843
0
            if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
1844
0
            {
1845
0
                curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
1846
0
                curEncData.m_rowStat[row].rowIntraSatd += cuStat.intraVbvCost;
1847
0
                curEncData.m_rowStat[row].encodedBits += cuStat.totalBits;
1848
0
                curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
1849
0
                curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
1850
0
            }
1851
            
1852
            // If current block is at row end checkpoint, call vbv ratecontrol.
1853
0
            if (!m_param->bEnableWavefront && col == numCols - 1)
1854
0
            {
1855
0
                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
1856
0
                curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame[layer], row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
1857
0
                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
1858
0
                curEncData.m_rowStat[row].rowQp = qpBase;
1859
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
1860
0
                if (curRow.reEncode < 0)
1861
0
                {
1862
0
                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
1863
0
                        m_frame[layer]->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
1864
1865
0
                    m_vbvResetTriggerRow[curRow.sliceId] = row;
1866
0
                    m_outStreams[0].copyBits(&m_backupStreams[0]);
1867
1868
0
                    rowCoder.copyState(curRow.bufferedEntropy);
1869
0
                    rowCoder.loadContexts(curRow.bufferedEntropy);
1870
1871
0
                    curRow.completed = 0;
1872
0
                    memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));
1873
0
                    curEncData.m_rowStat[row].numEncodedCUs = 0;
1874
0
                    curEncData.m_rowStat[row].encodedBits = 0;
1875
0
                    curEncData.m_rowStat[row].rowSatd = 0;
1876
0
                    curEncData.m_rowStat[row].rowIntraSatd = 0;
1877
0
                    curEncData.m_rowStat[row].sumQpRc = 0;
1878
0
                    curEncData.m_rowStat[row].sumQpAq = 0;
1879
0
                }
1880
0
            }
1881
            // If current block is at row diagonal checkpoint, call vbv ratecontrol.
1882
0
            else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)
1883
0
            {
1884
0
                if (m_param->rc.bEnableConstVbv)
1885
0
                {
1886
0
                    uint32_t startCuAddr = numCols * row;
1887
0
                    uint32_t EndCuAddr = startCuAddr + col;
1888
1889
0
                    for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId]; r--)
1890
0
                    {
1891
0
                        for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)
1892
0
                        {
1893
0
                            curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
1894
0
                            curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
1895
0
                            curEncData.m_rowStat[r].encodedBits += curEncData.m_cuStat[c].totalBits;
1896
0
                            curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp;
1897
0
                            curEncData.m_rowStat[r].numEncodedCUs = c;
1898
0
                        }
1899
0
                        if (curRow.reEncode < 0)
1900
0
                            break;
1901
0
                        startCuAddr = EndCuAddr - numCols;
1902
0
                        EndCuAddr = startCuAddr + 1;
1903
0
                    }
1904
0
                }
1905
0
                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
1906
0
                curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame[layer], row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
1907
0
                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
1908
0
                curEncData.m_rowStat[row].rowQp = qpBase;
1909
0
                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
1910
1911
0
                if (curRow.reEncode < 0)
1912
0
                {
1913
0
                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
1914
0
                             m_frame[layer]->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
1915
1916
                    // prevent the WaveFront::findJob() method from providing new jobs
1917
0
                    m_vbvResetTriggerRow[curRow.sliceId] = row;
1918
0
                    m_bAllRowsStop[curRow.sliceId] = true;
1919
1920
0
                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)
1921
0
                    {
1922
0
                        CTURow& stopRow = m_rows[r];
1923
1924
0
                        if (r != row)
1925
0
                        {
1926
                            /* if row was active (ready to be run) clear active bit and bitmap bit for this row */
1927
0
                            stopRow.lock.acquire();
1928
0
                            while (stopRow.active)
1929
0
                            {
1930
0
                                if (dequeueRow(m_row_to_idx[r] * 2))
1931
0
                                    stopRow.active = false;
1932
0
                                else
1933
0
                                {
1934
                                    /* we must release the row lock to allow the thread to exit */
1935
0
                                    stopRow.lock.release();
1936
0
                                    GIVE_UP_TIME();
1937
0
                                    stopRow.lock.acquire();
1938
0
                                }
1939
0
                            }
1940
0
                            stopRow.lock.release();
1941
1942
0
                            bool bRowBusy = true;
1943
0
                            do
1944
0
                            {
1945
0
                                stopRow.lock.acquire();
1946
0
                                bRowBusy = stopRow.busy;
1947
0
                                stopRow.lock.release();
1948
1949
0
                                if (bRowBusy)
1950
0
                                {
1951
0
                                    GIVE_UP_TIME();
1952
0
                                }
1953
0
                            }
1954
0
                            while (bRowBusy);
1955
0
                        }
1956
1957
0
                        m_outStreams[r].resetBits();
1958
0
                        stopRow.completed = 0;
1959
0
                        memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));
1960
0
                        curEncData.m_rowStat[r].numEncodedCUs = 0;
1961
0
                        curEncData.m_rowStat[r].encodedBits = 0;
1962
0
                        curEncData.m_rowStat[r].rowSatd = 0;
1963
0
                        curEncData.m_rowStat[r].rowIntraSatd = 0;
1964
0
                        curEncData.m_rowStat[r].sumQpRc = 0;
1965
0
                        curEncData.m_rowStat[r].sumQpAq = 0;
1966
0
                    }
1967
1968
0
                    m_bAllRowsStop[curRow.sliceId] = false;
1969
0
                }
1970
0
            }
1971
0
        }
1972
1973
0
        if (m_param->bEnableWavefront && curRow.completed >= 2 && !bLastRowInSlice &&
1974
0
            (!m_bAllRowsStop[curRow.sliceId] || intRow + 1 < m_vbvResetTriggerRow[curRow.sliceId]))
1975
0
        {
1976
            /* activate next row */
1977
0
            ScopedLock below(m_rows[row + 1].lock);
1978
1979
0
            if (m_rows[row + 1].active == false &&
1980
0
                m_rows[row + 1].completed + 2 <= curRow.completed)
1981
0
            {
1982
0
                m_rows[row + 1].active = true;
1983
0
                enqueueRowEncoder(m_row_to_idx[row + 1]);
1984
0
                tryWakeOne(); /* wake up a sleeping thread or set the help wanted flag */
1985
0
            }
1986
0
        }
1987
1988
0
        ScopedLock self(curRow.lock);
1989
0
        if ((m_bAllRowsStop[curRow.sliceId] && intRow > m_vbvResetTriggerRow[curRow.sliceId]) ||
1990
0
            (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < curRow.completed + 2))
1991
0
        {
1992
0
            curRow.active = false;
1993
0
            curRow.busy = false;
1994
0
            ATOMIC_INC(&m_countRowBlocks);
1995
0
            return;
1996
0
        }
1997
0
    }
1998
1999
    /* this row of CTUs has been compressed */
2000
0
    if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
2001
0
    {
2002
0
        if (bLastRowInSlice)       
2003
0
        {
2004
0
            for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++)
2005
0
            {
2006
0
                for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs + 1; c < numCols * (r + 1); c++)
2007
0
                {
2008
0
                    curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
2009
0
                    curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
2010
0
                    curEncData.m_rowStat[r].encodedBits += curEncData.m_cuStat[c].totalBits;
2011
0
                    curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp;
2012
0
                    curEncData.m_rowStat[r].numEncodedCUs = c;
2013
0
                }
2014
0
            }
2015
0
        }
2016
0
    }
2017
2018
    /* If encoding with ABR, update bits and complexity in rate control
2019
     * after a number of rows so the next frame's rateControlStart has more
2020
     * accurate data for estimation. At the start of the encode we update stats
2021
     * after half the frame is encoded, but after this initial period we update
2022
     * after refLagRows (the number of rows reference frames must have completed
2023
     * before referencing frames may begin encoding) */
2024
0
    if ((!layer) && (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv))
2025
0
    {
2026
0
        uint32_t rowCount = 0;
2027
0
        uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId];
2028
2029
0
        if (!m_rce.encodeOrder)
2030
0
            rowCount = maxRows - 1; 
2031
0
        else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))
2032
0
            rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
2033
0
        else
2034
0
            rowCount = X265_MIN(m_refLagRows / m_param->maxSlices, maxRows - 1);
2035
2036
0
        if (rowInSlice == rowCount)
2037
0
        {
2038
0
            m_rowSliceTotalBits[sliceId] = 0;
2039
0
            if (bIsVbv && !(m_param->rc.bEnableConstVbv && m_param->bEnableWavefront))
2040
0
            {
2041
0
                for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount + m_sliceBaseRow[sliceId]; i++)
2042
0
                    m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].encodedBits;
2043
0
            }
2044
0
            else
2045
0
            {
2046
0
                uint32_t startAddr = m_sliceBaseRow[sliceId] * numCols;
2047
0
                uint32_t finishAddr = startAddr + rowCount * numCols;
2048
                
2049
0
                for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++)
2050
0
                    m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].totalBits;
2051
0
            }
2052
2053
0
            if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
2054
0
            {
2055
0
                m_rce.rowTotalBits = 0;
2056
0
                for (uint32_t i = 0; i < m_param->maxSlices; i++)
2057
0
                    m_rce.rowTotalBits += m_rowSliceTotalBits[i];
2058
0
                m_top->m_rateControl->rateControlUpdateStats(&m_rce);
2059
0
            }
2060
0
        }
2061
0
    }
2062
2063
    /* flush row bitstream (if WPP and no SAO) or flush frame if no WPP and no SAO */
2064
    /* end_of_sub_stream_one_bit / end_of_slice_segment_flag */
2065
0
       if (!slice->m_bUseSao && (m_param->bEnableWavefront || bLastRowInSlice))
2066
0
               rowCoder.finishSlice();
2067
2068
2069
    /* Processing left Deblock block with current threading */
2070
0
    if ((m_param->bEnableLoopFilter | slice->m_bUseSao) & (rowInSlice >= 2))
2071
0
    {
2072
        /* Check conditional to start previous row process with current threading */
2073
0
        if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
2074
0
        {
2075
            /* stop threading on current row and restart it */
2076
0
            m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
2077
0
            m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
2078
0
        }
2079
0
    }
2080
2081
    /* trigger row-wise loop filters */
2082
0
    if (m_param->bEnableWavefront)
2083
0
    {
2084
0
        if (rowInSlice >= m_filterRowDelay)
2085
0
        {
2086
0
            enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
2087
2088
            /* NOTE: Activate filter if first row (row 0) */
2089
0
            if (rowInSlice == m_filterRowDelay)
2090
0
                enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
2091
0
            tryWakeOne();
2092
0
        }
2093
2094
0
        if (bLastRowInSlice)
2095
0
        {
2096
0
            for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++)
2097
0
            {
2098
0
                enableRowFilter(m_row_to_idx[i]);
2099
0
            }
2100
0
            tryWakeOne();
2101
0
        }
2102
2103
        // handle special case - single row slice
2104
0
        if  (bFirstRowInSlice & bLastRowInSlice)
2105
0
        {
2106
0
            enqueueRowFilter(m_row_to_idx[row]);
2107
0
            tryWakeOne();
2108
0
        }
2109
0
    }
2110
2111
0
    curRow.busy = false;
2112
2113
    // CHECK_ME: Does it always FALSE condition?
2114
0
    if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
2115
0
        m_completionEvent.trigger();
2116
0
}
2117
2118
void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats)
2119
0
{
2120
0
    for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
2121
0
    {
2122
0
        for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2123
0
        {
2124
0
            int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
2125
0
            if (ctu.m_collectCUCount[offset])
2126
0
            {
2127
0
                rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[offset];
2128
0
                rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
2129
0
                rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];
2130
0
            }
2131
0
        }
2132
0
    }
2133
0
}
2134
2135
void FrameEncoder::collectDynDataFrame(int layer)
2136
0
{
2137
0
    for (uint32_t row = 0; row < m_numRows; row++)
2138
0
    {
2139
0
        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
2140
0
        {
2141
0
            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2142
0
            {
2143
0
                int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;
2144
0
                int curFrameIndex = m_frame[layer]->m_encodeOrder - m_top->m_startPoint;
2145
0
                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
2146
0
                if (m_rows[row].rowStats.rowCntDyn[offset])
2147
0
                {
2148
0
                    m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
2149
0
                    m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
2150
0
                    m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
2151
0
                }
2152
0
            }
2153
0
        }
2154
0
    }
2155
0
}
2156
2157
void FrameEncoder::computeAvgTrainingData(int layer)
2158
0
{
2159
0
    if (m_frame[layer]->m_lowres.bScenecut || m_frame[layer]->m_lowres.bKeyframe)
2160
0
    {
2161
0
        m_top->m_startPoint = m_frame[layer]->m_encodeOrder;
2162
0
        int size = (m_param->keyframeMax + m_param->lookaheadDepth) * m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
2163
0
        memset(m_top->m_variance, 0, size * sizeof(uint64_t));
2164
0
        memset(m_top->m_rdCost, 0, size * sizeof(uint64_t));
2165
0
        memset(m_top->m_trainingCount, 0, size * sizeof(uint32_t));
2166
0
    }
2167
0
    if (m_frame[layer]->m_encodeOrder - m_top->m_startPoint < 2 * m_param->frameNumThreads)
2168
0
        m_frame[layer]->m_classifyFrame = false;
2169
0
    else
2170
0
        m_frame[layer]->m_classifyFrame = true;
2171
2172
0
    int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
2173
0
    memset(m_frame[layer]->m_classifyRd, 0, size * sizeof(uint64_t));
2174
0
    memset(m_frame[layer]->m_classifyVariance, 0, size * sizeof(uint64_t));
2175
0
    memset(m_frame[layer]->m_classifyCount, 0, size * sizeof(uint32_t));
2176
0
    if (m_frame[layer]->m_classifyFrame)
2177
0
    {
2178
0
        uint32_t limit = m_frame[layer]->m_encodeOrder - m_top->m_startPoint - m_param->frameNumThreads;
2179
0
        for (uint32_t i = 1; i < limit; i++)
2180
0
        {
2181
0
            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
2182
0
            {
2183
0
                for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2184
0
                {
2185
0
                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
2186
0
                    int index = (i* X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
2187
0
                    if (m_top->m_trainingCount[index])
2188
0
                    {
2189
0
                        m_frame[layer]->m_classifyRd[offset] += m_top->m_rdCost[index] / m_top->m_trainingCount[index];
2190
0
                        m_frame[layer]->m_classifyVariance[offset] += m_top->m_variance[index] / m_top->m_trainingCount[index];
2191
0
                        m_frame[layer]->m_classifyCount[offset] += m_top->m_trainingCount[index];
2192
0
                    }
2193
0
                }
2194
0
            }
2195
0
        }
2196
        /* Calculates the average feature values of historic frames that are being considered for the current frame */
2197
0
        int historyCount = m_frame[layer]->m_encodeOrder - m_param->frameNumThreads - m_top->m_startPoint - 1;
2198
0
        if (historyCount)
2199
0
        {
2200
0
            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
2201
0
            {
2202
0
                for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
2203
0
                {
2204
0
                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
2205
0
                    m_frame[layer]->m_classifyRd[offset] /= historyCount;
2206
0
                    m_frame[layer]->m_classifyVariance[offset] /= historyCount;
2207
0
                }
2208
0
            }
2209
0
        }
2210
0
    }
2211
0
}
2212
2213
/* collect statistics about CU coding decisions, return total QP */
2214
int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* log)
2215
0
{
2216
0
    int totQP = 0;
2217
0
    uint32_t depth = 0;
2218
0
    for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2219
0
    {
2220
0
        depth = ctu.m_cuDepth[absPartIdx];
2221
0
        totQP += ctu.m_qp[absPartIdx] * (ctu.m_numPartitions >> (depth * 2));
2222
0
    }
2223
2224
0
    if (m_param->csvLogLevel >= 1 || m_param->rc.bStatWrite)
2225
0
    {
2226
0
        if (ctu.m_slice->m_sliceType == I_SLICE)
2227
0
        {
2228
0
            depth = 0;
2229
0
            for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2230
0
            {
2231
0
                depth = ctu.m_cuDepth[absPartIdx];
2232
2233
0
                log->totalCu++;
2234
0
                log->cntIntra[depth]++;
2235
2236
0
                if (ctu.m_predMode[absPartIdx] == MODE_NONE)
2237
0
                {
2238
0
                    log->totalCu--;
2239
0
                    log->cntIntra[depth]--;
2240
0
                }
2241
0
                else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
2242
0
                {
2243
                    /* TODO: log intra modes at absPartIdx +0 to +3 */
2244
0
                    X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
2245
0
                    log->cntIntraNxN++;
2246
0
                    log->cntIntra[depth]--;
2247
0
                }
2248
0
                else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
2249
0
                    log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
2250
0
                else
2251
0
                    log->cuIntraDistribution[depth][ctu.m_lumaIntraDir[absPartIdx]]++;
2252
0
            }
2253
0
        }
2254
0
        else
2255
0
        {
2256
0
            depth = 0;
2257
0
            for (uint32_t absPartIdx = 0; absPartIdx < ctu.m_numPartitions; absPartIdx += ctu.m_numPartitions >> (depth * 2))
2258
0
            {
2259
0
                depth = ctu.m_cuDepth[absPartIdx];
2260
2261
0
                log->totalCu++;
2262
2263
0
                if (ctu.m_predMode[absPartIdx] == MODE_NONE)
2264
0
                    log->totalCu--;
2265
0
                else if (ctu.isSkipped(absPartIdx))
2266
0
                {
2267
0
                    if (ctu.m_mergeFlag[0])
2268
0
                        log->cntMergeCu[depth]++;
2269
0
                    else
2270
0
                        log->cntSkipCu[depth]++;
2271
0
                }
2272
0
                else if (ctu.isInter(absPartIdx))
2273
0
                {
2274
0
                    log->cntInter[depth]++;
2275
2276
0
                    if (ctu.m_partSize[absPartIdx] < AMP_ID)
2277
0
                        log->cuInterDistribution[depth][ctu.m_partSize[absPartIdx]]++;
2278
0
                    else
2279
0
                        log->cuInterDistribution[depth][AMP_ID]++;
2280
0
                }
2281
0
                else if (ctu.isIntra(absPartIdx))
2282
0
                {
2283
0
                    log->cntIntra[depth]++;
2284
2285
0
                    if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
2286
0
                    {
2287
0
                        X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable depth\n");
2288
0
                        log->cntIntraNxN++;
2289
0
                        log->cntIntra[depth]--;
2290
                        /* TODO: log intra modes at absPartIdx +0 to +3 */
2291
0
                    }
2292
0
                    else if (ctu.m_lumaIntraDir[absPartIdx] > 1)
2293
0
                        log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
2294
0
                    else
2295
0
                        log->cuIntraDistribution[depth][ctu.m_lumaIntraDir[absPartIdx]]++;
2296
0
                }
2297
0
            }
2298
0
        }
2299
0
    }
2300
2301
0
    return totQP;
2302
0
}
2303
2304
/* DCT-domain noise reduction / adaptive deadzone from libavcodec */
2305
void FrameEncoder::noiseReductionUpdate()
2306
0
{
2307
0
    static const uint32_t maxBlocksPerTrSize[4] = {1 << 18, 1 << 16, 1 << 14, 1 << 12};
2308
2309
0
    for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
2310
0
    {
2311
0
        int trSize = cat & 3;
2312
0
        int coefCount = 1 << ((trSize + 2) * 2);
2313
2314
0
        if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])
2315
0
        {
2316
0
            for (int i = 0; i < coefCount; i++)
2317
0
                m_nr->nrResidualSum[cat][i] >>= 1;
2318
0
            m_nr->nrCount[cat] >>= 1;
2319
0
        }
2320
2321
0
        int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;
2322
0
        uint64_t scaledCount = (uint64_t)nrStrength * m_nr->nrCount[cat];
2323
2324
0
        for (int i = 0; i < coefCount; i++)
2325
0
        {
2326
0
            uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] / 2;
2327
0
            uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;
2328
0
            m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);
2329
0
        }
2330
2331
        // Don't denoise DC coefficients
2332
0
        m_nr->nrOffsetDenoise[cat][0] = 0;
2333
0
    }
2334
0
}
2335
2336
void FrameEncoder::readModel(FilmGrainCharacteristics* m_filmGrain, FILE* filmgrain)
2337
0
{
2338
0
    char const* errorMessage = "Error reading FilmGrain characteristics\n";
2339
0
    FilmGrain m_fg;
2340
0
    x265_fread((char* )&m_fg, sizeof(bool) * 3 + sizeof(uint8_t), 1, filmgrain, errorMessage);
2341
0
    m_filmGrain->m_filmGrainCharacteristicsCancelFlag = m_fg.m_filmGrainCharacteristicsCancelFlag;
2342
0
    m_filmGrain->m_filmGrainCharacteristicsPersistenceFlag = m_fg.m_filmGrainCharacteristicsPersistenceFlag;
2343
0
    m_filmGrain->m_filmGrainModelId = m_fg.m_filmGrainModelId;
2344
0
    m_filmGrain->m_separateColourDescriptionPresentFlag = m_fg.m_separateColourDescriptionPresentFlag;
2345
0
    if (m_filmGrain->m_separateColourDescriptionPresentFlag)
2346
0
    {
2347
0
        ColourDescription m_clr;
2348
0
        x265_fread((char* )&m_clr, sizeof(bool) + sizeof(uint8_t) * 5, 1, filmgrain, errorMessage);
2349
0
        m_filmGrain->m_filmGrainBitDepthLumaMinus8 = m_clr.m_filmGrainBitDepthLumaMinus8;
2350
0
        m_filmGrain->m_filmGrainBitDepthChromaMinus8 = m_clr.m_filmGrainBitDepthChromaMinus8;
2351
0
        m_filmGrain->m_filmGrainFullRangeFlag = m_clr.m_filmGrainFullRangeFlag;
2352
0
        m_filmGrain->m_filmGrainColourPrimaries = m_clr.m_filmGrainColourPrimaries;
2353
0
        m_filmGrain->m_filmGrainTransferCharacteristics = m_clr.m_filmGrainTransferCharacteristics;
2354
0
        m_filmGrain->m_filmGrainMatrixCoeffs = m_clr.m_filmGrainMatrixCoeffs;
2355
0
    }
2356
0
    FGPresent m_present;
2357
0
    x265_fread((char* )&m_present, sizeof(bool) * 3 + sizeof(uint8_t) * 2, 1, filmgrain, errorMessage);
2358
0
    m_filmGrain->m_blendingModeId = m_present.m_blendingModeId;
2359
0
    m_filmGrain->m_log2ScaleFactor = m_present.m_log2ScaleFactor;
2360
0
    m_filmGrain->m_compModel[0].bPresentFlag = m_present.m_presentFlag[0];
2361
0
    m_filmGrain->m_compModel[1].bPresentFlag = m_present.m_presentFlag[1];
2362
0
    m_filmGrain->m_compModel[2].bPresentFlag = m_present.m_presentFlag[2];
2363
0
    for (int i = 0; i < MAX_NUM_COMPONENT; i++)
2364
0
    {
2365
0
        if (m_filmGrain->m_compModel[i].bPresentFlag)
2366
0
        {
2367
0
            x265_fread((char* )(&m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1), sizeof(uint8_t), 1, filmgrain, errorMessage);
2368
0
            x265_fread((char* )(&m_filmGrain->m_compModel[i].numModelValues), sizeof(uint8_t), 1, filmgrain, errorMessage);
2369
0
            m_filmGrain->m_compModel[i].intensityValues = (FilmGrainCharacteristics::CompModelIntensityValues* ) malloc(sizeof(FilmGrainCharacteristics::CompModelIntensityValues) * (m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1+1)) ;
2370
0
            for (int j = 0; j <= m_filmGrain->m_compModel[i].m_filmGrainNumIntensityIntervalMinus1; j++)
2371
0
            {
2372
0
                x265_fread((char* )(&m_filmGrain->m_compModel[i].intensityValues[j].intensityIntervalLowerBound), sizeof(uint8_t), 1, filmgrain, errorMessage);
2373
0
                x265_fread((char* )(&m_filmGrain->m_compModel[i].intensityValues[j].intensityIntervalUpperBound), sizeof(uint8_t), 1, filmgrain, errorMessage);
2374
0
                m_filmGrain->m_compModel[i].intensityValues[j].compModelValue = (int* ) malloc(sizeof(int) * (m_filmGrain->m_compModel[i].numModelValues));
2375
0
                for (int k = 0; k < m_filmGrain->m_compModel[i].numModelValues; k++)
2376
0
                {
2377
0
                    x265_fread((char* )(&m_filmGrain->m_compModel[i].intensityValues[j].compModelValue[k]), sizeof(int), 1, filmgrain, errorMessage);
2378
0
                }
2379
0
            }
2380
0
        }
2381
0
    }
2382
0
}
2383
2384
void compute_film_grain_resolution(int width, int height,
2385
                                   int& apply_units_resolution_log2,
2386
                                   int& apply_horz_resolution,
2387
                                   int& apply_vert_resolution)
2388
0
{
2389
0
    unsigned long log2_width, log2_height;
2390
0
    BSF(log2_width, (unsigned long) width);
2391
0
    BSF(log2_height, (unsigned long) height);
2392
2393
0
    int log2 = (log2_width < log2_height) ? log2_width : log2_height;
2394
0
    apply_units_resolution_log2 = log2;
2395
2396
0
    int unit = 1 << log2;
2397
0
    apply_horz_resolution = width / unit;
2398
0
    apply_vert_resolution = height / unit;
2399
2400
0
    return;
2401
0
}
2402
2403
void FrameEncoder::readAomModel(AomFilmGrainCharacteristics* m_aomFilmGrain, FILE* Aomfilmgrain)
2404
0
{
2405
0
    char const* errorMessage = "Error reading Aom FilmGrain characteristics\n";
2406
0
    AomFilmGrain m_afg;
2407
0
    m_afg.m_chroma_scaling_from_luma = 0;
2408
0
    int bitCount = 0;
2409
0
    bitCount += 4; // payload_less_than_4byte_flag(1) + film_grain_param_set_idx(3)
2410
0
    x265_fread((char*)&m_aomFilmGrain->m_apply_grain, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2411
0
    bitCount++;
2412
0
    x265_fread((char*)&m_aomFilmGrain->m_grain_seed, sizeof(uint16_t), 1, Aomfilmgrain, errorMessage);
2413
0
    bitCount+=16;
2414
0
    x265_fread((char*)&m_aomFilmGrain->m_update_grain, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2415
0
    bitCount++;
2416
0
    x265_fread((char*)&m_aomFilmGrain->m_num_y_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2417
0
    bitCount+=4;
2418
2419
0
    if (m_aomFilmGrain->m_num_y_points)
2420
0
    {
2421
0
        m_aomFilmGrain->point_y_value_increment_bits = 8;
2422
0
        bitCount += 3;
2423
0
        m_aomFilmGrain->point_y_scaling_bits = 8;
2424
0
        bitCount += 2;
2425
0
        for (int i = 0; i < m_aomFilmGrain->m_num_y_points; i++)
2426
0
        {
2427
0
            for (int j = 0; j < 2; j++)
2428
0
            {
2429
0
                x265_fread((char*)&m_aomFilmGrain->m_scaling_points_y[i][j], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2430
0
                bitCount+=8;
2431
0
            }
2432
0
        }
2433
0
    }
2434
0
    x265_fread((char*)&m_aomFilmGrain->m_num_cb_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2435
0
    bitCount+=4;
2436
0
    if (m_aomFilmGrain->m_num_cb_points)
2437
0
    {
2438
0
        m_aomFilmGrain->point_cb_value_increment_bits = 8;
2439
0
        bitCount += 3;
2440
0
        m_aomFilmGrain->point_cb_scaling_bits = 8;
2441
0
        bitCount += 2;
2442
0
        m_aomFilmGrain->cb_scaling_offset = 0;
2443
0
        bitCount += 8;
2444
0
        for (int i = 0; i < m_aomFilmGrain->m_num_cb_points; i++)
2445
0
        {
2446
0
            for (int j = 0; j < 2; j++)
2447
0
            {
2448
0
                x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cb[i][j], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2449
0
                bitCount+=8;
2450
0
            }
2451
0
        }
2452
0
    }
2453
0
    x265_fread((char*)&m_aomFilmGrain->m_num_cr_points, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2454
0
    bitCount+=4;
2455
0
    if (m_aomFilmGrain->m_num_cr_points)
2456
0
    {
2457
0
        m_aomFilmGrain->point_cr_value_increment_bits = 8;
2458
0
        bitCount += 3;
2459
0
        m_aomFilmGrain->point_cr_scaling_bits = 8;
2460
0
        bitCount += 2;
2461
0
        m_aomFilmGrain->cr_scaling_offset = 0;
2462
0
        bitCount += 8;
2463
0
        for (int i = 0; i < m_aomFilmGrain->m_num_cr_points; i++)
2464
0
        {
2465
0
            for (int j = 0; j < 2; j++)
2466
0
            {
2467
0
                x265_fread((char*)&m_aomFilmGrain->m_scaling_points_cr[i][j], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2468
0
                bitCount+=8;
2469
0
            }
2470
0
        }
2471
0
    }
2472
0
    x265_fread((char*)&m_aomFilmGrain->m_scaling_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2473
0
    bitCount+=2;
2474
0
    x265_fread((char*)&m_aomFilmGrain->m_ar_coeff_lag, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2475
0
    bitCount+=2;
2476
0
    if (m_aomFilmGrain->m_num_y_points)
2477
0
    {
2478
0
        bitCount += 2;
2479
0
        for (int i = 0; i < 24; i++)
2480
0
        {
2481
0
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_y[i], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2482
0
            bitCount+=8;
2483
0
        }
2484
0
    }
2485
0
    if (m_aomFilmGrain->m_num_cb_points || m_afg.m_chroma_scaling_from_luma)
2486
0
    {
2487
0
        bitCount += 2;
2488
0
        for (int i = 0; i < 25; i++)
2489
0
        {
2490
0
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_cb[i], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2491
0
            bitCount+=8;
2492
0
        }
2493
0
    }
2494
0
    if (m_aomFilmGrain->m_num_cr_points || m_afg.m_chroma_scaling_from_luma)
2495
0
    {
2496
0
        bitCount += 2;
2497
0
        for (int i = 0; i < 25; i++)
2498
0
        {
2499
0
            x265_fread((char*)&m_aomFilmGrain->m_ar_coeffs_cr[i], sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2500
0
            bitCount+=8;
2501
0
        }
2502
0
    }
2503
0
    x265_fread((char*)&m_aomFilmGrain->m_ar_coeff_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2504
0
    bitCount+=2;
2505
0
    x265_fread((char*)&m_aomFilmGrain->m_grain_scale_shift, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2506
0
    bitCount+=2;
2507
0
    if (m_aomFilmGrain->m_num_cb_points)
2508
0
    {
2509
0
        x265_fread((char*)&m_aomFilmGrain->m_cb_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2510
0
        bitCount += 8;
2511
0
        x265_fread((char*)&m_aomFilmGrain->m_cb_luma_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2512
0
        bitCount += 8;
2513
0
        x265_fread((char*)&m_aomFilmGrain->m_cb_offset, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2514
0
        bitCount += 9;
2515
0
    }
2516
0
    if (m_aomFilmGrain->m_num_cr_points)
2517
0
    {
2518
0
        x265_fread((char*)&m_aomFilmGrain->m_cr_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2519
0
        bitCount += 8;
2520
0
        x265_fread((char*)&m_aomFilmGrain->m_cr_luma_mult, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2521
0
        bitCount += 8;
2522
0
        x265_fread((char*)&m_aomFilmGrain->m_cr_offset, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2523
0
        bitCount += 9;
2524
0
    }
2525
0
    x265_fread((char*)&m_aomFilmGrain->m_overlap_flag, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2526
0
    bitCount++;
2527
0
    x265_fread((char*)&m_aomFilmGrain->m_clip_to_restricted_range, sizeof(int32_t), 1, Aomfilmgrain, errorMessage);
2528
0
    bitCount++;
2529
2530
0
    m_aomFilmGrain->luma_only_flag = m_aomFilmGrain->m_num_cb_points == 0 && m_aomFilmGrain->m_num_cr_points == 0;
2531
0
    bitCount++;
2532
0
    m_aomFilmGrain->subsamplingX = CHROMA_H_SHIFT(m_param->internalCsp);
2533
0
    m_aomFilmGrain->subsamplingY = CHROMA_V_SHIFT(m_param->internalCsp);
2534
0
    if (!m_aomFilmGrain->luma_only_flag)
2535
0
        bitCount += 2; // subsampling_x(1) + subsampling_y(1)
2536
0
    compute_film_grain_resolution(m_param->sourceWidth, m_param->sourceHeight, m_aomFilmGrain->units_resolution_log2,
2537
0
        m_aomFilmGrain->horz_resolution, m_aomFilmGrain->vert_resolution);
2538
0
    bitCount += 28; // apply_units_resolution_log2(4) + apply_horz_resolution(12) + apply_vert_resolution(12)
2539
0
    m_aomFilmGrain->predict_scaling_flag = 0;
2540
0
    bitCount++;
2541
0
    m_aomFilmGrain->predict_y_scaling_flag = 0;
2542
0
    m_aomFilmGrain->predict_cb_scaling_flag = 0;
2543
0
    m_aomFilmGrain->predict_cr_scaling_flag = 0;
2544
0
    m_aomFilmGrain->m_bitDepth = m_param->internalBitDepth;
2545
0
    bitCount++; // videosingnaltypepresentflag
2546
0
    if (m_frame[0]->m_encData->m_slice->m_sps->vuiParameters.videoSignalTypePresentFlag) bitCount += 4; // bit_depth_minus8(3) + cicp_info_present_flag(1)
2547
0
    if (m_frame[0]->m_encData->m_slice->m_sps->vuiParameters.colourDescriptionPresentFlag) bitCount += 25; // colourPrimaries(8) + transferCharacteristics(8) + matrixCoefficients(8)+ videoFullRangeFlag(1)
2548
0
    if (!m_aomFilmGrain->luma_only_flag) {
2549
0
        m_aomFilmGrain->m_chroma_scaling_from_luma = 0;
2550
0
        bitCount++;
2551
0
    }
2552
2553
0
    m_aomFilmGrain->payload_size = (bitCount + 8 - 1) / 8;
2554
0
    m_aomFilmGrain->payload_bits = m_aomFilmGrain->payload_size < 4 ? 2 : 8;
2555
0
    bitCount += m_aomFilmGrain->payload_bits;
2556
0
    m_aomFilmGrain->payload_size = (bitCount + 8 - 1) / 8;
2557
0
}
2558
2559
#if ENABLE_LIBVMAF
2560
void FrameEncoder::vmafFrameLevelScore()
2561
{
2562
    PicYuv *fenc = m_frame[0]->m_fencPic;
2563
    PicYuv *recon = m_frame[0]->m_reconPic[0];
2564
2565
    x265_vmaf_framedata *vmafframedata = (x265_vmaf_framedata*)x265_malloc(sizeof(x265_vmaf_framedata));
2566
    if (!vmafframedata)
2567
    {
2568
        x265_log(NULL, X265_LOG_ERROR, "vmaf frame data alloc failed\n");
2569
    }
2570
2571
    vmafframedata->height = fenc->m_picHeight;
2572
    vmafframedata->width = fenc->m_picWidth;
2573
    vmafframedata->frame_set = 0;
2574
    vmafframedata->internalBitDepth = m_param->internalBitDepth;
2575
    vmafframedata->reference_frame = fenc;
2576
    vmafframedata->distorted_frame = recon;
2577
    fenc->m_vmafScore = x265_calculate_vmaf_framelevelscore(m_param,vmafframedata);
2578
2579
    if (vmafframedata)
2580
    x265_free(vmafframedata);
2581
}
2582
#endif
2583
2584
Frame** FrameEncoder::getEncodedPicture(NALList& output)
2585
0
{
2586
0
    if (m_frame[0] && (m_param->numLayers <= 1 || (MAX_LAYERS > 1 && m_frame[1])))
2587
0
    {
2588
        /* block here until worker thread completes */
2589
0
        m_done.wait();
2590
2591
0
        for (int i = 0; i < m_param->numLayers; i++)
2592
0
        {
2593
0
            m_retFrameBuffer[i] = m_frame[i];
2594
0
            m_frame[i] = NULL;
2595
0
            m_prevOutputTime[i] = x265_mdate();
2596
0
        }
2597
0
        output.takeContents(m_nalList);
2598
0
        return m_retFrameBuffer;
2599
0
    }
2600
2601
0
    return NULL;
2602
0
}
2603
}