/work/x265/source/encoder/frameencoder.h
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Shin Yee <shinyee@multicorewareinc.com> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #ifndef X265_FRAMEENCODER_H |
26 | | #define X265_FRAMEENCODER_H |
27 | | |
28 | | #include "common.h" |
29 | | #include "wavefront.h" |
30 | | #include "bitstream.h" |
31 | | #include "frame.h" |
32 | | #include "picyuv.h" |
33 | | #include "md5.h" |
34 | | |
35 | | #include "analysis.h" |
36 | | #include "sao.h" |
37 | | |
38 | | #include "entropy.h" |
39 | | #include "framefilter.h" |
40 | | #include "ratecontrol.h" |
41 | | #include "reference.h" |
42 | | #include "nal.h" |
43 | | #include "temporalfilter.h" |
44 | | #include "threadedme.h" |
45 | | #include <queue> |
46 | | |
47 | | namespace X265_NS { |
48 | | // private x265 namespace |
49 | | |
50 | | class ThreadPool; |
51 | | class Encoder; /* only used through pointers (Encoder*) in this header */ |
52 | | /* NOTE(review): neither macro below is referenced in this header; their meaning is fixed by their users -- confirm there */ |
53 | 0 | #define ANGULAR_MODE_ID 2 |
54 | 0 | #define AMP_ID 3 |
55 | | /* Block of 64-bit CU/mode counters; the constructor starts every counter at zero. NOTE(review): the leading [4] dimension presumably indexes CU depth -- confirm. */ |
56 | | struct StatisticLog |
57 | | { |
58 | | uint64_t cntInter[4]; |
59 | | uint64_t cntIntra[4]; |
60 | | uint64_t cuInterDistribution[4][INTER_MODES]; |
61 | | uint64_t cuIntraDistribution[4][INTRA_MODES]; |
62 | | uint64_t cntIntraNxN; |
63 | | uint64_t cntSkipCu[4]; |
64 | | uint64_t cntTotalCu[4]; |
65 | | uint64_t totalCu; |
66 | | /* every member above is a plain uint64_t (or an array of them), so a byte-wise zero fill of *this is a valid reset */ |
67 | | StatisticLog() |
68 | 0 | { |
69 | 0 | memset(this, 0, sizeof(StatisticLog)); |
70 | 0 | } |
71 | | }; |
72 | | |
73 | | /* Manages the state of encoding one row of CTU blocks. When |
74 | | * WPP is active, several rows will be simultaneously encoded. */ |
75 | | struct CTURow |
76 | | { |
77 | | Entropy bufferedEntropy; /* store CTU2 context for next row CTU0 */ |
78 | | Entropy rowGoOnCoder; /* store context between CTUs, code bitstream if !SAO */ |
79 | | unsigned int sliceId; /* store current row slice id */ |
80 | | |
81 | | FrameStats rowStats; |
82 | | |
83 | | /* Threading variables */ |
84 | | |
85 | | /* This lock must be acquired when reading or writing active or busy */ |
86 | | Lock lock; |
87 | | |
88 | | /* row is ready to run, has no neighbor dependencies. The row may have |
89 | | * external dependencies (reference frame pixels) that prevent it from being |
90 | | * processed, so it may stay with active=true for some time before it is |
91 | | * encoded by a worker thread. */ |
92 | | volatile bool active; |
93 | | |
94 | | /* row is being processed by a worker thread. This flag is only true when a |
95 | | * worker thread is within the context of FrameEncoder::processRow(). This |
96 | | * flag is used to detect multiple possible wavefront problems. */ |
97 | | volatile bool busy; |
98 | | |
99 | | /* count of completed CUs in this row */ |
100 | | volatile uint32_t completed; |
101 | | volatile uint32_t avgQPComputed; |
102 | | |
103 | | volatile int reEncode; |
104 | | |
105 | | /* Called at the start of each frame to initialize state: clears active, busy, |
106 | | * completed, avgQPComputed, reEncode and rowStats, records sid as the slice id and loads initContext into rowGoOnCoder. bufferedEntropy and lock are not touched here. */ void init(Entropy& initContext, unsigned int sid) |
107 | 3.27k | { |
108 | 3.27k | active = false; |
109 | 3.27k | busy = false; |
110 | 3.27k | completed = 0; |
111 | 3.27k | avgQPComputed = 0; |
112 | 3.27k | sliceId = sid; |
113 | 3.27k | reEncode = 0; |
114 | 3.27k | memset(&rowStats, 0, sizeof(rowStats)); |
115 | 3.27k | rowGoOnCoder.load(initContext); |
116 | 3.27k | } |
117 | | }; |
118 | | |
119 | | /* Film grain characteristics. NOTE(review): the field names look like film grain characteristics SEI syntax elements -- confirm against the code that fills this struct. */ |
120 | | struct FilmGrain |
121 | | { |
122 | | bool m_filmGrainCharacteristicsCancelFlag; |
123 | | bool m_filmGrainCharacteristicsPersistenceFlag; |
124 | | bool m_separateColourDescriptionPresentFlag; |
125 | | uint8_t m_filmGrainModelId; |
126 | | uint8_t m_blendingModeId; |
127 | | uint8_t m_log2ScaleFactor; |
128 | | }; |
129 | | /* Colour description fields for film grain. NOTE(review): presumably only meaningful when FilmGrain::m_separateColourDescriptionPresentFlag is set -- confirm. */ |
130 | | struct ColourDescription |
131 | | { |
132 | | bool m_filmGrainFullRangeFlag; |
133 | | uint8_t m_filmGrainBitDepthLumaMinus8; |
134 | | uint8_t m_filmGrainBitDepthChromaMinus8; |
135 | | uint8_t m_filmGrainColourPrimaries; |
136 | | uint8_t m_filmGrainTransferCharacteristics; |
137 | | uint8_t m_filmGrainMatrixCoeffs; |
138 | | }; |
139 | | /* Blending mode id, log2 scale factor and a 3-entry array of present flags. NOTE(review): the three flags are presumably one per colour component -- confirm. */ |
140 | | struct FGPresent |
141 | | { |
142 | | uint8_t m_blendingModeId; |
143 | | uint8_t m_log2ScaleFactor; |
144 | | bool m_presentFlag[3]; |
145 | | }; |
146 | | /* Film grain parameter set with AOM-style field names: room for 14 luma and 10 + 10 chroma scaling points (two values each), 24 luma and 25 + 25 chroma AR coefficients, plus scalar controls. NOTE(review): presumably mirrors the AV1 film grain parameters -- confirm. */ |
147 | | struct AomFilmGrain |
148 | | { |
149 | | int32_t m_apply_grain; |
150 | | int32_t m_update_grain; |
151 | | int32_t m_scaling_points_y[14][2]; |
152 | | int32_t m_num_y_points; |
153 | | int32_t m_scaling_points_cb[10][2]; |
154 | | int32_t m_num_cb_points; |
155 | | int32_t m_scaling_points_cr[10][2]; |
156 | | int32_t m_num_cr_points; |
157 | | int32_t m_scaling_shift; |
158 | | int32_t m_ar_coeff_lag; |
159 | | int32_t m_ar_coeffs_y[24]; |
160 | | int32_t m_ar_coeffs_cb[25]; |
161 | | int32_t m_ar_coeffs_cr[25]; |
162 | | int32_t m_ar_coeff_shift; |
163 | | int32_t m_cb_mult; |
164 | | int32_t m_cb_luma_mult; |
165 | | int32_t m_cb_offset; |
166 | | int32_t m_cr_mult; |
167 | | int32_t m_cr_luma_mult; |
168 | | int32_t m_cr_offset; |
169 | | int32_t m_overlap_flag; |
170 | | int32_t m_clip_to_restricted_range; |
171 | | int32_t m_bitDepth; |
172 | | int32_t m_chroma_scaling_from_luma; |
173 | | int32_t m_grain_scale_shift; |
174 | | uint16_t m_grain_seed; |
175 | | }; |
176 | | |
177 | | // Manages the wave-front processing of a single encoding frame |
178 | | class FrameEncoder : public WaveFront, public Thread |
179 | | { |
180 | | public: |
181 | | |
182 | | FrameEncoder(); |
183 | | /* the destructor body is empty; NOTE(review): cleanup presumably lives in destroy() -- confirm callers invoke it */ |
184 | 2.89k | virtual ~FrameEncoder() {} |
185 | | |
186 | | virtual bool init(Encoder *top, int numRows, int numCols); |
187 | | |
188 | | void destroy(); |
189 | | |
190 | | /* triggers encode of a new frame by the worker thread */ |
191 | | bool startCompressFrame(Frame* curFrame[MAX_LAYERS]); |
192 | | |
193 | | /* blocks until worker thread is done, returns access unit */ |
194 | | Frame **getEncodedPicture(NALList& list); |
195 | | |
196 | | void initDecodedPictureHashSEI(int row, int cuAddr, int height, int layer); |
197 | | |
198 | | Event m_enable; |
199 | | Event m_done; |
200 | | Event m_completionEvent; |
201 | | int m_localTldIdx; |
202 | | bool m_reconfigure; /* reconfigure in progress */ |
203 | | volatile bool m_threadActive; |
204 | | volatile bool *m_bAllRowsStop; |
205 | | volatile int m_completionCount; |
206 | | volatile int *m_vbvResetTriggerRow; |
207 | | volatile int m_sliceCnt; |
208 | | |
209 | | uint32_t m_numRows; |
210 | | uint32_t m_numCols; |
211 | | uint32_t m_filterRowDelay; |
212 | | uint32_t m_filterRowDelayCus; |
213 | | uint32_t m_refLagRows; |
214 | | bool m_bUseSao; |
215 | | |
216 | | CTURow* m_rows; |
217 | | uint16_t m_sliceAddrBits; |
218 | | uint32_t m_sliceGroupSize; |
219 | | uint32_t* m_sliceBaseRow; |
220 | | uint32_t* m_sliceMaxBlockRow; |
221 | | int64_t m_rowSliceTotalBits[2]; |
222 | | RateControlEntry m_rce; |
223 | | SEIDecodedPictureHash m_seiReconPictureDigest; |
224 | | /* the statistics below are kept per layer (arrays of MAX_LAYERS) */ |
225 | | uint64_t m_SSDY[MAX_LAYERS]; |
226 | | uint64_t m_SSDU[MAX_LAYERS]; |
227 | | uint64_t m_SSDV[MAX_LAYERS]; |
228 | | double m_ssim[MAX_LAYERS]; |
229 | | uint64_t m_accessUnitBits[MAX_LAYERS]; |
230 | | uint32_t m_ssimCnt[MAX_LAYERS]; |
231 | | |
232 | | volatile int m_activeWorkerCount; // count of workers currently encoding or filtering CTUs |
233 | | volatile int m_totalActiveWorkerCount; // sum of m_activeWorkerCount sampled at end of each CTU |
234 | | volatile int m_activeWorkerCountSamples; // count of times m_activeWorkerCount was sampled (think vbv restarts) |
235 | | volatile int m_countRowBlocks; // count of workers forced to abandon a row because of top dependency |
236 | | int64_t m_startCompressTime[MAX_LAYERS]; // timestamp when frame encoder is given a frame |
237 | | int64_t m_row0WaitTime[MAX_LAYERS]; // timestamp when row 0 is allowed to start |
238 | | int64_t m_allRowsAvailableTime[MAX_LAYERS]; // timestamp when all reference dependencies are resolved |
239 | | int64_t m_endCompressTime[MAX_LAYERS]; // timestamp after all CTUs are compressed |
240 | | int64_t m_endFrameTime[MAX_LAYERS]; // timestamp after RCEnd, NR updates, etc |
241 | | int64_t m_stallStartTime[MAX_LAYERS]; // timestamp when worker count becomes 0 |
242 | | int64_t m_prevOutputTime[MAX_LAYERS]; // timestamp when prev frame was retrieved by API thread |
243 | | int64_t m_slicetypeWaitTime[MAX_LAYERS]; // total elapsed time waiting for decided frame |
244 | | int64_t m_totalWorkerElapsedTime[MAX_LAYERS]; // total elapsed time spent by worker threads processing CTUs |
245 | | int64_t m_totalNoWorkerTime[MAX_LAYERS]; // total elapsed time without any active worker threads |
246 | | int64_t m_totalThreadedMEWait[MAX_LAYERS]; // total time spent waiting by CTUs for ThreadedME |
247 | | int64_t m_totalThreadedMETime[MAX_LAYERS]; // total time spent processing by ThreadedME |
248 | | /* m_cuStats only exists when the build defines DETAILED_CU_STATS to a non-zero value */ |
249 | | #if DETAILED_CU_STATS |
250 | | CUStats m_cuStats; |
251 | | #endif |
252 | | |
253 | | Encoder* m_top; |
254 | | x265_param* m_param; |
255 | | Frame* m_frame[MAX_LAYERS]; |
256 | | Frame** m_retFrameBuffer; |
257 | | NoiseReduction* m_nr; |
258 | | ThreadLocalData* m_tld; /* for --no-wpp */ |
259 | | Bitstream* m_outStreams; |
260 | | Bitstream* m_backupStreams; |
261 | | uint32_t* m_substreamSizes; |
262 | | |
263 | | CUGeom* m_cuGeoms; |
264 | | uint32_t* m_ctuGeomMap; |
265 | | |
266 | | Bitstream m_bs; |
267 | | MotionReference m_mref[2][MAX_NUM_REF + 1]; |
268 | | Entropy m_entropyCoder; |
269 | | Entropy m_initSliceContext; |
270 | | FrameFilter m_frameFilter; |
271 | | NALList m_nalList; |
272 | | |
273 | | int m_sLayerId; |
274 | | |
275 | | std::queue<CTUTask> m_tmeTasks; |
276 | | Lock m_tmeTasksLock; |
277 | | /* NOTE(review): presumably one entry per tracked unit in m_tmeDeps, guarded by m_tmeDepLock -- confirm in the .cpp */ |
278 | | struct TMEDependencyState |
279 | | { |
280 | | bool internal; |
281 | | bool external; |
282 | | bool isQueued; |
283 | | }; |
284 | | |
285 | | std::vector<TMEDependencyState> m_tmeDeps; |
286 | | Lock m_tmeDepLock; |
287 | | /* task group that keeps a reference back to the owning FrameEncoder */ |
288 | | class WeightAnalysis : public BondedTaskGroup |
289 | | { |
290 | | public: |
291 | | |
292 | | FrameEncoder& master; |
293 | | |
294 | 0 | WeightAnalysis(FrameEncoder& fe) : master(fe) {} |
295 | | |
296 | | void processTasks(int workerThreadId); |
297 | | |
298 | | protected: |
299 | | /* declared but not defined in this header, and returns by value; NOTE(review): presumably only here to block assignment (the class holds a reference member) -- confirm */ |
300 | | WeightAnalysis operator=(const WeightAnalysis&); |
301 | | }; |
302 | | |
303 | | protected: |
304 | | |
305 | | bool initializeGeoms(); |
306 | | |
307 | | /* analyze / compress frame, can be run in parallel within reference constraints */ |
308 | | void compressFrame(int layer); |
309 | | |
310 | | /* called by compressFrame to generate final per-row bitstreams */ |
311 | | void encodeSlice(uint32_t sliceAddr, int layer); |
312 | | |
313 | | void threadMain(); |
314 | | int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog); |
315 | | void noiseReductionUpdate(); |
316 | | void writeTrailingSEIMessages(int layer); |
317 | | bool writeToneMapInfo(x265_sei_payload *payload); |
318 | | |
319 | | /* Called by WaveFront::findJob() */ |
320 | | virtual void processRow(int row, int threadId, int layer); |
321 | | virtual void processRowEncoder(int row, ThreadLocalData& tld, int layer); |
322 | | /* each CTU row owns two WaveFront row slots: 2*row is its encoder job, 2*row + 1 its filter job */ |
323 | 3.58k | void enqueueRowEncoder(int row) { WaveFront::enqueueRow(row * 2 + 0); } |
324 | 2.84k | void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); } |
325 | 2.84k | void enableRowEncoder(int row) { WaveFront::enableRow(row * 2 + 0); } |
326 | 2.84k | void enableRowFilter(int row) { WaveFront::enableRow(row * 2 + 1); } |
327 | | #if ENABLE_LIBVMAF |
328 | | void vmafFrameLevelScore(); |
329 | | #endif |
330 | | void collectDynDataFrame(int layer); |
331 | | void computeAvgTrainingData(int layer); |
332 | | void collectDynDataRow(CUData& ctu, FrameStats* rowStats); |
333 | | void readModel(FilmGrainCharacteristics* m_filmGrain, FILE* filmgrain); /* m_filmGrain is a parameter name here, not a member of this class; FilmGrainCharacteristics is not declared in this header */ |
334 | | void readAomModel(AomFilmGrainCharacteristics* m_aomFilmGrain, FILE* Aomfilmgrain); /* likewise a parameter; AomFilmGrainCharacteristics is not declared in this header */ |
335 | | }; |
336 | | } |
337 | | |
338 | | #endif // ifndef X265_FRAMEENCODER_H |