Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/encoder/threadedme.h
Line
Count
Source
1
/*****************************************************************************
2
 * Copyright (C) 2013-2025 MulticoreWare, Inc
3
 *
4
 * Authors: Shashank Pathipati <shashank.pathipati@multicorewareinc.com>
5
 *          Somu Vineela <somu@multicorewareinc.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
 *
21
 * This program is also available under a commercial proprietary license.
22
 * For more information, contact us at license @ x265.com.
23
 *****************************************************************************/
24
25
#ifndef THREADED_ME_H
26
#define THREADED_ME_H
27
28
#include "common.h"
29
#include "threading.h"
30
#include "threadpool.h"
31
#include "cudata.h"
32
#include "lowres.h"
33
#include "frame.h"
34
#include "analysis.h"
35
#include "mv.h"
36
37
#include <queue>
38
#include <vector>
39
#include <fstream>
40
41
namespace X265_NS {
42
43
extern int g_puStartIdx[128][8]; // 128x8 int table defined elsewhere; presumably the PU start-index table filled by ThreadedME::initPuStartIdx() -- confirm
44
45
class Encoder;
46
class Analysis;
47
class FrameEncoder;
48
49
struct PUBlock { // Shape descriptor for one prediction-unit entry; element type of g_puLookup.
50
    uint32_t width;     // PU width (presumably in luma samples -- confirm)
51
    uint32_t height;    // PU height, same unit as width
52
    PartSize partsize;  // partition mode this shape is listed under
53
    bool isAmp;         // set on the SIZE_2NxnU/2NxnD/nLx2N/nRx2N rows of g_puLookup, clear on all others
54
};
55
56
/* Table of PU shapes, one PUBlock { width, height, partsize, isAmp } per row.
 * The 24 initializers are grouped by block size (2N, taken from each group's
 * SIZE_2Nx2N row): three shapes for 2N = 8, then seven shapes each for
 * 2N = 16, 32 and 64. Every seven-row group lists the four asymmetric modes
 * (isAmp = 1) first, followed by 2NxN, Nx2N and 2Nx2N.
 * Being a namespace-scope const object defined in a header, the array has
 * internal linkage, so every translation unit including this file gets its
 * own copy. If MAX_NUM_PU_SIZES (defined elsewhere) is larger than 24, the
 * remaining elements are zero-initialized. */
const PUBlock g_puLookup[MAX_NUM_PU_SIZES] = {
57
    { 8,   4, SIZE_2NxN,  0 }, // 2N = 8 group (no asymmetric rows)
58
    { 4,   8, SIZE_Nx2N,  0 },
59
    { 8,   8, SIZE_2Nx2N, 0 },
60
    { 16,  4, SIZE_2NxnU, 1 }, // 2N = 16 group
61
    { 16, 12, SIZE_2NxnD, 1 },
62
    { 4,  16, SIZE_nLx2N, 1 },
63
    { 12, 16, SIZE_nRx2N, 1 },
64
    { 16,  8, SIZE_2NxN,  0 },
65
    { 8,  16, SIZE_Nx2N,  0 },
66
    { 16, 16, SIZE_2Nx2N, 0 },
67
    { 32,  8, SIZE_2NxnU, 1 }, // 2N = 32 group
68
    { 32, 24, SIZE_2NxnD, 1 },
69
    { 8,  32, SIZE_nLx2N, 1 },
70
    { 24, 32, SIZE_nRx2N, 1 },
71
    { 32, 16, SIZE_2NxN,  0 },
72
    { 16, 32, SIZE_Nx2N,  0 },
73
    { 32, 32, SIZE_2Nx2N, 0 },
74
    { 64, 16, SIZE_2NxnU, 1 }, // 2N = 64 group
75
    { 64, 48, SIZE_2NxnD, 1 },
76
    { 16, 64, SIZE_nLx2N, 1 },
77
    { 48, 64, SIZE_nRx2N, 1 },
78
    { 64, 32, SIZE_2NxN,  0 },
79
    { 32, 64, SIZE_Nx2N,  0 },
80
    { 64, 64, SIZE_2Nx2N, 0 }
81
};
82
83
/* Bundle of references to a CTU's data, its geometry and a frame.
 * All three members are references, so an instance must be fully initialized
 * when it is created and cannot be default-constructed or copy-assigned. */
struct CTUTaskData
84
{
85
    CUData& ctuData; // CTU data object (not owned)
86
    CUGeom& ctuGeom; // geometry paired with ctuData (not owned)
87
    Frame& frame;    // frame object (not owned); presumably the frame containing the CTU -- confirm
88
};
89
90
/* Describes a rectangular block of CTUs together with the frame and frame
 * encoder it belongs to. NOTE(review): row/col/width/height are presumably
 * expressed in CTU units -- confirm against the enqueue code. */
struct CTUBlockTask
91
{
92
    int row;    // position of the block (row)
93
    int col;    // position of the block (column)
94
    int width;  // horizontal extent of the block
95
    int height; // vertical extent of the block
96
    Frame* frame;                 // frame the block refers to (non-owning pointer)
97
    class FrameEncoder* frameEnc; // frame encoder associated with the block (non-owning pointer)
98
    unsigned long long seq; /* monotonic sequence to preserve enqueue order */
99
};
100
101
/* Per-PU descriptor. The field meanings below are inferred from names and
 * types only; this header does not show how they are produced or consumed. */
struct PUData
102
{
103
    PartSize part;        // partition mode of the PU
104
    const CUGeom* cuGeom; // read-only geometry of the CU (non-owning pointer)
105
    int puOffset;         // NOTE(review): presumably an offset of this PU inside its CU/CTU -- confirm
106
    int areaId;           // NOTE(review): meaning not visible in this header -- confirm
107
    int finalIdx;         // NOTE(review): presumably the final index into a per-CTU result array -- confirm
108
    int qp;               // quantization parameter value carried with the PU
109
};
110
111
/* Motion-estimation result record. The two-element arrays presumably hold one
 * entry per reference list (L0/L1) -- confirm against the code that fills them. */
struct MEData
112
{
113
    MV       mv[2];     // motion vectors
114
    MV       mvp[2];    // motion vector predictors
115
    uint32_t mvCost[2]; // cost associated with each motion vector
116
    int      ref[2];    // reference indices
117
    int      bits;      // bit count (signed)
118
    uint32_t cost;      // overall cost value
119
};
120
121
/* Element type of the ThreadedME task priority queue. CompareCTUTask orders
 * these using frame->m_poc, row, col and seq, so frame must be a valid pointer
 * whenever a task is inside the queue. */
struct CTUTask
122
{
123
    uint64_t seq; // enqueue sequence number; smaller values are preferred by CompareCTUTask
124
    int row;      // block row; summed with col by CompareCTUTask
125
    int col;      // block column
126
    int width;    // horizontal extent of the block
127
    int height;   // vertical extent of the block
128
    int layer;    // layer index supplied at enqueue time
129
130
    CUData* ctu;            // CTU data pointer (non-owning as far as this header shows)
131
    CUGeom* geom;           // geometry pointer paired with ctu
132
    Frame* frame;           // frame whose m_poc is compared by CompareCTUTask
133
    FrameEncoder* frameEnc; // frame encoder that the task belongs to
134
};
135
136
137
/* Ordering functor for std::priority_queue<CTUTask, ..., CompareCTUTask>.
 * operator()(a, b) returns true when `a` must come out of the queue AFTER `b`.
 *
 * NOTE(review): the two rules below are not mutually consistent. Example with
 * A{poc 0, row+col 5, seq 1}, B{poc 0, row+col 2, seq 3}, C{poc 1, seq 2}:
 * cmp(A,B), cmp(B,C) and cmp(C,A) are all true, so the relation is cyclic and
 * is not a strict weak ordering as the standard requires of a Compare type.
 * Confirm whether such task mixes can occur and which policy is intended. */
struct CompareCTUTask {
138
0
    bool operator()(const CTUTask& a, const CTUTask& b) const {
139
0
        if (a.frame->m_poc == b.frame->m_poc) // both frame pointers are dereferenced unconditionally
140
0
        {
141
0
            int a_pos = a.row + a.col; // index of the anti-diagonal the block sits on
142
0
            int b_pos = b.row + b.col;
143
0
            if (a_pos != b_pos) return a_pos > b_pos; // same POC: the smaller row+col is served first
144
0
        }
145
146
        /* Compare by sequence number to preserve FIFO enqueue order.
147
         * priority_queue in C++ is a max-heap, so return true when a.seq > b.seq
148
         * to make smaller seq (earlier enqueue) the top() element. */
149
0
        return a.seq > b.seq; // reached for different POCs, or equal POC with equal row+col
150
0
    }
151
};
152
153
/**
154
 * @brief Threaded motion-estimation module that schedules CTU blocks across worker threads.
155
 *
156
 * Owns per-worker analysis state (ThreadLocalData), manages the CTU task queues,
157
 * and exposes a JobProvider interface for the thread pool to execute MVP
158
 * derivation and ME searches in parallel.
159
 */
160
class ThreadedME: public JobProvider, public Thread
161
{
162
public:
163
    x265_param*             m_param; // parameter pointer handed to the constructor
164
    Encoder&                m_enc;   // encoder reference handed to the constructor
165
166
    std::priority_queue<CTUTask, std::vector<CTUTask>, CompareCTUTask>  m_taskQueue; // pending CTU tasks, ordered by CompareCTUTask
167
    Lock                    m_taskQueueLock; // presumably guards m_taskQueue -- confirm in the implementation
168
    Event                   m_taskEvent;     // presumably signalled when work is available -- confirm in the implementation
169
170
    volatile bool           m_active;     // NOTE(review): volatile alone gives no inter-thread ordering guarantees; confirm how this flag is shared
171
    unsigned long long      m_enqueueSeq; // presumably the source of task sequence numbers -- confirm; starts at 0
172
173
    ThreadLocalData*        m_tld;      // per-worker analysis state; NULL until allocated
174
    int                     m_tldCount; // presumably the number of entries behind m_tld -- confirm; starts at 0
175
176
#ifdef DETAILED_CU_STATS
177
    CUStats                 m_cuStats; // aggregate statistics, only present in DETAILED_CU_STATS builds
178
#endif
179
180
    /**
181
     * @brief Construct the ThreadedME manager; call create() before use.
     *
     * Stores @p param and @p enc and puts the remaining scalar/pointer members
     * into a defined state (inactive, sequence 0, no thread-local data) so they
     * never hold indeterminate values before create() runs.
182
     */
183
0
    ThreadedME(x265_param* param, Encoder& enc): m_param(param), m_enc(enc), m_active(false), m_enqueueSeq(0), m_tld(NULL), m_tldCount(0) {}
184
    
185
    /**
186
     * @brief Creates threadpool, thread local data and registers itself as a job provider
187
     */
188
    bool create();
189
190
    /**
191
     * @brief Initialize lookup table used to index PU offsets for all valid CTU sizes.
192
     */
193
    void initPuStartIdx();
194
195
    /**
196
     * @brief Enqueue a block of CTUs for motion estimation.
197
     *
198
     * Blocks are queued per FrameEncoder and later moved into the global
199
     * priority queue consumed by worker threads.
200
     */
201
    void enqueueCTUBlock(int row, int col, int width, int height, int layer, FrameEncoder* frameEnc);
202
203
    /**
204
     * @brief Inspect dependency state and enqueue newly-unblocked CTU rows.
205
     *
206
     * Uses external (row-level) and internal (buffered-row) dependencies to
207
     * decide when a row can be split into CTU block tasks.
208
     */
209
    void enqueueReadyRows(int row, int layer, FrameEncoder* frameEnc);
210
211
    /**
212
     * @brief Main dispatcher thread that transfers per-frame tasks into the global queue.
213
     */
214
    void threadMain();
215
216
    /**
217
     * @brief Dequeue a CTU task, derive MVs, and run ME over all supported PU shapes.
218
     *
219
     * Called by worker threads via JobProvider; processes an entire CTU block.
220
     */
221
    void findJob(int workerThreadId);
222
223
    /**
224
     * @brief Stops worker threads
225
     */
226
    void stopJobs();
227
228
    /**
229
     * @brief Cleanup allocated resources
230
     */
231
    void destroy();
232
233
    /**
234
     * @brief Accumulate detailed CU statistics from worker thread local data.
235
     */
236
    void collectStats();
237
};
238
239
// Utils
240
241
/**
242
 * @brief Workaround that initializes a CTU ahead of processRowEncoder, because
243
 * the CUData is needed before the FrameEncoder has initialized it.
 *
 * @param ctu  CTU object to initialize (modified in place).
 * @param row  CTU row -- presumably in CTU units; confirm in the implementation.
 * @param col  CTU column -- presumably in CTU units; confirm in the implementation.
 * @param task Task supplying the surrounding context; taken by non-const reference.
244
 */
245
void initCTU(CUData& ctu, int row, int col, CTUTask& task);
246
247
};
248
    
249
#endif