/src/libvpx/vp9/encoder/vp9_multi_thread.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2017 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <assert.h> |
12 | | |
13 | | #include "vpx_util/vpx_pthread.h" |
14 | | #include "vp9/encoder/vp9_encoder.h" |
15 | | #include "vp9/encoder/vp9_ethread.h" |
16 | | #include "vp9/encoder/vp9_multi_thread.h" |
17 | | #include "vp9/encoder/vp9_temporal_filter.h" |
18 | | |
19 | | void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt, |
20 | 0 | int tile_id) { |
21 | 0 | RowMTInfo *row_mt_info; |
22 | 0 | JobQueueHandle *job_queue_hdl = NULL; |
23 | 0 | void *next = NULL; |
24 | 0 | JobNode *job_info = NULL; |
25 | 0 | #if CONFIG_MULTITHREAD |
26 | 0 | pthread_mutex_t *mutex_handle = NULL; |
27 | 0 | #endif |
28 | |
|
29 | 0 | row_mt_info = (RowMTInfo *)(&multi_thread_ctxt->row_mt_info[tile_id]); |
30 | 0 | job_queue_hdl = (JobQueueHandle *)&row_mt_info->job_queue_hdl; |
31 | 0 | #if CONFIG_MULTITHREAD |
32 | 0 | mutex_handle = &row_mt_info->job_mutex; |
33 | 0 | #endif |
34 | | |
35 | | // lock the mutex for queue access |
36 | 0 | #if CONFIG_MULTITHREAD |
37 | 0 | pthread_mutex_lock(mutex_handle); |
38 | 0 | #endif |
39 | 0 | next = job_queue_hdl->next; |
40 | 0 | if (next != NULL) { |
41 | 0 | JobQueue *job_queue = (JobQueue *)next; |
42 | 0 | job_info = &job_queue->job_info; |
43 | | // Update the next job in the queue |
44 | 0 | job_queue_hdl->next = job_queue->next; |
45 | 0 | job_queue_hdl->num_jobs_acquired++; |
46 | 0 | } |
47 | |
|
48 | 0 | #if CONFIG_MULTITHREAD |
49 | 0 | pthread_mutex_unlock(mutex_handle); |
50 | 0 | #endif |
51 | |
|
52 | 0 | return job_info; |
53 | 0 | } |
54 | | |
55 | | void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi, |
56 | 0 | TileDataEnc *const this_tile) { |
57 | 0 | VP9_COMMON *const cm = &cpi->common; |
58 | 0 | const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; |
59 | 0 | int i; |
60 | |
|
61 | 0 | if (this_tile->row_base_thresh_freq_fact != NULL) { |
62 | 0 | if (sb_rows <= this_tile->sb_rows) { |
63 | 0 | return; |
64 | 0 | } |
65 | 0 | vpx_free(this_tile->row_base_thresh_freq_fact); |
66 | 0 | this_tile->row_base_thresh_freq_fact = NULL; |
67 | 0 | } |
68 | 0 | CHECK_MEM_ERROR( |
69 | 0 | &cm->error, this_tile->row_base_thresh_freq_fact, |
70 | 0 | (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES, |
71 | 0 | sizeof(*(this_tile->row_base_thresh_freq_fact)))); |
72 | 0 | for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++) |
73 | 0 | this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT; |
74 | 0 | this_tile->sb_rows = sb_rows; |
75 | 0 | } |
76 | | |
77 | 0 | void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { |
78 | 0 | struct VP9Common *cm = &cpi->common; |
79 | 0 | MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; |
80 | 0 | int tile_row, tile_col; |
81 | 0 | const int tile_cols = 1 << cm->log2_tile_cols; |
82 | 0 | const int tile_rows = 1 << cm->log2_tile_rows; |
83 | 0 | const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; |
84 | 0 | int jobs_per_tile_col, total_jobs; |
85 | | |
86 | | // Allocate memory that is large enough for all row_mt stages. First pass |
87 | | // uses 16x16 block size. |
88 | 0 | jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows); |
89 | | // Calculate the total number of jobs |
90 | 0 | total_jobs = jobs_per_tile_col * tile_cols; |
91 | |
|
92 | 0 | multi_thread_ctxt->allocated_tile_cols = tile_cols; |
93 | 0 | multi_thread_ctxt->allocated_tile_rows = tile_rows; |
94 | 0 | multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col; |
95 | |
|
96 | 0 | CHECK_MEM_ERROR(&cm->error, multi_thread_ctxt->job_queue, |
97 | 0 | (JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue))); |
98 | |
|
99 | 0 | #if CONFIG_MULTITHREAD |
100 | | // Create mutex for each tile |
101 | 0 | for (tile_col = 0; tile_col < tile_cols; tile_col++) { |
102 | 0 | RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col]; |
103 | 0 | pthread_mutex_init(&row_mt_info->job_mutex, NULL); |
104 | 0 | } |
105 | 0 | #endif |
106 | | |
107 | | // Allocate memory for row based multi-threading |
108 | 0 | for (tile_col = 0; tile_col < tile_cols; tile_col++) { |
109 | 0 | TileDataEnc *this_tile = &cpi->tile_data[tile_col]; |
110 | 0 | vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col); |
111 | 0 | } |
112 | | |
113 | | // Assign the sync pointer of tile row zero for every tile row > 0 |
114 | 0 | for (tile_row = 1; tile_row < tile_rows; tile_row++) { |
115 | 0 | for (tile_col = 0; tile_col < tile_cols; tile_col++) { |
116 | 0 | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; |
117 | 0 | TileDataEnc *this_col_tile = &cpi->tile_data[tile_col]; |
118 | 0 | this_tile->row_mt_sync = this_col_tile->row_mt_sync; |
119 | 0 | } |
120 | 0 | } |
121 | | |
122 | | // Calculate the number of vertical units in the given tile row |
123 | 0 | for (tile_row = 0; tile_row < tile_rows; tile_row++) { |
124 | 0 | TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols]; |
125 | 0 | TileInfo *tile_info = &this_tile->tile_info; |
126 | 0 | multi_thread_ctxt->num_tile_vert_sbs[tile_row] = |
127 | 0 | get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | 2.98k | void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { |
132 | 2.98k | MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; |
133 | 2.98k | int tile_col; |
134 | 2.98k | #if CONFIG_MULTITHREAD |
135 | 2.98k | int tile_row; |
136 | 2.98k | #endif |
137 | | |
138 | | // Deallocate memory for job queue |
139 | 2.98k | if (multi_thread_ctxt->job_queue) { |
140 | 0 | vpx_free(multi_thread_ctxt->job_queue); |
141 | 0 | multi_thread_ctxt->job_queue = NULL; |
142 | 0 | } |
143 | | |
144 | 2.98k | #if CONFIG_MULTITHREAD |
145 | | // Destroy mutex for each tile |
146 | 2.98k | for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols; |
147 | 2.98k | tile_col++) { |
148 | 0 | RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col]; |
149 | 0 | pthread_mutex_destroy(&row_mt_info->job_mutex); |
150 | 0 | } |
151 | 2.98k | #endif |
152 | | |
153 | | // Free row based multi-threading sync memory |
154 | 2.98k | for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols; |
155 | 2.98k | tile_col++) { |
156 | 0 | TileDataEnc *this_tile = &cpi->tile_data[tile_col]; |
157 | 0 | vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync); |
158 | 0 | } |
159 | | |
160 | 2.98k | #if CONFIG_MULTITHREAD |
161 | 2.98k | for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows; |
162 | 2.98k | tile_row++) { |
163 | 0 | for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols; |
164 | 0 | tile_col++) { |
165 | 0 | TileDataEnc *this_tile = |
166 | 0 | &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols + |
167 | 0 | tile_col]; |
168 | 0 | if (this_tile->row_base_thresh_freq_fact != NULL) { |
169 | 0 | vpx_free(this_tile->row_base_thresh_freq_fact); |
170 | 0 | this_tile->row_base_thresh_freq_fact = NULL; |
171 | 0 | } |
172 | 0 | } |
173 | 0 | } |
174 | 2.98k | #endif |
175 | | |
176 | 2.98k | multi_thread_ctxt->allocated_tile_cols = 0; |
177 | 2.98k | multi_thread_ctxt->allocated_tile_rows = 0; |
178 | 2.98k | multi_thread_ctxt->allocated_vert_unit_rows = 0; |
179 | 2.98k | } |
180 | | |
181 | 0 | void vp9_multi_thread_tile_init(VP9_COMP *cpi) { |
182 | 0 | VP9_COMMON *const cm = &cpi->common; |
183 | 0 | const int tile_cols = 1 << cm->log2_tile_cols; |
184 | 0 | const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; |
185 | 0 | int i; |
186 | |
|
187 | 0 | for (i = 0; i < tile_cols; i++) { |
188 | 0 | TileDataEnc *this_tile = &cpi->tile_data[i]; |
189 | 0 | int jobs_per_tile_col = cpi->oxcf.pass == 1 ? cm->mb_rows : sb_rows; |
190 | | |
191 | | // Initialize cur_col to -1 for all rows. |
192 | 0 | memset(this_tile->row_mt_sync.cur_col, -1, |
193 | 0 | sizeof(*this_tile->row_mt_sync.cur_col) * jobs_per_tile_col); |
194 | 0 | vp9_zero(this_tile->fp_data); |
195 | 0 | this_tile->fp_data.image_data_start_row = INVALID_ROW; |
196 | 0 | } |
197 | 0 | } |
198 | | |
199 | | void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt, |
200 | 0 | int tile_cols, int num_workers) { |
201 | 0 | int tile_id = 0; |
202 | 0 | int i; |
203 | | |
204 | | // Allocating the threads for the tiles |
205 | 0 | for (i = 0; i < num_workers; i++) { |
206 | 0 | multi_thread_ctxt->thread_id_to_tile_id[i] = tile_id++; |
207 | 0 | if (tile_id == tile_cols) tile_id = 0; |
208 | 0 | } |
209 | 0 | } |
210 | | |
211 | | int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt, |
212 | 0 | int cur_tile_id) { |
213 | 0 | RowMTInfo *row_mt_info; |
214 | 0 | JobQueueHandle *job_queue_hndl; |
215 | 0 | #if CONFIG_MULTITHREAD |
216 | 0 | pthread_mutex_t *mutex; |
217 | 0 | #endif |
218 | 0 | int num_jobs_remaining; |
219 | |
|
220 | 0 | row_mt_info = &multi_thread_ctxt->row_mt_info[cur_tile_id]; |
221 | 0 | job_queue_hndl = &row_mt_info->job_queue_hdl; |
222 | 0 | #if CONFIG_MULTITHREAD |
223 | 0 | mutex = &row_mt_info->job_mutex; |
224 | 0 | #endif |
225 | |
|
226 | 0 | #if CONFIG_MULTITHREAD |
227 | 0 | pthread_mutex_lock(mutex); |
228 | 0 | #endif |
229 | 0 | num_jobs_remaining = |
230 | 0 | multi_thread_ctxt->jobs_per_tile_col - job_queue_hndl->num_jobs_acquired; |
231 | 0 | #if CONFIG_MULTITHREAD |
232 | 0 | pthread_mutex_unlock(mutex); |
233 | 0 | #endif |
234 | |
|
235 | 0 | return (num_jobs_remaining); |
236 | 0 | } |
237 | | |
// Build the per-tile-column job queues for one row-mt stage. The shared
// job_queue buffer is carved into tile_cols contiguous segments of
// jobs_per_tile_col entries each; within a segment the entries are chained
// into a singly linked list that the per-tile JobQueueHandle points at.
// Also resets each worker's per-tile completion status.
void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  JobQueue *job_queue = multi_thread_ctxt->job_queue;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int job_row_num, jobs_per_tile, jobs_per_tile_col = 0, total_jobs;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  int tile_col, i;

  // One job per vertical unit; the unit size depends on the stage:
  // SB rows for encoding, MB rows for first pass, temporal-filter block
  // rows for ARNR.
  switch (job_type) {
    case ENCODE_JOB: jobs_per_tile_col = sb_rows; break;
    case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break;
    case ARNR_JOB:
      jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT);
      break;
    default: assert(0);
  }

  total_jobs = jobs_per_tile_col * tile_cols;

  multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
  // memset the entire job queue buffer to zero
  memset(job_queue, 0, total_jobs * sizeof(JobQueue));

  // Job queue preparation
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
    JobQueue *job_queue_curr, *job_queue_temp;
    int tile_row = 0;

    // Point this tile's handle at the head of its segment and reset the
    // acquired-job counter.
    tile_ctxt->job_queue_hdl.next = (void *)job_queue;
    tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;

    job_queue_curr = job_queue;
    job_queue_temp = job_queue;

    // loop over all the vertical rows
    for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col;
         job_row_num++, jobs_per_tile++) {
      job_queue_curr->job_info.vert_unit_row_num = job_row_num;
      job_queue_curr->job_info.tile_col_id = tile_col;
      job_queue_curr->job_info.tile_row_id = tile_row;
      // Link to the physically next entry; both pointers advance in
      // lock-step so job_queue_curr always names the node just linked.
      job_queue_curr->next = (void *)(job_queue_temp + 1);
      job_queue_curr = ++job_queue_temp;

      if (ENCODE_JOB == job_type) {
        // When this tile row's SB rows are exhausted, move to the next
        // tile row; -1 so the loop increment restarts the count at 0.
        if (jobs_per_tile >=
            multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
          tile_row++;
          jobs_per_tile = -1;
        }
      }
    }

    // Set the last pointer to NULL
    // (job_queue_curr is one past the last node after the loop, so step
    // back before terminating the list).
    job_queue_curr += -1;
    job_queue_curr->next = (void *)NULL;

    // Move to the next tile
    job_queue += jobs_per_tile_col;
  }

  // Reset every worker's view of tile completion for the new stage.
  for (i = 0; i < cpi->num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];
    thread_data->thread_id = i;

    for (tile_col = 0; tile_col < tile_cols; tile_col++)
      thread_data->tile_completion_status[tile_col] = 0;
  }
}
309 | | |
310 | | int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt, |
311 | | int *tile_completion_status, int *cur_tile_id, |
312 | 0 | int tile_cols) { |
313 | 0 | int tile_col; |
314 | 0 | int tile_id = -1; // Stores the tile ID with minimum proc done |
315 | 0 | int max_num_jobs_remaining = 0; |
316 | 0 | int num_jobs_remaining; |
317 | | |
318 | | // Mark the completion to avoid check in the loop |
319 | 0 | tile_completion_status[*cur_tile_id] = 1; |
320 | | // Check for the status of all the tiles |
321 | 0 | for (tile_col = 0; tile_col < tile_cols; tile_col++) { |
322 | 0 | if (tile_completion_status[tile_col] == 0) { |
323 | 0 | num_jobs_remaining = |
324 | 0 | vp9_get_job_queue_status(multi_thread_ctxt, tile_col); |
325 | | // Mark the completion to avoid checks during future switches across tiles |
326 | 0 | if (num_jobs_remaining == 0) tile_completion_status[tile_col] = 1; |
327 | 0 | if (num_jobs_remaining > max_num_jobs_remaining) { |
328 | 0 | max_num_jobs_remaining = num_jobs_remaining; |
329 | 0 | tile_id = tile_col; |
330 | 0 | } |
331 | 0 | } |
332 | 0 | } |
333 | |
|
334 | 0 | if (-1 == tile_id) { |
335 | 0 | return 1; |
336 | 0 | } else { |
337 | | // Update the cur ID to the next tile ID that will be processed, |
338 | | // which will be the least processed tile |
339 | 0 | *cur_tile_id = tile_id; |
340 | 0 | return 0; |
341 | 0 | } |
342 | 0 | } |