/src/libvpx/vp8/encoder/ethreading.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | #include <stddef.h> |
11 | | |
12 | | #include "onyx_int.h" |
13 | | #include "vpx_util/vpx_pthread.h" |
14 | | #include "vp8/common/threading.h" |
15 | | #include "vp8/common/common.h" |
16 | | #include "vp8/common/extend.h" |
17 | | #include "bitstream.h" |
18 | | #include "encodeframe.h" |
19 | | #include "ethreading.h" |
20 | | |
21 | | #if CONFIG_MULTITHREAD |
22 | | |
23 | | extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, |
24 | | int ok_to_skip); |
25 | | |
26 | 0 | static THREADFN thread_loopfilter(void *p_data) { |
27 | 0 | VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); |
28 | 0 | VP8_COMMON *cm = &cpi->common; |
29 | |
30 | 0 | while (1) { |
31 | 0 | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; |
32 | | |
33 | 0 | if (vp8_sem_wait(&cpi->h_event_start_lpf) == 0) { |
34 | | /* we're shutting down */ |
35 | 0 | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; |
36 | | |
37 | 0 | vp8_loopfilter_frame(cpi, cm); |
38 | |
39 | 0 | vp8_sem_post(&cpi->h_event_end_lpf); |
40 | 0 | } |
41 | 0 | } |
42 | |
43 | 0 | return THREAD_EXIT_SUCCESS; |
44 | 0 | } |
45 | | |
46 | 0 | static THREADFN thread_encoding_proc(void *p_data) { |
47 | 0 | int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; |
48 | 0 | VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); |
49 | 0 | MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); |
50 | 0 | ENTROPY_CONTEXT_PLANES mb_row_left_context; |
51 | |
52 | 0 | while (1) { |
53 | 0 | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; |
54 | | |
55 | 0 | if (vp8_sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) { |
56 | 0 | const int nsync = cpi->mt_sync_range; |
57 | 0 | VP8_COMMON *cm = &cpi->common; |
58 | 0 | int mb_row; |
59 | 0 | MACROBLOCK *x = &mbri->mb; |
60 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
61 | 0 | TOKENEXTRA *tp; |
62 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
63 | | TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24); |
64 | | const int num_part = (1 << cm->multi_token_partition); |
65 | | #endif |
66 | |
67 | 0 | int *segment_counts = mbri->segment_counts; |
68 | 0 | int *totalrate = &mbri->totalrate; |
69 | | |
70 | | /* we're shutting down */ |
71 | 0 | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; |
72 | | |
73 | 0 | xd->mode_info_context = cm->mi + cm->mode_info_stride * (ithread + 1); |
74 | 0 | xd->mode_info_stride = cm->mode_info_stride; |
75 | |
76 | 0 | for (mb_row = ithread + 1; mb_row < cm->mb_rows; |
77 | 0 | mb_row += (cpi->encoding_thread_count + 1)) { |
78 | 0 | int recon_yoffset, recon_uvoffset; |
79 | 0 | int mb_col; |
80 | 0 | int ref_fb_idx = cm->lst_fb_idx; |
81 | 0 | int dst_fb_idx = cm->new_fb_idx; |
82 | 0 | int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; |
83 | 0 | int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; |
84 | 0 | int map_index = (mb_row * cm->mb_cols); |
85 | 0 | const vpx_atomic_int *last_row_current_mb_col; |
86 | 0 | vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; |
87 | |
88 | | #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) |
89 | | vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)]; |
90 | | #else |
91 | 0 | tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); |
92 | 0 | cpi->tplist[mb_row].start = tp; |
93 | 0 | #endif |
94 | |
95 | 0 | last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; |
96 | | |
97 | | /* reset above block coeffs */ |
98 | 0 | xd->above_context = cm->above_context; |
99 | 0 | xd->left_context = &mb_row_left_context; |
100 | |
101 | 0 | vp8_zero(mb_row_left_context); |
102 | |
103 | 0 | xd->up_available = (mb_row != 0); |
104 | 0 | recon_yoffset = (mb_row * recon_y_stride * 16); |
105 | 0 | recon_uvoffset = (mb_row * recon_uv_stride * 8); |
106 | | |
107 | | /* Set the mb activity pointer to the start of the row. */ |
108 | 0 | x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; |
109 | | |
110 | | /* for each macroblock col in image */ |
111 | 0 | for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { |
112 | 0 | if (((mb_col - 1) % nsync) == 0) { |
113 | 0 | vpx_atomic_store_release(current_mb_col, mb_col - 1); |
114 | 0 | } |
115 | |
116 | 0 | if (mb_row && !(mb_col & (nsync - 1))) { |
117 | 0 | vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync); |
118 | 0 | } |
119 | |
120 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
121 | | tp = tp_start; |
122 | | #endif |
123 | | |
124 | | /* Distance of Mb to the various image edges. |
125 | | * These are specified to 1/8th pel as they are always compared |
126 | | * to values that are in 1/8th pel units |
127 | | */ |
128 | 0 | xd->mb_to_left_edge = -((mb_col * 16) << 3); |
129 | 0 | xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; |
130 | 0 | xd->mb_to_top_edge = -((mb_row * 16) << 3); |
131 | 0 | xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; |
132 | | |
133 | | /* Set up limit values for motion vectors used to prevent |
134 | | * them extending outside the UMV borders |
135 | | */ |
136 | 0 | x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); |
137 | 0 | x->mv_col_max = |
138 | 0 | ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); |
139 | 0 | x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); |
140 | 0 | x->mv_row_max = |
141 | 0 | ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); |
142 | |
143 | 0 | xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; |
144 | 0 | xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; |
145 | 0 | xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; |
146 | 0 | xd->left_available = (mb_col != 0); |
147 | |
148 | 0 | x->rddiv = cpi->RDDIV; |
149 | 0 | x->rdmult = cpi->RDMULT; |
150 | | |
151 | | /* Copy current mb to a buffer */ |
152 | 0 | vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); |
153 | |
154 | 0 | if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x); |
155 | | |
156 | | /* Is segmentation enabled */ |
157 | | /* MB level adjustment to quantizer */ |
158 | 0 | if (xd->segmentation_enabled) { |
159 | | /* Code to set segment id in xd->mbmi.segment_id for |
160 | | * current MB (with range checking) |
161 | | */ |
162 | 0 | if (cpi->segmentation_map[map_index + mb_col] <= 3) { |
163 | 0 | xd->mode_info_context->mbmi.segment_id = |
164 | 0 | cpi->segmentation_map[map_index + mb_col]; |
165 | 0 | } else { |
166 | 0 | xd->mode_info_context->mbmi.segment_id = 0; |
167 | 0 | } |
168 | |
169 | 0 | vp8cx_mb_init_quantizer(cpi, x, 1); |
170 | 0 | } else { |
171 | | /* Set to Segment 0 by default */ |
172 | 0 | xd->mode_info_context->mbmi.segment_id = 0; |
173 | 0 | } |
174 | |
175 | 0 | x->active_ptr = cpi->active_map + map_index + mb_col; |
176 | |
177 | 0 | if (cm->frame_type == KEY_FRAME) { |
178 | 0 | *totalrate += vp8cx_encode_intra_macroblock(cpi, x, &tp); |
179 | | #ifdef MODE_STATS |
180 | | y_modes[xd->mbmi.mode]++; |
181 | | #endif |
182 | 0 | } else { |
183 | 0 | *totalrate += vp8cx_encode_inter_macroblock( |
184 | 0 | cpi, x, &tp, recon_yoffset, recon_uvoffset, mb_row, mb_col); |
185 | |
186 | | #ifdef MODE_STATS |
187 | | inter_y_modes[xd->mbmi.mode]++; |
188 | | |
189 | | if (xd->mbmi.mode == SPLITMV) { |
190 | | int b; |
191 | | |
192 | | for (b = 0; b < xd->mbmi.partition_count; ++b) { |
193 | | inter_b_modes[x->partition->bmi[b].mode]++; |
194 | | } |
195 | | } |
196 | | |
197 | | #endif |
198 | | // Keep track of how many (consecutive) times a block |
199 | | // is coded as ZEROMV_LASTREF, for base layer frames. |
200 | | // Reset to 0 if it's coded as anything else. |
201 | 0 | if (cpi->current_layer == 0) { |
202 | 0 | if (xd->mode_info_context->mbmi.mode == ZEROMV && |
203 | 0 | xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) { |
204 | | // Increment, check for wrap-around. |
205 | 0 | if (cpi->consec_zero_last[map_index + mb_col] < 255) { |
206 | 0 | cpi->consec_zero_last[map_index + mb_col] += 1; |
207 | 0 | } |
208 | 0 | if (cpi->consec_zero_last_mvbias[map_index + mb_col] < 255) { |
209 | 0 | cpi->consec_zero_last_mvbias[map_index + mb_col] += 1; |
210 | 0 | } |
211 | 0 | } else { |
212 | 0 | cpi->consec_zero_last[map_index + mb_col] = 0; |
213 | 0 | cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; |
214 | 0 | } |
215 | 0 | if (x->zero_last_dot_suppress) { |
216 | 0 | cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; |
217 | 0 | } |
218 | 0 | } |
219 | | |
220 | | /* Special case code for cyclic refresh |
221 | | * If cyclic update enabled then copy |
222 | | * xd->mbmi.segment_id; (which may have been updated |
223 | | * based on mode during |
224 | | * vp8cx_encode_inter_macroblock()) back into the |
225 | | * global segmentation map |
226 | | */ |
227 | 0 | if ((cpi->current_layer == 0) && |
228 | 0 | (cpi->cyclic_refresh_mode_enabled && |
229 | 0 | xd->segmentation_enabled)) { |
230 | 0 | const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; |
231 | 0 | cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; |
232 | | |
233 | | /* If the block has been refreshed mark it as clean |
234 | | * (the magnitude of the -ve influences how long it |
235 | | * will be before we consider another refresh): |
236 | | * Else if it was coded (last frame 0,0) and has |
237 | | * not already been refreshed then mark it as a |
238 | | * candidate for cleanup next time (marked 0) else |
239 | | * mark it as dirty (1). |
240 | | */ |
241 | 0 | if (mbmi->segment_id) { |
242 | 0 | cpi->cyclic_refresh_map[map_index + mb_col] = -1; |
243 | 0 | } else if ((mbmi->mode == ZEROMV) && |
244 | 0 | (mbmi->ref_frame == LAST_FRAME)) { |
245 | 0 | if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) { |
246 | 0 | cpi->cyclic_refresh_map[map_index + mb_col] = 0; |
247 | 0 | } |
248 | 0 | } else { |
249 | 0 | cpi->cyclic_refresh_map[map_index + mb_col] = 1; |
250 | 0 | } |
251 | 0 | } |
252 | 0 | } |
253 | |
254 | | #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING |
255 | | /* pack tokens for this MB */ |
256 | | { |
257 | | int tok_count = tp - tp_start; |
258 | | vp8_pack_tokens(w, tp_start, tok_count); |
259 | | } |
260 | | #else |
261 | 0 | cpi->tplist[mb_row].stop = tp; |
262 | 0 | #endif |
263 | | /* Increment pointer into gf usage flags structure. */ |
264 | 0 | x->gf_active_ptr++; |
265 | | |
266 | | /* Increment the activity mask pointers. */ |
267 | 0 | x->mb_activity_ptr++; |
268 | | |
269 | | /* adjust to the next column of macroblocks */ |
270 | 0 | x->src.y_buffer += 16; |
271 | 0 | x->src.u_buffer += 8; |
272 | 0 | x->src.v_buffer += 8; |
273 | |
274 | 0 | recon_yoffset += 16; |
275 | 0 | recon_uvoffset += 8; |
276 | | |
277 | | /* Keep track of segment usage */ |
278 | 0 | segment_counts[xd->mode_info_context->mbmi.segment_id]++; |
279 | | |
280 | | /* skip to next mb */ |
281 | 0 | xd->mode_info_context++; |
282 | 0 | x->partition_info++; |
283 | 0 | xd->above_context++; |
284 | 0 | } |
285 | |
286 | 0 | vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, |
287 | 0 | xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); |
288 | |
289 | 0 | vpx_atomic_store_release(current_mb_col, mb_col + nsync); |
290 | | |
291 | | /* this is to account for the border */ |
292 | 0 | xd->mode_info_context++; |
293 | 0 | x->partition_info++; |
294 | |
295 | 0 | x->src.y_buffer += |
296 | 0 | 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - |
297 | 0 | 16 * cm->mb_cols; |
298 | 0 | x->src.u_buffer += |
299 | 0 | 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - |
300 | 0 | 8 * cm->mb_cols; |
301 | 0 | x->src.v_buffer += |
302 | 0 | 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - |
303 | 0 | 8 * cm->mb_cols; |
304 | |
305 | 0 | xd->mode_info_context += |
306 | 0 | xd->mode_info_stride * cpi->encoding_thread_count; |
307 | 0 | x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; |
308 | 0 | x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; |
309 | 0 | } |
310 | | /* Signal that this thread has completed processing its rows. */ |
311 | 0 | vp8_sem_post(&cpi->h_event_end_encoding[ithread]); |
312 | 0 | } |
313 | 0 | } |
314 | | |
315 | | /* printf("exit thread %d\n", ithread); */ |
316 | 0 | return THREAD_EXIT_SUCCESS; |
317 | 0 | } |
318 | | |
319 | 0 | static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) { |
320 | 0 | MACROBLOCK *x = mbsrc; |
321 | 0 | MACROBLOCK *z = mbdst; |
322 | 0 | int i; |
323 | |
324 | 0 | z->ss = x->ss; |
325 | 0 | z->ss_count = x->ss_count; |
326 | 0 | z->searches_per_step = x->searches_per_step; |
327 | 0 | z->errorperbit = x->errorperbit; |
328 | |
329 | 0 | z->sadperbit16 = x->sadperbit16; |
330 | 0 | z->sadperbit4 = x->sadperbit4; |
331 | | |
332 | | /* |
333 | | z->mv_col_min = x->mv_col_min; |
334 | | z->mv_col_max = x->mv_col_max; |
335 | | z->mv_row_min = x->mv_row_min; |
336 | | z->mv_row_max = x->mv_row_max; |
337 | | */ |
338 | |
339 | 0 | z->short_fdct4x4 = x->short_fdct4x4; |
340 | 0 | z->short_fdct8x4 = x->short_fdct8x4; |
341 | 0 | z->short_walsh4x4 = x->short_walsh4x4; |
342 | 0 | z->quantize_b = x->quantize_b; |
343 | 0 | z->optimize = x->optimize; |
344 | | |
345 | | /* |
346 | | z->mvc = x->mvc; |
347 | | z->src.y_buffer = x->src.y_buffer; |
348 | | z->src.u_buffer = x->src.u_buffer; |
349 | | z->src.v_buffer = x->src.v_buffer; |
350 | | */ |
351 | |
352 | 0 | z->mvcost[0] = x->mvcost[0]; |
353 | 0 | z->mvcost[1] = x->mvcost[1]; |
354 | 0 | z->mvsadcost[0] = x->mvsadcost[0]; |
355 | 0 | z->mvsadcost[1] = x->mvsadcost[1]; |
356 | |
357 | 0 | z->token_costs = x->token_costs; |
358 | 0 | z->inter_bmode_costs = x->inter_bmode_costs; |
359 | 0 | z->mbmode_cost = x->mbmode_cost; |
360 | 0 | z->intra_uv_mode_cost = x->intra_uv_mode_cost; |
361 | 0 | z->bmode_costs = x->bmode_costs; |
362 | |
363 | 0 | for (i = 0; i < 25; ++i) { |
364 | 0 | z->block[i].quant = x->block[i].quant; |
365 | 0 | z->block[i].quant_fast = x->block[i].quant_fast; |
366 | 0 | z->block[i].quant_shift = x->block[i].quant_shift; |
367 | 0 | z->block[i].zbin = x->block[i].zbin; |
368 | 0 | z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; |
369 | 0 | z->block[i].round = x->block[i].round; |
370 | 0 | z->block[i].src_stride = x->block[i].src_stride; |
371 | 0 | } |
372 | |
373 | 0 | z->q_index = x->q_index; |
374 | 0 | z->act_zbin_adj = x->act_zbin_adj; |
375 | 0 | z->last_act_zbin_adj = x->last_act_zbin_adj; |
376 | |
377 | 0 | { |
378 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
379 | 0 | MACROBLOCKD *zd = &z->e_mbd; |
380 | | |
381 | | /* |
382 | | zd->mode_info_context = xd->mode_info_context; |
383 | | zd->mode_info = xd->mode_info; |
384 | | |
385 | | zd->mode_info_stride = xd->mode_info_stride; |
386 | | zd->frame_type = xd->frame_type; |
387 | | zd->up_available = xd->up_available ; |
388 | | zd->left_available = xd->left_available; |
389 | | zd->left_context = xd->left_context; |
390 | | zd->last_frame_dc = xd->last_frame_dc; |
391 | | zd->last_frame_dccons = xd->last_frame_dccons; |
392 | | zd->gold_frame_dc = xd->gold_frame_dc; |
393 | | zd->gold_frame_dccons = xd->gold_frame_dccons; |
394 | | zd->mb_to_left_edge = xd->mb_to_left_edge; |
395 | | zd->mb_to_right_edge = xd->mb_to_right_edge; |
396 | | zd->mb_to_top_edge = xd->mb_to_top_edge ; |
397 | | zd->mb_to_bottom_edge = xd->mb_to_bottom_edge; |
398 | | zd->gf_active_ptr = xd->gf_active_ptr; |
399 | | zd->frames_since_golden = xd->frames_since_golden; |
400 | | zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame; |
401 | | */ |
402 | 0 | zd->subpixel_predict = xd->subpixel_predict; |
403 | 0 | zd->subpixel_predict8x4 = xd->subpixel_predict8x4; |
404 | 0 | zd->subpixel_predict8x8 = xd->subpixel_predict8x8; |
405 | 0 | zd->subpixel_predict16x16 = xd->subpixel_predict16x16; |
406 | 0 | zd->segmentation_enabled = xd->segmentation_enabled; |
407 | 0 | zd->mb_segment_abs_delta = xd->mb_segment_abs_delta; |
408 | 0 | memcpy(zd->segment_feature_data, xd->segment_feature_data, |
409 | 0 | sizeof(xd->segment_feature_data)); |
410 | |
411 | 0 | memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); |
412 | 0 | memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); |
413 | 0 | memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); |
414 | 0 | memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); |
415 | |
416 | 0 | #if 1 |
417 | | /*TODO: Remove dequant from BLOCKD. This is a temporary solution until |
418 | | * the quantizer code uses a passed in pointer to the dequant constants. |
419 | | * This will also require modifications to the x86 and neon assembly. |
420 | | * */ |
421 | 0 | for (i = 0; i < 16; ++i) zd->block[i].dequant = zd->dequant_y1; |
422 | 0 | for (i = 16; i < 24; ++i) zd->block[i].dequant = zd->dequant_uv; |
423 | 0 | zd->block[24].dequant = zd->dequant_y2; |
424 | 0 | #endif |
425 | |
426 | 0 | memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); |
427 | 0 | memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult)); |
428 | |
429 | 0 | z->zbin_over_quant = x->zbin_over_quant; |
430 | 0 | z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; |
431 | 0 | z->zbin_mode_boost = x->zbin_mode_boost; |
432 | |
|
433 | 0 | memset(z->error_bins, 0, sizeof(z->error_bins)); |
434 | 0 | } |
435 | 0 | } |
436 | | |
437 | | void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x, |
438 | 0 | MB_ROW_COMP *mbr_ei, int count) { |
439 | 0 | VP8_COMMON *const cm = &cpi->common; |
440 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
441 | 0 | int i; |
442 | |
443 | 0 | for (i = 0; i < count; ++i) { |
444 | 0 | MACROBLOCK *mb = &mbr_ei[i].mb; |
445 | 0 | MACROBLOCKD *mbd = &mb->e_mbd; |
446 | |
447 | 0 | mbd->subpixel_predict = xd->subpixel_predict; |
448 | 0 | mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; |
449 | 0 | mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; |
450 | 0 | mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; |
451 | 0 | mb->gf_active_ptr = x->gf_active_ptr; |
452 | |
453 | 0 | memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); |
454 | 0 | mbr_ei[i].totalrate = 0; |
455 | |
456 | 0 | mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); |
457 | |
458 | 0 | mbd->frame_type = cm->frame_type; |
459 | |
460 | 0 | mb->src = *cpi->Source; |
461 | 0 | mbd->pre = cm->yv12_fb[cm->lst_fb_idx]; |
462 | 0 | mbd->dst = cm->yv12_fb[cm->new_fb_idx]; |
463 | |
464 | 0 | mb->src.y_buffer += 16 * x->src.y_stride * (i + 1); |
465 | 0 | mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1); |
466 | 0 | mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1); |
467 | |
468 | 0 | vp8_build_block_offsets(mb); |
469 | |
470 | 0 | mbd->left_context = &cm->left_context; |
471 | 0 | mb->mvc = cm->fc.mvc; |
472 | |
473 | 0 | setup_mbby_copy(&mbr_ei[i].mb, x); |
474 | |
475 | 0 | mbd->fullpixel_mask = ~0; |
476 | 0 | if (cm->full_pixel) mbd->fullpixel_mask = ~7; |
477 | |
478 | 0 | vp8_zero(mb->coef_counts); |
479 | 0 | vp8_zero(x->ymode_count); |
480 | 0 | mb->skip_true_count = 0; |
481 | 0 | vp8_zero(mb->MVcount); |
482 | 0 | mb->prediction_error = 0; |
483 | 0 | mb->intra_error = 0; |
484 | 0 | vp8_zero(mb->count_mb_ref_frame_usage); |
485 | 0 | mb->mbs_tested_so_far = 0; |
486 | 0 | mb->mbs_zero_last_dot_suppress = 0; |
487 | 0 | } |
488 | 0 | } |
489 | | |
490 | 4.91k | int vp8cx_create_encoder_threads(VP8_COMP *cpi) { |
491 | 4.91k | const VP8_COMMON *cm = &cpi->common; |
492 | 4.91k | int th_count = 0; |
493 | | |
494 | 4.91k | if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { |
495 | 0 | th_count = cpi->oxcf.multi_threaded - 1; |
496 | | |
497 | | /* don't allocate more threads than cores available */ |
498 | 0 | if (cpi->oxcf.multi_threaded > cm->processor_core_count) { |
499 | 0 | th_count = cm->processor_core_count - 1; |
500 | 0 | } |
501 | | |
502 | | /* we have th_count + 1 (main) threads processing one row each */ |
503 | | /* no point in having more threads than the sync range allows */ |
504 | 0 | if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) { |
505 | 0 | th_count = (cm->mb_cols / cpi->mt_sync_range) - 1; |
506 | 0 | } |
507 | 0 | } |
508 | 4.91k | if (th_count == cpi->encoding_thread_count) return 0; |
509 | | |
510 | 0 | vp8cx_remove_encoder_threads(cpi); |
511 | 0 | if (th_count != 0) { |
512 | 0 | int ithread; |
513 | 0 | int rc = 0; |
514 | |
515 | 0 | CHECK_MEM_ERROR(&cpi->common.error, cpi->h_encoding_thread, |
516 | 0 | vpx_malloc(sizeof(pthread_t) * th_count)); |
517 | 0 | CHECK_MEM_ERROR(&cpi->common.error, cpi->h_event_start_encoding, |
518 | 0 | vpx_malloc(sizeof(vp8_sem_t) * th_count)); |
519 | 0 | CHECK_MEM_ERROR(&cpi->common.error, cpi->h_event_end_encoding, |
520 | 0 | vpx_malloc(sizeof(vp8_sem_t) * th_count)); |
521 | 0 | CHECK_MEM_ERROR(&cpi->common.error, cpi->mb_row_ei, |
522 | 0 | vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); |
523 | 0 | memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); |
524 | 0 | CHECK_MEM_ERROR(&cpi->common.error, cpi->en_thread_data, |
525 | 0 | vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); |
526 | |
|
527 | 0 | vpx_atomic_store_release(&cpi->b_multi_threaded, 1); |
528 | 0 | cpi->encoding_thread_count = th_count; |
529 | | |
530 | | /* |
531 | | printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", |
532 | | (cpi->encoding_thread_count +1)); |
533 | | */ |
534 | |
535 | 0 | for (ithread = 0; ithread < th_count; ++ithread) { |
536 | 0 | ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; |
537 | | |
538 | | /* Setup block ptrs and offsets */ |
539 | 0 | vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); |
540 | 0 | vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); |
541 | |
542 | 0 | vp8_sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); |
543 | 0 | vp8_sem_init(&cpi->h_event_end_encoding[ithread], 0, 0); |
544 | |
545 | 0 | ethd->ithread = ithread; |
546 | 0 | ethd->ptr1 = (void *)cpi; |
547 | 0 | ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; |
548 | |
549 | 0 | rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, |
550 | 0 | thread_encoding_proc, ethd); |
551 | 0 | if (rc) break; |
552 | 0 | } |
553 | |
554 | 0 | if (rc) { |
555 | | /* shutdown other threads */ |
556 | 0 | vpx_atomic_store_release(&cpi->b_multi_threaded, 0); |
557 | 0 | for (--ithread; ithread >= 0; ithread--) { |
558 | 0 | vp8_sem_post(&cpi->h_event_start_encoding[ithread]); |
559 | 0 | vp8_sem_post(&cpi->h_event_end_encoding[ithread]); |
560 | 0 | pthread_join(cpi->h_encoding_thread[ithread], 0); |
561 | 0 | vp8_sem_destroy(&cpi->h_event_start_encoding[ithread]); |
562 | 0 | vp8_sem_destroy(&cpi->h_event_end_encoding[ithread]); |
563 | 0 | } |
564 | | |
565 | | /* free thread related resources */ |
566 | 0 | vpx_free(cpi->h_event_start_encoding); |
567 | 0 | cpi->h_event_start_encoding = NULL; |
568 | 0 | vpx_free(cpi->h_event_end_encoding); |
569 | 0 | cpi->h_event_end_encoding = NULL; |
570 | 0 | vpx_free(cpi->h_encoding_thread); |
571 | 0 | cpi->h_encoding_thread = NULL; |
572 | 0 | vpx_free(cpi->mb_row_ei); |
573 | 0 | cpi->mb_row_ei = NULL; |
574 | 0 | vpx_free(cpi->en_thread_data); |
575 | 0 | cpi->en_thread_data = NULL; |
576 | 0 | cpi->encoding_thread_count = 0; |
577 | |
578 | 0 | return -1; |
579 | 0 | } |
580 | | |
581 | 0 | { |
582 | 0 | LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data; |
583 | |
584 | 0 | vp8_sem_init(&cpi->h_event_start_lpf, 0, 0); |
585 | 0 | vp8_sem_init(&cpi->h_event_end_lpf, 0, 0); |
586 | |
587 | 0 | lpfthd->ptr1 = (void *)cpi; |
588 | 0 | rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); |
589 | |
590 | 0 | if (rc) { |
591 | | /* shutdown other threads */ |
592 | 0 | vpx_atomic_store_release(&cpi->b_multi_threaded, 0); |
593 | 0 | for (--ithread; ithread >= 0; ithread--) { |
594 | 0 | vp8_sem_post(&cpi->h_event_start_encoding[ithread]); |
595 | 0 | vp8_sem_post(&cpi->h_event_end_encoding[ithread]); |
596 | 0 | pthread_join(cpi->h_encoding_thread[ithread], 0); |
597 | 0 | vp8_sem_destroy(&cpi->h_event_start_encoding[ithread]); |
598 | 0 | vp8_sem_destroy(&cpi->h_event_end_encoding[ithread]); |
599 | 0 | } |
600 | 0 | vp8_sem_destroy(&cpi->h_event_end_lpf); |
601 | 0 | vp8_sem_destroy(&cpi->h_event_start_lpf); |
602 | | |
603 | | /* free thread related resources */ |
604 | 0 | vpx_free(cpi->h_event_start_encoding); |
605 | 0 | cpi->h_event_start_encoding = NULL; |
606 | 0 | vpx_free(cpi->h_event_end_encoding); |
607 | 0 | cpi->h_event_end_encoding = NULL; |
608 | 0 | vpx_free(cpi->h_encoding_thread); |
609 | 0 | cpi->h_encoding_thread = NULL; |
610 | 0 | vpx_free(cpi->mb_row_ei); |
611 | 0 | cpi->mb_row_ei = NULL; |
612 | 0 | vpx_free(cpi->en_thread_data); |
613 | 0 | cpi->en_thread_data = NULL; |
614 | 0 | cpi->encoding_thread_count = 0; |
615 | |
616 | 0 | return -2; |
617 | 0 | } |
618 | 0 | } |
619 | 0 | } |
620 | 0 | return 0; |
621 | 0 | } |
622 | | |
623 | 4.91k | void vp8cx_remove_encoder_threads(VP8_COMP *cpi) { |
624 | 4.91k | if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) { |
625 | | /* shutdown other threads */ |
626 | 0 | vpx_atomic_store_release(&cpi->b_multi_threaded, 0); |
627 | 0 | { |
628 | 0 | int i; |
629 | |
630 | 0 | for (i = 0; i < cpi->encoding_thread_count; ++i) { |
631 | 0 | vp8_sem_post(&cpi->h_event_start_encoding[i]); |
632 | 0 | vp8_sem_post(&cpi->h_event_end_encoding[i]); |
633 | |
634 | 0 | pthread_join(cpi->h_encoding_thread[i], 0); |
635 | |
636 | 0 | vp8_sem_destroy(&cpi->h_event_start_encoding[i]); |
637 | 0 | vp8_sem_destroy(&cpi->h_event_end_encoding[i]); |
638 | 0 | } |
639 | |
640 | 0 | vp8_sem_post(&cpi->h_event_start_lpf); |
641 | 0 | pthread_join(cpi->h_filter_thread, 0); |
642 | 0 | } |
643 | |
644 | 0 | vp8_sem_destroy(&cpi->h_event_end_lpf); |
645 | 0 | vp8_sem_destroy(&cpi->h_event_start_lpf); |
646 | 0 | cpi->b_lpf_running = 0; |
647 | | |
648 | | /* free thread related resources */ |
649 | 0 | vpx_free(cpi->mt_current_mb_col); |
650 | 0 | cpi->mt_current_mb_col = NULL; |
651 | 0 | cpi->mt_current_mb_col_size = 0; |
652 | 0 | vpx_free(cpi->h_event_start_encoding); |
653 | 0 | cpi->h_event_start_encoding = NULL; |
654 | 0 | vpx_free(cpi->h_event_end_encoding); |
655 | 0 | cpi->h_event_end_encoding = NULL; |
656 | 0 | vpx_free(cpi->h_encoding_thread); |
657 | 0 | cpi->h_encoding_thread = NULL; |
658 | 0 | vpx_free(cpi->mb_row_ei); |
659 | 0 | cpi->mb_row_ei = NULL; |
660 | 0 | vpx_free(cpi->en_thread_data); |
661 | 0 | cpi->en_thread_data = NULL; |
662 | 0 | cpi->encoding_thread_count = 0; |
663 | 0 | } |
664 | 4.91k | } |
665 | | #endif |
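
A minimal, self-contained sketch of the row-wavefront synchronization that thread_encoding_proc() above relies on: each worker publishes how far it has advanced along its macroblock row with a release store, and spin-waits with acquire loads until the row above is at least one sync range ahead (in the listing this is vpx_atomic_store_release() / vp8_atomic_spin_wait(), keyed off cpi->mt_sync_range). The names NUM_ROWS, NUM_COLS, SYNC_RANGE and process_mb() below are hypothetical placeholders for illustration, not libvpx API, and the worker publishes every column instead of every (nsync)th one to keep the example short.

/* Illustrative sketch only: plain C11 atomics + pthreads version of the
 * row-wavefront pattern used by the VP8 multithreaded encoder. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_ROWS 8
#define NUM_COLS 32
#define SYNC_RANGE 4 /* plays the role of cpi->mt_sync_range */

static atomic_int row_progress[NUM_ROWS]; /* last column published per row */

static void process_mb(int row, int col) {
  (void)row; /* stand-in for the per-macroblock encode work */
  (void)col;
}

static void *encode_row(void *arg) {
  const int row = (int)(intptr_t)arg;
  int col;
  for (col = 0; col < NUM_COLS; ++col) {
    /* Periodically wait until the row above is SYNC_RANGE columns ahead,
     * mirroring vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync).
     * (libvpx tests !(mb_col & (nsync - 1)), i.e. nsync is a power of two.) */
    if (row > 0 && (col % SYNC_RANGE) == 0) {
      while (atomic_load_explicit(&row_progress[row - 1],
                                  memory_order_acquire) < col + SYNC_RANGE) {
        /* spin; the real helper adds pause hints / thread_sleep(0) */
      }
    }
    process_mb(row, col);
    /* Publish progress so the row below may proceed, mirroring
     * vpx_atomic_store_release(current_mb_col, ...). */
    atomic_store_explicit(&row_progress[row], col, memory_order_release);
  }
  /* Release any remaining waiters, as the encoder does after its row loop
   * by storing mb_col + nsync. */
  atomic_store_explicit(&row_progress[row], NUM_COLS + SYNC_RANGE,
                        memory_order_release);
  return NULL;
}

int main(void) {
  pthread_t tid[NUM_ROWS];
  int i;
  for (i = 0; i < NUM_ROWS; ++i) atomic_init(&row_progress[i], -1);
  for (i = 0; i < NUM_ROWS; ++i) {
    pthread_create(&tid[i], NULL, encode_row, (void *)(intptr_t)i);
  }
  for (i = 0; i < NUM_ROWS; ++i) pthread_join(tid[i], NULL);
  printf("all %d rows encoded\n", NUM_ROWS);
  return 0;
}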