/src/libvpx/vp8/decoder/threading.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "vpx_config.h" |
12 | | #include "vp8_rtcd.h" |
13 | | #if !defined(_WIN32) && CONFIG_OS_SUPPORT == 1 |
14 | | #include <unistd.h> |
15 | | #endif |
16 | | #include "onyxd_int.h" |
17 | | #include "vpx_mem/vpx_mem.h" |
18 | | #include "vpx_util/vpx_pthread.h" |
19 | | #include "vp8/common/common.h" |
20 | | #include "vp8/common/threading.h" |
21 | | #include "vp8/common/loopfilter.h" |
22 | | #include "vp8/common/extend.h" |
23 | | #include "vpx_ports/vpx_timer.h" |
24 | | #include "decoderthreading.h" |
25 | | #include "detokenize.h" |
26 | | #include "vp8/common/reconintra4x4.h" |
27 | | #include "vp8/common/reconinter.h" |
28 | | #include "vp8/common/reconintra.h" |
29 | | #include "vp8/common/setupintrarecon.h" |
30 | | #if CONFIG_ERROR_CONCEALMENT |
31 | | #include "error_concealment.h" |
32 | | #endif |
33 | | |
/* Allocation helpers for VP8D_COMP members.
 *
 * Both macros expand to CHECK_MEM_ERROR against pbi->common.error, so a
 * variable named `pbi` must be in scope wherever they are used.
 * NOTE(review): CHECK_MEM_ERROR is defined elsewhere; presumably it assigns
 * the allocation result to (p) and reports a failure through the error
 * context -- confirm against its definition.
 *
 * CALLOC_ARRAY(p, n): zero-initialized array of n elements of type *(p).
 * vpx_calloc() is called as (count, element size), matching calloc()'s
 * parameter order.
 */
#define CALLOC_ARRAY(p, n) \
  CHECK_MEM_ERROR(&pbi->common.error, (p), vpx_calloc((n), sizeof(*(p))))

/* CALLOC_ARRAY_ALIGNED(p, n, algn): as CALLOC_ARRAY, but the storage comes
 * from vpx_memalign() with alignment `algn` and is zeroed explicitly.
 * `p` and `n` are evaluated more than once; pass side-effect-free
 * expressions.
 */
#define CALLOC_ARRAY_ALIGNED(p, n, algn)                       \
  do {                                                         \
    CHECK_MEM_ERROR(&pbi->common.error, (p),                   \
                    vpx_memalign((algn), sizeof(*(p)) * (n))); \
    memset((p), 0, (n) * sizeof(*(p)));                        \
  } while (0)
42 | | |
43 | | static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, |
44 | 5.16k | MB_ROW_DEC *mbrd, int count) { |
45 | 5.16k | VP8_COMMON *const pc = &pbi->common; |
46 | 5.16k | int i; |
47 | | |
48 | 10.7k | for (i = 0; i < count; ++i) { |
49 | 5.56k | MACROBLOCKD *mbd = &mbrd[i].mbd; |
50 | 5.56k | mbd->subpixel_predict = xd->subpixel_predict; |
51 | 5.56k | mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; |
52 | 5.56k | mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; |
53 | 5.56k | mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; |
54 | | |
55 | 5.56k | mbd->frame_type = pc->frame_type; |
56 | 5.56k | mbd->pre = xd->pre; |
57 | 5.56k | mbd->dst = xd->dst; |
58 | | |
59 | 5.56k | mbd->segmentation_enabled = xd->segmentation_enabled; |
60 | 5.56k | mbd->mb_segment_abs_delta = xd->mb_segment_abs_delta; |
61 | 5.56k | memcpy(mbd->segment_feature_data, xd->segment_feature_data, |
62 | 5.56k | sizeof(xd->segment_feature_data)); |
63 | | |
64 | | /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ |
65 | 5.56k | memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); |
66 | | /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ |
67 | 5.56k | memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); |
68 | | /*unsigned char mode_ref_lf_delta_enabled; |
69 | | unsigned char mode_ref_lf_delta_update;*/ |
70 | 5.56k | mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; |
71 | 5.56k | mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; |
72 | | |
73 | 5.56k | mbd->current_bc = &pbi->mbc[0]; |
74 | | |
75 | 5.56k | memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); |
76 | 5.56k | memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); |
77 | 5.56k | memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); |
78 | 5.56k | memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); |
79 | | |
80 | 5.56k | mbd->fullpixel_mask = ~0; |
81 | | |
82 | 5.56k | if (pc->full_pixel) mbd->fullpixel_mask = ~7; |
83 | 5.56k | } |
84 | | |
85 | 790k | for (i = 0; i < pc->mb_rows; ++i) |
86 | 785k | vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1); |
87 | 5.16k | } |
88 | | |
/* Decodes one macroblock in the multithreaded path.
 *
 * Steps, in order: (1) reset the token contexts or decode the tokens,
 * (2) refresh the dequantizer when segmentation is enabled, (3) build the
 * intra or inter prediction, (4) unless mb_skip_coeff is set, dequantize and
 * add the residual.
 *
 * pbi    - decoder instance.
 * xd     - macroblock context of the calling thread; mode_info_context,
 *          recon_above/recon_left and dst must already point at this MB.
 * mb_idx - only read when CONFIG_ERROR_CONCEALMENT is enabled.
 */
89 | | static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, |
90 | 2.90M | unsigned int mb_idx) { |
91 | 2.90M | MB_PREDICTION_MODE mode; |
92 | 2.90M | int i; |
93 | | #if CONFIG_ERROR_CONCEALMENT |
94 | | int corruption_detected = 0; |
95 | | #else |
96 | 2.90M | (void)mb_idx; |
97 | 2.90M | #endif |
98 | | |
/* With mb_skip_coeff set only the token contexts are reset; otherwise the
 * tokens are decoded, provided the bool decoder has not flagged an error. */
99 | 2.90M | if (xd->mode_info_context->mbmi.mb_skip_coeff) { |
100 | 2.56M | vp8_reset_mb_tokens_context(xd); |
101 | 2.56M | } else if (!vp8dx_bool_error(xd->current_bc)) { |
102 | 332k | int eobtotal; |
103 | 332k | eobtotal = vp8_decode_mb_tokens(pbi, xd); |
104 | | |
105 | | /* Special case: Force the loopfilter to skip when eobtotal is zero */ |
106 | 332k | xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0); |
107 | 332k | } |
108 | | |
109 | 2.90M | mode = xd->mode_info_context->mbmi.mode; |
110 | | |
111 | 2.90M | if (xd->segmentation_enabled) vp8_mb_init_dequantizer(pbi, xd); |
112 | | |
113 | | #if CONFIG_ERROR_CONCEALMENT |
114 | | |
115 | | if (pbi->ec_active) { |
116 | | int throw_residual; |
117 | | /* When we have independent partitions we can apply residual even |
118 | | * though other partitions within the frame are corrupt. |
119 | | */ |
120 | | throw_residual = |
121 | | (!pbi->independent_partitions && pbi->frame_corrupt_residual); |
122 | | throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); |
123 | | |
124 | | if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) { |
125 | | /* MB with corrupt residuals or corrupt mode/motion vectors. |
126 | | * Better to use the predictor as reconstruction. |
127 | | */ |
128 | | pbi->frame_corrupt_residual = 1; |
129 | | memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); |
130 | | |
131 | | corruption_detected = 1; |
132 | | |
133 | | /* force idct to be skipped for B_PRED and use the |
134 | | * prediction only for reconstruction |
135 | | * */ |
136 | | memset(xd->eobs, 0, 25); |
137 | | } |
138 | | } |
139 | | #endif |
140 | | |
141 | | /* do prediction */ |
142 | 2.90M | if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { |
143 | 1.21M | vp8_build_intra_predictors_mbuv_s( |
144 | 1.21M | xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1], |
145 | 1.21M | xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer, |
146 | 1.21M | xd->dst.v_buffer, xd->dst.uv_stride); |
147 | | |
148 | 1.21M | if (mode != B_PRED) { |
149 | 959k | vp8_build_intra_predictors_mby_s( |
150 | 959k | xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0], |
151 | 959k | xd->dst.y_buffer, xd->dst.y_stride); |
152 | 959k | } else { |
153 | 251k | short *DQC = xd->dequant_y1; |
154 | 251k | int dst_stride = xd->dst.y_stride; |
155 | | |
156 | | /* clear out residual eob info */ |
157 | 251k | if (xd->mode_info_context->mbmi.mb_skip_coeff) memset(xd->eobs, 0, 25); |
158 | | |
159 | 251k | intra_prediction_down_copy(xd, xd->recon_above[0] + 16); |
160 | | |
/* B_PRED: each of the 16 luma 4x4 blocks is predicted and, if it has
 * coefficients, reconstructed immediately, before the next block is
 * predicted. */
161 | 3.67M | for (i = 0; i < 16; ++i) { |
162 | 3.42M | BLOCKD *b = &xd->block[i]; |
163 | 3.42M | unsigned char *dst = xd->dst.y_buffer + b->offset; |
164 | 3.42M | B_PREDICTION_MODE b_mode = xd->mode_info_context->bmi[i].as_mode; |
165 | 3.42M | unsigned char *Above; |
166 | 3.42M | unsigned char *yleft; |
167 | 3.42M | int left_stride; |
168 | 3.42M | unsigned char top_left; |
169 | | |
/* With the loop filter on, blocks on the MB's top row (i < 4) and left
 * column (i % 4 == 0) read their neighbours from recon_above/recon_left;
 * every other block reads them directly from the destination buffer.
 * NOTE(review): presumably because the neighbouring MBs in the frame buffer
 * may already be loop-filtered -- confirm. */
170 | | /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 |
171 | | * above-right).*/ |
172 | 3.42M | if (i < 4 && pbi->common.filter_level) { |
173 | 833k | Above = xd->recon_above[0] + b->offset; |
174 | 2.58M | } else { |
175 | 2.58M | Above = dst - dst_stride; |
176 | 2.58M | } |
177 | | |
178 | 3.42M | if (i % 4 == 0 && pbi->common.filter_level) { |
179 | 834k | yleft = xd->recon_left[0] + i; |
180 | 834k | left_stride = 1; |
181 | 2.58M | } else { |
182 | 2.58M | yleft = dst - 1; |
183 | 2.58M | left_stride = dst_stride; |
184 | 2.58M | } |
185 | | |
186 | 3.42M | if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level) { |
187 | 628k | top_left = *(xd->recon_left[0] + i - 1); |
188 | 2.79M | } else { |
189 | 2.79M | top_left = Above[-1]; |
190 | 2.79M | } |
191 | | |
192 | 3.42M | vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, dst, dst_stride, |
193 | 3.42M | top_left); |
194 | | |
/* eobs[i] > 1 takes the full dequant+IDCT path; otherwise only the DC
 * coefficient is dequantized and added. */
195 | 3.42M | if (xd->eobs[i]) { |
196 | 1.00M | if (xd->eobs[i] > 1) { |
197 | 563k | vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); |
198 | 563k | } else { |
199 | 439k | vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst, |
200 | 439k | dst_stride); |
201 | 439k | memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); |
202 | 439k | } |
203 | 1.00M | } |
204 | 3.42M | } |
205 | 251k | } |
206 | 1.69M | } else { |
207 | 1.69M | vp8_build_inter_predictors_mb(xd); |
208 | 1.69M | } |
209 | | |
210 | | #if CONFIG_ERROR_CONCEALMENT |
211 | | if (corruption_detected) { |
212 | | return; |
213 | | } |
214 | | #endif |
215 | | |
/* Residual for non-skipped MBs. For B_PRED the luma residual was already
 * added block by block above, so only chroma is processed here. */
216 | 2.90M | if (!xd->mode_info_context->mbmi.mb_skip_coeff) { |
217 | | /* dequantization and idct */ |
218 | 299k | if (mode != B_PRED) { |
219 | 160k | short *DQC = xd->dequant_y1; |
220 | | |
221 | 160k | if (mode != SPLITMV) { |
222 | 152k | BLOCKD *b = &xd->block[24]; |
223 | | |
224 | | /* do 2nd order transform on the dc block */ |
225 | 152k | if (xd->eobs[24] > 1) { |
226 | 85.6k | vp8_dequantize_b(b, xd->dequant_y2); |
227 | | |
228 | 85.6k | vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); |
229 | 85.6k | memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); |
230 | 85.6k | } else { |
231 | 66.9k | b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; |
232 | 66.9k | vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); |
233 | 66.9k | memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); |
234 | 66.9k | } |
235 | | |
236 | | /* override the dc dequant constant in order to preserve the |
237 | | * dc components |
238 | | */ |
239 | 152k | DQC = xd->dequant_y1_dc; |
240 | 152k | } |
241 | | |
242 | 160k | vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer, |
243 | 160k | xd->dst.y_stride, xd->eobs); |
244 | 160k | } |
245 | | |
246 | 299k | vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv, |
247 | 299k | xd->dst.u_buffer, xd->dst.v_buffer, |
248 | 299k | xd->dst.uv_stride, xd->eobs + 16); |
249 | 299k | } |
250 | 2.90M | } |
251 | | |
/* Decodes the macroblock rows assigned to one thread.
 *
 * Rows are interleaved: this call handles start_mb_row and then every
 * (pbi->decoding_thread_count + 1)-th row after it. Each row publishes its
 * progress in pbi->mt_current_mb_col[mb_row] and, for rows other than row 0,
 * synchronizes against the counter of the row above every `nsync` columns.
 * When the loop filter is enabled each macroblock is filtered right after it
 * is decoded, and its last row / right column are saved first in the
 * mt_*above_row / mt_*left_col buffers for use by later intra prediction.
 *
 * pbi          - decoder instance.
 * xd           - macroblock context owned by the calling thread.
 * start_mb_row - first row handled by this thread.
 *
 * On a corrupted macroblock the function marks all of this thread's
 * remaining rows as finished and calls vpx_internal_error() on
 * xd->error_info. On normal completion it posts pbi->h_event_end_decoding.
 */
252 | | static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, |
253 | 10.7k | int start_mb_row) { |
254 | 10.7k | const vpx_atomic_int *last_row_current_mb_col; |
255 | 10.7k | vpx_atomic_int *current_mb_col; |
256 | 10.7k | int mb_row; |
257 | 10.7k | VP8_COMMON *pc = &pbi->common; |
258 | 10.7k | const int nsync = pbi->sync_range; |
/* Stand-in "row above" counter used for row 0, which has no real row above
 * it; initialized past the end of the row. */
259 | 10.7k | const vpx_atomic_int first_row_no_sync_above = |
260 | 10.7k | VPX_ATOMIC_INIT(pc->mb_cols + nsync); |
261 | 10.7k | int num_part = 1 << pbi->common.multi_token_partition; |
262 | 10.7k | int last_mb_row = start_mb_row; |
263 | | |
264 | 10.7k | YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; |
265 | 10.7k | YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; |
266 | | |
267 | 10.7k | int recon_y_stride = yv12_fb_new->y_stride; |
268 | 10.7k | int recon_uv_stride = yv12_fb_new->uv_stride; |
269 | | |
270 | 10.7k | unsigned char *ref_buffer[MAX_REF_FRAMES][3]; |
271 | 10.7k | unsigned char *dst_buffer[3]; |
272 | 10.7k | int i; |
273 | 10.7k | int ref_fb_corrupted[MAX_REF_FRAMES]; |
274 | | |
275 | 10.7k | ref_fb_corrupted[INTRA_FRAME] = 0; |
276 | | |
/* Cache plane pointers and the corrupted flag of every reference frame
 * (index 0, INTRA_FRAME, has no reference buffer). */
277 | 42.9k | for (i = 1; i < MAX_REF_FRAMES; ++i) { |
278 | 32.1k | YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; |
279 | | |
280 | 32.1k | ref_buffer[i][0] = this_fb->y_buffer; |
281 | 32.1k | ref_buffer[i][1] = this_fb->u_buffer; |
282 | 32.1k | ref_buffer[i][2] = this_fb->v_buffer; |
283 | | |
284 | 32.1k | ref_fb_corrupted[i] = this_fb->corrupted; |
285 | 32.1k | } |
286 | | |
287 | 10.7k | dst_buffer[0] = yv12_fb_new->y_buffer; |
288 | 10.7k | dst_buffer[1] = yv12_fb_new->u_buffer; |
289 | 10.7k | dst_buffer[2] = yv12_fb_new->v_buffer; |
290 | | |
291 | 10.7k | xd->up_available = (start_mb_row != 0); |
292 | | |
293 | 10.7k | xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; |
294 | 10.7k | xd->mode_info_stride = pc->mode_info_stride; |
295 | | |
/* One iteration per row owned by this thread. */
296 | 51.2k | for (mb_row = start_mb_row; mb_row < pc->mb_rows; |
297 | 40.5k | mb_row += (pbi->decoding_thread_count + 1)) { |
298 | 40.5k | int recon_yoffset, recon_uvoffset; |
299 | 40.5k | int mb_col; |
300 | 40.5k | int filter_level; |
301 | 40.5k | loop_filter_info_n *lfi_n = &pc->lf_info; |
302 | | |
303 | | /* save last row processed by this thread */ |
304 | 40.5k | last_mb_row = mb_row; |
305 | | /* select bool coder for current partition */ |
306 | 40.5k | xd->current_bc = &pbi->mbc[mb_row % num_part]; |
307 | | |
308 | 40.5k | if (mb_row > 0) { |
309 | 35.3k | last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1]; |
310 | 35.3k | } else { |
311 | 5.16k | last_row_current_mb_col = &first_row_no_sync_above; |
312 | 5.16k | } |
313 | | |
314 | 40.5k | current_mb_col = &pbi->mt_current_mb_col[mb_row]; |
315 | | |
316 | 40.5k | recon_yoffset = mb_row * recon_y_stride * 16; |
317 | 40.5k | recon_uvoffset = mb_row * recon_uv_stride * 8; |
318 | | |
319 | | /* reset contexts */ |
320 | 40.5k | xd->above_context = pc->above_context; |
321 | 40.5k | memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); |
322 | | |
323 | 40.5k | xd->left_available = 0; |
324 | | |
325 | 40.5k | xd->mb_to_top_edge = -((mb_row * 16) << 3); |
326 | 40.5k | xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; |
327 | | |
/* Intra-prediction neighbours: with the loop filter on they come from the
 * per-row side buffers (mt_*above_row, mt_*left_col); with it off they are
 * read straight from the destination frame buffer. */
328 | 40.5k | if (pbi->common.filter_level) { |
329 | 36.9k | xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32; |
330 | 36.9k | xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16; |
331 | 36.9k | xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16; |
332 | | |
333 | 36.9k | xd->recon_left[0] = pbi->mt_yleft_col[mb_row]; |
334 | 36.9k | xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; |
335 | 36.9k | xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; |
336 | | |
337 | | /* TODO: move to outside row loop */ |
338 | 36.9k | xd->recon_left_stride[0] = 1; |
339 | 36.9k | xd->recon_left_stride[1] = 1; |
340 | 36.9k | } else { |
341 | 3.60k | xd->recon_above[0] = dst_buffer[0] + recon_yoffset; |
342 | 3.60k | xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; |
343 | 3.60k | xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; |
344 | | |
345 | 3.60k | xd->recon_left[0] = xd->recon_above[0] - 1; |
346 | 3.60k | xd->recon_left[1] = xd->recon_above[1] - 1; |
347 | 3.60k | xd->recon_left[2] = xd->recon_above[2] - 1; |
348 | | |
349 | 3.60k | xd->recon_above[0] -= xd->dst.y_stride; |
350 | 3.60k | xd->recon_above[1] -= xd->dst.uv_stride; |
351 | 3.60k | xd->recon_above[2] -= xd->dst.uv_stride; |
352 | | |
353 | | /* TODO: move to outside row loop */ |
354 | 3.60k | xd->recon_left_stride[0] = xd->dst.y_stride; |
355 | 3.60k | xd->recon_left_stride[1] = xd->dst.uv_stride; |
356 | | |
357 | 3.60k | setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], |
358 | 3.60k | xd->recon_left[2], xd->dst.y_stride, |
359 | 3.60k | xd->dst.uv_stride); |
360 | 3.60k | } |
361 | | |
362 | 2.94M | for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) { |
/* Publish this row's progress (last finished column, mb_col - 1) once every
 * nsync columns. */
363 | 2.90M | if (((mb_col - 1) % nsync) == 0) { |
364 | 316k | vpx_atomic_store_release(current_mb_col, mb_col - 1); |
365 | 316k | } |
366 | | |
/* Every nsync columns (nsync - 1 is used as a bit mask, so nsync is expected
 * to be a power of two), synchronize with the row above.
 * NOTE(review): vp8_atomic_spin_wait is defined elsewhere; presumably it
 * spins until the row above is far enough ahead of mb_col -- confirm. */
367 | 2.90M | if (mb_row && !(mb_col & (nsync - 1))) { |
368 | 296k | vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync); |
369 | 296k | } |
370 | | |
371 | | /* Distance of MB to the various image edges. |
372 | | * These are specified to 8th pel as they are always |
373 | | * compared to values that are in 1/8th pel units. |
374 | | */ |
375 | 2.90M | xd->mb_to_left_edge = -((mb_col * 16) << 3); |
376 | 2.90M | xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; |
377 | | |
378 | | #if CONFIG_ERROR_CONCEALMENT |
379 | | { |
380 | | int corrupt_residual = |
381 | | (!pbi->independent_partitions && pbi->frame_corrupt_residual) || |
382 | | vp8dx_bool_error(xd->current_bc); |
383 | | if (pbi->ec_active && |
384 | | (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && |
385 | | corrupt_residual) { |
386 | | /* We have an intra block with corrupt |
387 | | * coefficients, better to conceal with an inter |
388 | | * block. |
389 | | * Interpolate MVs from neighboring MBs |
390 | | * |
391 | | * Note that for the first mb with corrupt |
392 | | * residual in a frame, we might not discover |
393 | | * that before decoding the residual. That |
394 | | * happens after this check, and therefore no |
395 | | * inter concealment will be done. |
396 | | */ |
397 | | vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols); |
398 | | } |
399 | | } |
400 | | #endif |
401 | | |
402 | 2.90M | xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; |
403 | 2.90M | xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; |
404 | 2.90M | xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; |
405 | | |
406 | | /* propagate errors from reference frames */ |
407 | 2.90M | xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; |
408 | | |
409 | 2.90M | if (xd->corrupted) { |
410 | | // Move current decoding macroblock to the end of row for all rows |
411 | | // assigned to this thread, such that other threads won't be waiting. |
412 | 753k | for (; mb_row < pc->mb_rows; |
413 | 747k | mb_row += (pbi->decoding_thread_count + 1)) { |
414 | 747k | current_mb_col = &pbi->mt_current_mb_col[mb_row]; |
415 | 747k | vpx_atomic_store_release(current_mb_col, pc->mb_cols + nsync); |
416 | 747k | } |
/* NOTE(review): mb_row has been advanced past the last row here, so the code
 * below relies on vpx_internal_error() not returning (presumably it
 * longjmps via xd->error_info) -- confirm. */
417 | 5.77k | vpx_internal_error(&xd->error_info, VPX_CODEC_CORRUPT_FRAME, |
418 | 5.77k | "Corrupted reference frame"); |
419 | 5.77k | } |
420 | | |
421 | 2.90M | if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { |
422 | 1.69M | const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; |
423 | 1.69M | xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; |
424 | 1.69M | xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; |
425 | 1.69M | xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; |
426 | 1.69M | } else { |
427 | | // ref_frame is INTRA_FRAME, pre buffer should not be used. |
428 | 1.21M | xd->pre.y_buffer = 0; |
429 | 1.21M | xd->pre.u_buffer = 0; |
430 | 1.21M | xd->pre.v_buffer = 0; |
431 | 1.21M | } |
432 | 2.90M | mt_decode_macroblock(pbi, xd, 0); |
433 | | |
434 | 2.90M | xd->left_available = 1; |
435 | | |
436 | | /* check if the boolean decoder has suffered an error */ |
437 | 2.90M | xd->corrupted |= vp8dx_bool_error(xd->current_bc); |
438 | | |
439 | 2.90M | xd->recon_above[0] += 16; |
440 | 2.90M | xd->recon_above[1] += 8; |
441 | 2.90M | xd->recon_above[2] += 8; |
442 | | |
/* recon_left only advances when it points into the frame buffer (loop
 * filter off); with the filter on it stays on the per-row left-column
 * buffers, which are refreshed below. */
443 | 2.90M | if (!pbi->common.filter_level) { |
444 | 42.2k | xd->recon_left[0] += 16; |
445 | 42.2k | xd->recon_left[1] += 8; |
446 | 42.2k | xd->recon_left[2] += 8; |
447 | 42.2k | } |
448 | | |
449 | 2.90M | if (pbi->common.filter_level) { |
/* skip_lf suppresses only the inner block-edge filters (bv/bh) below; the
 * macroblock-edge filters still run. */
450 | 2.75M | int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && |
451 | 2.75M | xd->mode_info_context->mbmi.mode != SPLITMV && |
452 | 2.75M | xd->mode_info_context->mbmi.mb_skip_coeff); |
453 | | |
454 | 2.75M | const int mode_index = |
455 | 2.75M | lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; |
456 | 2.75M | const int seg = xd->mode_info_context->mbmi.segment_id; |
457 | 2.75M | const int ref_frame = xd->mode_info_context->mbmi.ref_frame; |
458 | | |
459 | 2.75M | filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; |
460 | | |
/* The saves below run before this macroblock is loop-filtered further
 * down. */
461 | 2.75M | if (mb_row != pc->mb_rows - 1) { |
462 | | /* Save decoded MB last row data for next-row decoding */ |
463 | 2.71M | memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), |
464 | 2.71M | (xd->dst.y_buffer + 15 * recon_y_stride), 16); |
465 | 2.71M | memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), |
466 | 2.71M | (xd->dst.u_buffer + 7 * recon_uv_stride), 8); |
467 | 2.71M | memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), |
468 | 2.71M | (xd->dst.v_buffer + 7 * recon_uv_stride), 8); |
469 | 2.71M | } |
470 | | |
471 | | /* save left_col for next MB decoding */ |
472 | 2.75M | if (mb_col != pc->mb_cols - 1) { |
473 | 2.71M | MODE_INFO *next = xd->mode_info_context + 1; |
474 | | |
475 | 2.71M | if (next->mbmi.ref_frame == INTRA_FRAME) { |
476 | 18.7M | for (i = 0; i < 16; ++i) { |
477 | 17.5M | pbi->mt_yleft_col[mb_row][i] = |
478 | 17.5M | xd->dst.y_buffer[i * recon_y_stride + 15]; |
479 | 17.5M | } |
480 | 10.0M | for (i = 0; i < 8; ++i) { |
481 | 8.91M | pbi->mt_uleft_col[mb_row][i] = |
482 | 8.91M | xd->dst.u_buffer[i * recon_uv_stride + 7]; |
483 | 8.91M | pbi->mt_vleft_col[mb_row][i] = |
484 | 8.91M | xd->dst.v_buffer[i * recon_uv_stride + 7]; |
485 | 8.91M | } |
486 | 1.14M | } |
487 | 2.71M | } |
488 | | |
489 | | /* loopfilter on this macroblock. */ |
490 | 2.75M | if (filter_level) { |
491 | 2.71M | if (pc->filter_type == NORMAL_LOOPFILTER) { |
492 | 2.65M | loop_filter_info lfi; |
493 | 2.65M | FRAME_TYPE frame_type = pc->frame_type; |
494 | 2.65M | const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; |
495 | 2.65M | lfi.mblim = lfi_n->mblim[filter_level]; |
496 | 2.65M | lfi.blim = lfi_n->blim[filter_level]; |
497 | 2.65M | lfi.lim = lfi_n->lim[filter_level]; |
498 | 2.65M | lfi.hev_thr = lfi_n->hev_thr[hev_index]; |
499 | | |
500 | 2.65M | if (mb_col > 0) |
501 | 2.63M | vp8_loop_filter_mbv(xd->dst.y_buffer, xd->dst.u_buffer, |
502 | 2.63M | xd->dst.v_buffer, recon_y_stride, |
503 | 2.63M | recon_uv_stride, &lfi); |
504 | | |
505 | 2.65M | if (!skip_lf) |
506 | 322k | vp8_loop_filter_bv(xd->dst.y_buffer, xd->dst.u_buffer, |
507 | 322k | xd->dst.v_buffer, recon_y_stride, |
508 | 322k | recon_uv_stride, &lfi); |
509 | | |
510 | | /* don't apply across umv border */ |
511 | 2.65M | if (mb_row > 0) |
512 | 2.72M | vp8_loop_filter_mbh(xd->dst.y_buffer, xd->dst.u_buffer, |
513 | 2.72M | xd->dst.v_buffer, recon_y_stride, |
514 | 2.72M | recon_uv_stride, &lfi); |
515 | | |
516 | 2.65M | if (!skip_lf) |
517 | 323k | vp8_loop_filter_bh(xd->dst.y_buffer, xd->dst.u_buffer, |
518 | 323k | xd->dst.v_buffer, recon_y_stride, |
519 | 323k | recon_uv_stride, &lfi); |
520 | 2.65M | } else { |
/* Non-NORMAL filter type: the simple filters are applied to the luma plane
 * only. */
521 | 62.6k | if (mb_col > 0) |
522 | 58.9k | vp8_loop_filter_simple_mbv(xd->dst.y_buffer, recon_y_stride, |
523 | 58.9k | lfi_n->mblim[filter_level]); |
524 | | |
525 | 62.6k | if (!skip_lf) |
526 | 49.0k | vp8_loop_filter_simple_bv(xd->dst.y_buffer, recon_y_stride, |
527 | 49.0k | lfi_n->blim[filter_level]); |
528 | | |
529 | | /* don't apply across umv border */ |
530 | 62.6k | if (mb_row > 0) |
531 | 57.6k | vp8_loop_filter_simple_mbh(xd->dst.y_buffer, recon_y_stride, |
532 | 57.6k | lfi_n->mblim[filter_level]); |
533 | | |
534 | 62.6k | if (!skip_lf) |
535 | 49.0k | vp8_loop_filter_simple_bh(xd->dst.y_buffer, recon_y_stride, |
536 | 49.0k | lfi_n->blim[filter_level]); |
537 | 62.6k | } |
538 | 2.71M | } |
539 | 2.75M | } |
540 | | |
541 | 2.90M | recon_yoffset += 16; |
542 | 2.90M | recon_uvoffset += 8; |
543 | | |
544 | 2.90M | ++xd->mode_info_context; /* next mb */ |
545 | | |
546 | 2.90M | xd->above_context++; |
547 | 2.90M | } |
548 | | |
549 | | /* adjust to the next row of mbs */ |
550 | 40.5k | if (pbi->common.filter_level) { |
551 | 32.1k | if (mb_row != pc->mb_rows - 1) { |
552 | 30.0k | int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; |
553 | 30.0k | int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1); |
554 | | |
/* Replicate the entry at index lasty - 1 / lastuv - 1 into the following
 * four entries of the next row's above buffers. */
555 | 150k | for (i = 0; i < 4; ++i) { |
556 | 120k | pbi->mt_yabove_row[mb_row + 1][lasty + i] = |
557 | 120k | pbi->mt_yabove_row[mb_row + 1][lasty - 1]; |
558 | 120k | pbi->mt_uabove_row[mb_row + 1][lastuv + i] = |
559 | 120k | pbi->mt_uabove_row[mb_row + 1][lastuv - 1]; |
560 | 120k | pbi->mt_vabove_row[mb_row + 1][lastuv + i] = |
561 | 120k | pbi->mt_vabove_row[mb_row + 1][lastuv - 1]; |
562 | 120k | } |
563 | 30.0k | } |
564 | 32.1k | } else { |
565 | 8.41k | vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, |
566 | 8.41k | xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); |
567 | 8.41k | } |
568 | | |
569 | | /* last MB of row is ready just after extension is done */ |
570 | 40.5k | vpx_atomic_store_release(current_mb_col, mb_col + nsync); |
571 | | |
572 | 40.5k | ++xd->mode_info_context; /* skip prediction column */ |
573 | 40.5k | xd->up_available = 1; |
574 | | |
575 | | /* step over the rows that are decoded by the other threads */ |
576 | 40.5k | xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; |
577 | 40.5k | } |
578 | | |
579 | | /* signal end of decoding of current thread for current frame */ |
580 | 10.7k | if (last_mb_row + (int)pbi->decoding_thread_count + 1 >= pc->mb_rows) |
581 | 4.95k | vp8_sem_post(&pbi->h_event_end_decoding); |
582 | 10.7k | } |
583 | | |
584 | 69.7k | static THREADFN thread_decoding_proc(void *p_data) { |
585 | 69.7k | int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; |
586 | 69.7k | VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); |
587 | 69.7k | MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); |
588 | 69.7k | ENTROPY_CONTEXT_PLANES mb_row_left_context; |
589 | | |
590 | 75.2k | while (1) { |
591 | 75.2k | if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break; |
592 | | |
593 | 74.7k | if (vp8_sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { |
594 | 74.7k | if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) { |
595 | 69.1k | break; |
596 | 69.1k | } else { |
597 | 5.56k | MACROBLOCKD *xd = &mbrd->mbd; |
598 | 5.56k | xd->left_context = &mb_row_left_context; |
599 | 5.56k | if (setjmp(xd->error_info.jmp)) { |
600 | 2.64k | xd->error_info.setjmp = 0; |
601 | | // Signal the end of decoding for current thread. |
602 | 2.64k | vp8_sem_post(&pbi->h_event_end_decoding); |
603 | 2.64k | continue; |
604 | 2.64k | } |
605 | 2.91k | xd->error_info.setjmp = 1; |
606 | 2.91k | mt_decode_mb_rows(pbi, xd, ithread + 1); |
607 | 2.91k | xd->error_info.setjmp = 0; |
608 | 2.91k | } |
609 | 74.7k | } |
610 | 74.6k | } |
611 | | |
612 | 69.7k | return THREAD_EXIT_SUCCESS; |
613 | 69.7k | } |
614 | | |
615 | 10.9k | void vp8_decoder_create_threads(VP8D_COMP *pbi) { |
616 | 10.9k | int core_count = 0; |
617 | 10.9k | unsigned int ithread; |
618 | | |
619 | 10.9k | vpx_atomic_init(&pbi->b_multithreaded_rd, 0); |
620 | 10.9k | pbi->allocated_decoding_thread_count = 0; |
621 | | |
622 | | /* limit decoding threads to the max number of token partitions */ |
623 | 10.9k | core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads; |
624 | | |
625 | | /* limit decoding threads to the available cores */ |
626 | 10.9k | if (core_count > pbi->common.processor_core_count) { |
627 | 0 | core_count = pbi->common.processor_core_count; |
628 | 0 | } |
629 | | |
630 | 10.9k | if (core_count > 1) { |
631 | 10.3k | vpx_atomic_init(&pbi->b_multithreaded_rd, 1); |
632 | 10.3k | pbi->decoding_thread_count = core_count - 1; |
633 | | |
634 | 10.3k | CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); |
635 | 10.3k | CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); |
636 | 10.3k | CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); |
637 | 10.3k | CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); |
638 | | |
639 | 10.3k | if (vp8_sem_init(&pbi->h_event_end_decoding, 0, 0)) { |
640 | 0 | vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, |
641 | 0 | "Failed to initialize semaphore"); |
642 | 0 | } |
643 | | |
644 | 80.0k | for (ithread = 0; ithread < pbi->decoding_thread_count; ++ithread) { |
645 | 69.7k | if (vp8_sem_init(&pbi->h_event_start_decoding[ithread], 0, 0)) break; |
646 | | |
647 | 69.7k | vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); |
648 | | |
649 | 69.7k | pbi->de_thread_data[ithread].ithread = ithread; |
650 | 69.7k | pbi->de_thread_data[ithread].ptr1 = (void *)pbi; |
651 | 69.7k | pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread]; |
652 | | |
653 | 69.7k | if (pthread_create(&pbi->h_decoding_thread[ithread], 0, |
654 | 69.7k | thread_decoding_proc, &pbi->de_thread_data[ithread])) { |
655 | 0 | vp8_sem_destroy(&pbi->h_event_start_decoding[ithread]); |
656 | 0 | break; |
657 | 0 | } |
658 | 69.7k | } |
659 | | |
660 | 10.3k | pbi->allocated_decoding_thread_count = ithread; |
661 | 10.3k | if (pbi->allocated_decoding_thread_count != |
662 | 10.3k | (int)pbi->decoding_thread_count) { |
663 | | /* the remainder of cleanup cases will be handled in |
664 | | * vp8_decoder_remove_threads(). */ |
665 | 0 | if (pbi->allocated_decoding_thread_count == 0) { |
666 | 0 | vp8_sem_destroy(&pbi->h_event_end_decoding); |
667 | 0 | } |
668 | 0 | vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, |
669 | 0 | "Failed to create threads"); |
670 | 0 | } |
671 | 10.3k | } |
672 | 10.9k | } |
673 | | |
674 | 49.7k | void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) { |
675 | 49.7k | int i; |
676 | | |
677 | 49.7k | vpx_free(pbi->mt_current_mb_col); |
678 | 49.7k | pbi->mt_current_mb_col = NULL; |
679 | | |
680 | | /* Free above_row buffers. */ |
681 | 49.7k | if (pbi->mt_yabove_row) { |
682 | 4.21M | for (i = 0; i < mb_rows; ++i) { |
683 | 4.19M | vpx_free(pbi->mt_yabove_row[i]); |
684 | 4.19M | pbi->mt_yabove_row[i] = NULL; |
685 | 4.19M | } |
686 | 21.0k | vpx_free(pbi->mt_yabove_row); |
687 | 21.0k | pbi->mt_yabove_row = NULL; |
688 | 21.0k | } |
689 | | |
690 | 49.7k | if (pbi->mt_uabove_row) { |
691 | 4.21M | for (i = 0; i < mb_rows; ++i) { |
692 | 4.19M | vpx_free(pbi->mt_uabove_row[i]); |
693 | 4.19M | pbi->mt_uabove_row[i] = NULL; |
694 | 4.19M | } |
695 | 21.0k | vpx_free(pbi->mt_uabove_row); |
696 | 21.0k | pbi->mt_uabove_row = NULL; |
697 | 21.0k | } |
698 | | |
699 | 49.7k | if (pbi->mt_vabove_row) { |
700 | 4.21M | for (i = 0; i < mb_rows; ++i) { |
701 | 4.19M | vpx_free(pbi->mt_vabove_row[i]); |
702 | 4.19M | pbi->mt_vabove_row[i] = NULL; |
703 | 4.19M | } |
704 | 21.0k | vpx_free(pbi->mt_vabove_row); |
705 | 21.0k | pbi->mt_vabove_row = NULL; |
706 | 21.0k | } |
707 | | |
708 | | /* Free left_col buffers. */ |
709 | 49.7k | if (pbi->mt_yleft_col) { |
710 | 4.21M | for (i = 0; i < mb_rows; ++i) { |
711 | 4.19M | vpx_free(pbi->mt_yleft_col[i]); |
712 | 4.19M | pbi->mt_yleft_col[i] = NULL; |
713 | 4.19M | } |
714 | 21.0k | vpx_free(pbi->mt_yleft_col); |
715 | 21.0k | pbi->mt_yleft_col = NULL; |
716 | 21.0k | } |
717 | | |
718 | 49.7k | if (pbi->mt_uleft_col) { |
719 | 4.21M | for (i = 0; i < mb_rows; ++i) { |
720 | 4.19M | vpx_free(pbi->mt_uleft_col[i]); |
721 | 4.19M | pbi->mt_uleft_col[i] = NULL; |
722 | 4.19M | } |
723 | 21.0k | vpx_free(pbi->mt_uleft_col); |
724 | 21.0k | pbi->mt_uleft_col = NULL; |
725 | 21.0k | } |
726 | | |
727 | 49.7k | if (pbi->mt_vleft_col) { |
728 | 4.21M | for (i = 0; i < mb_rows; ++i) { |
729 | 4.19M | vpx_free(pbi->mt_vleft_col[i]); |
730 | 4.19M | pbi->mt_vleft_col[i] = NULL; |
731 | 4.19M | } |
732 | 21.0k | vpx_free(pbi->mt_vleft_col); |
733 | 21.0k | pbi->mt_vleft_col = NULL; |
734 | 21.0k | } |
735 | 49.7k | } |
736 | | |
737 | 21.0k | void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { |
738 | 21.0k | VP8_COMMON *const pc = &pbi->common; |
739 | 21.0k | int i; |
740 | 21.0k | int uv_width; |
741 | | |
742 | 21.0k | if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { |
743 | 21.0k | vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); |
744 | | |
745 | | /* our internal buffers are always multiples of 16 */ |
746 | 21.0k | if ((width & 0xf) != 0) width += 16 - (width & 0xf); |
747 | | |
748 | 21.0k | if (width < 640) { |
749 | 9.34k | pbi->sync_range = 1; |
750 | 11.6k | } else if (width <= 1280) { |
751 | 5.88k | pbi->sync_range = 8; |
752 | 5.88k | } else if (width <= 2560) { |
753 | 1.33k | pbi->sync_range = 16; |
754 | 4.44k | } else { |
755 | 4.44k | pbi->sync_range = 32; |
756 | 4.44k | } |
757 | | |
758 | 21.0k | uv_width = width >> 1; |
759 | | |
760 | | /* Allocate a vpx_atomic_int for each mb row. */ |
761 | 21.0k | CHECK_MEM_ERROR(&pc->error, pbi->mt_current_mb_col, |
762 | 21.0k | vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows)); |
763 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) |
764 | 4.19M | vpx_atomic_init(&pbi->mt_current_mb_col[i], 0); |
765 | | |
766 | | /* Allocate memory for above_row buffers. */ |
767 | 21.0k | CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); |
768 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) { |
769 | 4.19M | CHECK_MEM_ERROR(&pc->error, pbi->mt_yabove_row[i], |
770 | 4.19M | vpx_memalign(16, sizeof(unsigned char) * |
771 | 4.19M | (width + (VP8BORDERINPIXELS << 1)))); |
772 | 4.19M | vp8_zero_array(pbi->mt_yabove_row[i], width + (VP8BORDERINPIXELS << 1)); |
773 | 4.19M | } |
774 | | |
775 | 21.0k | CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); |
776 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) { |
777 | 4.19M | CHECK_MEM_ERROR(&pc->error, pbi->mt_uabove_row[i], |
778 | 4.19M | vpx_memalign(16, sizeof(unsigned char) * |
779 | 4.19M | (uv_width + VP8BORDERINPIXELS))); |
780 | 4.19M | vp8_zero_array(pbi->mt_uabove_row[i], uv_width + VP8BORDERINPIXELS); |
781 | 4.19M | } |
782 | | |
783 | 21.0k | CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); |
784 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) { |
785 | 4.19M | CHECK_MEM_ERROR(&pc->error, pbi->mt_vabove_row[i], |
786 | 4.19M | vpx_memalign(16, sizeof(unsigned char) * |
787 | 4.19M | (uv_width + VP8BORDERINPIXELS))); |
788 | 4.19M | vp8_zero_array(pbi->mt_vabove_row[i], uv_width + VP8BORDERINPIXELS); |
789 | 4.19M | } |
790 | | |
791 | | /* Allocate memory for left_col buffers. */ |
792 | 21.0k | CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); |
793 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) |
794 | 4.19M | CHECK_MEM_ERROR(&pc->error, pbi->mt_yleft_col[i], |
795 | 21.0k | vpx_calloc(sizeof(unsigned char) * 16, 1)); |
796 | | |
797 | 21.0k | CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); |
798 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) |
799 | 4.19M | CHECK_MEM_ERROR(&pc->error, pbi->mt_uleft_col[i], |
800 | 21.0k | vpx_calloc(sizeof(unsigned char) * 8, 1)); |
801 | | |
802 | 21.0k | CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); |
803 | 4.21M | for (i = 0; i < pc->mb_rows; ++i) |
804 | 4.19M | CHECK_MEM_ERROR(&pc->error, pbi->mt_vleft_col[i], |
805 | 21.0k | vpx_calloc(sizeof(unsigned char) * 8, 1)); |
806 | 21.0k | } |
807 | 21.0k | } |
808 | | |
809 | 11.4k | void vp8_decoder_remove_threads(VP8D_COMP *pbi) { |
810 | | /* shutdown MB Decoding thread; */ |
811 | 11.4k | if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { |
812 | 10.3k | int i; |
813 | 10.3k | vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0); |
814 | | |
815 | | /* allow all threads to exit */ |
816 | 80.0k | for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) { |
817 | 69.7k | vp8_sem_post(&pbi->h_event_start_decoding[i]); |
818 | 69.7k | pthread_join(pbi->h_decoding_thread[i], NULL); |
819 | 69.7k | } |
820 | | |
821 | 80.0k | for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) { |
822 | 69.7k | vp8_sem_destroy(&pbi->h_event_start_decoding[i]); |
823 | 69.7k | } |
824 | | |
825 | 10.3k | if (pbi->allocated_decoding_thread_count) { |
826 | 10.3k | vp8_sem_destroy(&pbi->h_event_end_decoding); |
827 | 10.3k | } |
828 | | |
829 | 10.3k | vpx_free(pbi->h_decoding_thread); |
830 | 10.3k | pbi->h_decoding_thread = NULL; |
831 | | |
832 | 10.3k | vpx_free(pbi->h_event_start_decoding); |
833 | 10.3k | pbi->h_event_start_decoding = NULL; |
834 | | |
835 | 10.3k | vpx_free(pbi->mb_row_di); |
836 | 10.3k | pbi->mb_row_di = NULL; |
837 | | |
838 | 10.3k | vpx_free(pbi->de_thread_data); |
839 | 10.3k | pbi->de_thread_data = NULL; |
840 | | |
841 | 10.3k | vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); |
842 | 10.3k | } |
843 | 11.4k | } |
844 | | |
845 | 5.16k | int vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) { |
846 | 5.16k | VP8_COMMON *pc = &pbi->common; |
847 | 5.16k | unsigned int i; |
848 | 5.16k | int j; |
849 | | |
850 | 5.16k | int filter_level = pc->filter_level; |
851 | 5.16k | YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; |
852 | | |
853 | 5.16k | if (filter_level) { |
854 | | /* Set above_row buffer to 127 for decoding first MB row */ |
855 | 4.57k | memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127, |
856 | 4.57k | yv12_fb_new->y_width + 5); |
857 | 4.57k | memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, |
858 | 4.57k | (yv12_fb_new->y_width >> 1) + 5); |
859 | 4.57k | memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, |
860 | 4.57k | (yv12_fb_new->y_width >> 1) + 5); |
861 | | |
862 | 680k | for (j = 1; j < pc->mb_rows; ++j) { |
863 | 676k | memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS - 1, (unsigned char)129, |
864 | 676k | 1); |
865 | 676k | memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1, |
866 | 676k | (unsigned char)129, 1); |
867 | 676k | memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1, |
868 | 676k | (unsigned char)129, 1); |
869 | 676k | } |
870 | | |
871 | | /* Set left_col to 129 initially */ |
872 | 685k | for (j = 0; j < pc->mb_rows; ++j) { |
873 | 680k | memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); |
874 | 680k | memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); |
875 | 680k | memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); |
876 | 680k | } |
877 | | |
878 | | /* Initialize the loop filter for this frame. */ |
879 | 4.57k | vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); |
880 | 4.57k | } else { |
881 | 586 | vp8_setup_intra_recon_top_line(yv12_fb_new); |
882 | 586 | } |
883 | | |
884 | 5.16k | setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, |
885 | 5.16k | pbi->decoding_thread_count); |
886 | | |
887 | 10.7k | for (i = 0; i < pbi->decoding_thread_count; ++i) { |
888 | 5.56k | vp8_sem_post(&pbi->h_event_start_decoding[i]); |
889 | 5.56k | } |
890 | | |
891 | 5.16k | if (setjmp(xd->error_info.jmp)) { |
892 | 3.12k | xd->error_info.setjmp = 0; |
893 | 3.12k | xd->corrupted = 1; |
894 | | // Wait for other threads to finish. This prevents other threads decoding |
895 | | // the current frame while the main thread starts decoding the next frame, |
896 | | // which causes a data race. |
897 | 5.87k | for (i = 0; i < pbi->decoding_thread_count; ++i) |
898 | 2.75k | vp8_sem_wait(&pbi->h_event_end_decoding); |
899 | 3.12k | return -1; |
900 | 3.12k | } |
901 | | |
902 | 2.03k | xd->error_info.setjmp = 1; |
903 | 2.03k | mt_decode_mb_rows(pbi, xd, 0); |
904 | 2.03k | xd->error_info.setjmp = 0; |
905 | | |
906 | 6.88k | for (i = 0; i < pbi->decoding_thread_count + 1; ++i) |
907 | 4.85k | vp8_sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ |
908 | | |
909 | 2.03k | return 0; |
910 | 5.16k | } |