/src/libvpx/vp8/decoder/threading.c
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#if !defined(_WIN32) && CONFIG_OS_SUPPORT == 1
#include <unistd.h>
#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_util/vpx_pthread.h"
#include "vp8/common/common.h"
#include "vp8/common/threading.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/extend.h"
#include "vpx_ports/vpx_timer.h"
#include "decoderthreading.h"
#include "detokenize.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
#endif

#define CALLOC_ARRAY(p, n) \
  CHECK_MEM_ERROR(&pbi->common.error, (p), vpx_calloc(sizeof(*(p)), (n)))
#define CALLOC_ARRAY_ALIGNED(p, n, algn)                       \
  do {                                                         \
    CHECK_MEM_ERROR(&pbi->common.error, (p),                   \
                    vpx_memalign((algn), sizeof(*(p)) * (n))); \
    memset((p), 0, (n) * sizeof(*(p)));                        \
  } while (0)

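/* Copy the per-frame state each worker thread's MACROBLOCKD needs from the
 * main thread's context, then mark every mb row as "no column decoded yet"
 * (-1) so the row synchronization below starts from a clean slate. */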
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                       MB_ROW_DEC *mbrd, int count) {
  VP8_COMMON *const pc = &pbi->common;
  int i;

  for (i = 0; i < count; ++i) {
    MACROBLOCKD *mbd = &mbrd[i].mbd;
    mbd->subpixel_predict = xd->subpixel_predict;
    mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
    mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
    mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;

    mbd->frame_type = pc->frame_type;
    mbd->pre = xd->pre;
    mbd->dst = xd->dst;

    mbd->segmentation_enabled = xd->segmentation_enabled;
    mbd->mb_segment_abs_delta = xd->mb_segment_abs_delta;
    memcpy(mbd->segment_feature_data, xd->segment_feature_data,
           sizeof(xd->segment_feature_data));

    /* signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; */
    memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
    /* signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; */
    memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
    /* unsigned char mode_ref_lf_delta_enabled;
       unsigned char mode_ref_lf_delta_update; */
    mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
    mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;

    mbd->current_bc = &pbi->mbc[0];

    memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
    memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
    memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
    memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

    mbd->fullpixel_mask = ~0;

    if (pc->full_pixel) mbd->fullpixel_mask = ~7;
  }

  for (i = 0; i < pc->mb_rows; ++i)
    vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1);
}

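/* Decode one macroblock: detokenize the coefficients, build the intra or
 * inter prediction, then add the dequantized, inverse-transformed residual.
 * This is essentially the single-threaded decode_macroblock() logic from
 * decodeframe.c, adapted to source neighbor pixels from the mt_* scratch
 * buffers when the loop filter is enabled. */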
static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                 unsigned int mb_idx) {
  MB_PREDICTION_MODE mode;
  int i;
#if CONFIG_ERROR_CONCEALMENT
  int corruption_detected = 0;
#else
  (void)mb_idx;
#endif

  if (xd->mode_info_context->mbmi.mb_skip_coeff) {
    vp8_reset_mb_tokens_context(xd);
  } else if (!vp8dx_bool_error(xd->current_bc)) {
    int eobtotal;
    eobtotal = vp8_decode_mb_tokens(pbi, xd);

    /* Special case: Force the loopfilter to skip when eobtotal is zero */
    xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0);
  }

  mode = xd->mode_info_context->mbmi.mode;

  if (xd->segmentation_enabled) vp8_mb_init_dequantizer(pbi, xd);

#if CONFIG_ERROR_CONCEALMENT

  if (pbi->ec_active) {
    int throw_residual;
    /* When we have independent partitions we can apply residual even
     * though other partitions within the frame are corrupt.
     */
    throw_residual =
        (!pbi->independent_partitions && pbi->frame_corrupt_residual);
    throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));

    if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) {
      /* MB with corrupt residuals or corrupt mode/motion vectors.
       * Better to use the predictor as reconstruction.
       */
      pbi->frame_corrupt_residual = 1;
      memset(xd->qcoeff, 0, sizeof(xd->qcoeff));

      corruption_detected = 1;

      /* force idct to be skipped for B_PRED and use the
       * prediction only for reconstruction */
      memset(xd->eobs, 0, 25);
    }
  }
#endif

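  /* For intra MBs the above/left neighbor pixels come from
   * xd->recon_above/xd->recon_left, which point either into the shared mt_*
   * scratch buffers (loop filter enabled) or straight into the frame buffer
   * (loop filter disabled); see mt_decode_mb_rows() below. */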
  /* do prediction */
  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
    vp8_build_intra_predictors_mbuv_s(
        xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1],
        xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer,
        xd->dst.v_buffer, xd->dst.uv_stride);

    if (mode != B_PRED) {
      vp8_build_intra_predictors_mby_s(
          xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0],
          xd->dst.y_buffer, xd->dst.y_stride);
    } else {
      short *DQC = xd->dequant_y1;
      int dst_stride = xd->dst.y_stride;

      /* clear out residual eob info */
      if (xd->mode_info_context->mbmi.mb_skip_coeff) memset(xd->eobs, 0, 25);

      intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

      for (i = 0; i < 16; ++i) {
        BLOCKD *b = &xd->block[i];
        unsigned char *dst = xd->dst.y_buffer + b->offset;
        B_PREDICTION_MODE b_mode = xd->mode_info_context->bmi[i].as_mode;
        unsigned char *Above;
        unsigned char *yleft;
        int left_stride;
        unsigned char top_left;

        /* Caution: some b_modes need 8 above pixels (4 above + 4
         * above-right). */
        if (i < 4 && pbi->common.filter_level) {
          Above = xd->recon_above[0] + b->offset;
        } else {
          Above = dst - dst_stride;
        }

        if (i % 4 == 0 && pbi->common.filter_level) {
          yleft = xd->recon_left[0] + i;
          left_stride = 1;
        } else {
          yleft = dst - 1;
          left_stride = dst_stride;
        }

        if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level) {
          top_left = *(xd->recon_left[0] + i - 1);
        } else {
          top_left = Above[-1];
        }

        vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, dst,
                             dst_stride, top_left);

        if (xd->eobs[i]) {
          if (xd->eobs[i] > 1) {
            vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
          } else {
            vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst,
                                 dst_stride);
            memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
          }
        }
      }
    }
  } else {
    vp8_build_inter_predictors_mb(xd);
  }

#if CONFIG_ERROR_CONCEALMENT
  if (corruption_detected) {
    return;
  }
#endif

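  /* Add the residual. For modes other than B_PRED and SPLITMV the DC
   * coefficients of the 16 Y subblocks are coded as a separate 4x4 block
   * (block 24) that is inverse Walsh-Hadamard transformed back into
   * qcoeff[] before the per-block idct/add. */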
  if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
    /* dequantization and idct */
    if (mode != B_PRED) {
      short *DQC = xd->dequant_y1;

      if (mode != SPLITMV) {
        BLOCKD *b = &xd->block[24];

        /* do 2nd order transform on the dc block */
        if (xd->eobs[24] > 1) {
          vp8_dequantize_b(b, xd->dequant_y2);

          vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff);
          memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
        } else {
          b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
          vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff);
          memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
        }

        /* override the dc dequant constant in order to preserve the
         * dc components
         */
        DQC = xd->dequant_y1_dc;
      }

      vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer,
                                   xd->dst.y_stride, xd->eobs);
    }

    vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
                                  xd->dst.u_buffer, xd->dst.v_buffer,
                                  xd->dst.uv_stride, xd->eobs + 16);
  }
}

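/* Worker body: decode every (decoding_thread_count + 1)-th macroblock row,
 * starting at start_mb_row. Progress is published per row through the atomic
 * mt_current_mb_col[] array, and a row is only decoded once the row above it
 * is at least sync_range columns ahead. */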
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
                              int start_mb_row) {
  const vpx_atomic_int *last_row_current_mb_col;
  vpx_atomic_int *current_mb_col;
  int mb_row;
  VP8_COMMON *pc = &pbi->common;
  const int nsync = pbi->sync_range;
  const vpx_atomic_int first_row_no_sync_above =
      VPX_ATOMIC_INIT(pc->mb_cols + nsync);
  int num_part = 1 << pbi->common.multi_token_partition;
  int last_mb_row = start_mb_row;

  YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
  YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];

  int recon_y_stride = yv12_fb_new->y_stride;
  int recon_uv_stride = yv12_fb_new->uv_stride;

  unsigned char *ref_buffer[MAX_REF_FRAMES][3];
  unsigned char *dst_buffer[3];
  int i;
  int ref_fb_corrupted[MAX_REF_FRAMES];

  ref_fb_corrupted[INTRA_FRAME] = 0;

  for (i = 1; i < MAX_REF_FRAMES; ++i) {
    YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];

    ref_buffer[i][0] = this_fb->y_buffer;
    ref_buffer[i][1] = this_fb->u_buffer;
    ref_buffer[i][2] = this_fb->v_buffer;

    ref_fb_corrupted[i] = this_fb->corrupted;
  }

  dst_buffer[0] = yv12_fb_new->y_buffer;
  dst_buffer[1] = yv12_fb_new->u_buffer;
  dst_buffer[2] = yv12_fb_new->v_buffer;

  xd->up_available = (start_mb_row != 0);

  xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
  xd->mode_info_stride = pc->mode_info_stride;

  for (mb_row = start_mb_row; mb_row < pc->mb_rows;
       mb_row += (pbi->decoding_thread_count + 1)) {
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int filter_level;
    loop_filter_info_n *lfi_n = &pc->lf_info;

    /* save last row processed by this thread */
    last_mb_row = mb_row;
    /* select bool coder for current partition */
    xd->current_bc = &pbi->mbc[mb_row % num_part];

    if (mb_row > 0) {
      last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
    } else {
      last_row_current_mb_col = &first_row_no_sync_above;
    }

    current_mb_col = &pbi->mt_current_mb_col[mb_row];

    recon_yoffset = mb_row * recon_y_stride * 16;
    recon_uvoffset = mb_row * recon_uv_stride * 8;

    /* reset contexts */
    xd->above_context = pc->above_context;
    memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

    xd->left_available = 0;

    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

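    /* When the loop filter is on, intra prediction must not read neighbor
     * pixels from the frame buffer, since the row above may already have
     * been filtered; point recon_above/recon_left at the per-row scratch
     * copies of the unfiltered pixels saved by the row above and by the
     * previous macroblock instead. */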
    if (pbi->common.filter_level) {
      xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32;
      xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16;
      xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16;

      xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
      xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
      xd->recon_left[2] = pbi->mt_vleft_col[mb_row];

      /* TODO: move to outside row loop */
      xd->recon_left_stride[0] = 1;
      xd->recon_left_stride[1] = 1;
    } else {
      xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
      xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
      xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;

      xd->recon_left[0] = xd->recon_above[0] - 1;
      xd->recon_left[1] = xd->recon_above[1] - 1;
      xd->recon_left[2] = xd->recon_above[2] - 1;

      xd->recon_above[0] -= xd->dst.y_stride;
      xd->recon_above[1] -= xd->dst.uv_stride;
      xd->recon_above[2] -= xd->dst.uv_stride;

      /* TODO: move to outside row loop */
      xd->recon_left_stride[0] = xd->dst.y_stride;
      xd->recon_left_stride[1] = xd->dst.uv_stride;

      setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
                             xd->recon_left[2], xd->dst.y_stride,
                             xd->dst.uv_stride);
    }

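    /* Pipeline synchronization: publish this row's progress every nsync
     * columns, and before decoding a column make sure the row above has
     * advanced at least nsync columns past it. */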
    for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
      if (((mb_col - 1) % nsync) == 0) {
        vpx_atomic_store_release(current_mb_col, mb_col - 1);
      }

      if (mb_row && !(mb_col & (nsync - 1))) {
        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
      }

      /* Distance of MB to the various image edges.
       * These are specified to 8th pel as they are always
       * compared to values that are in 1/8th pel units.
       */
      xd->mb_to_left_edge = -((mb_col * 16) << 3);
      xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

#if CONFIG_ERROR_CONCEALMENT
      {
        int corrupt_residual =
            (!pbi->independent_partitions && pbi->frame_corrupt_residual) ||
            vp8dx_bool_error(xd->current_bc);
        if (pbi->ec_active &&
            (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
            corrupt_residual) {
          /* We have an intra block with corrupt coefficients; better to
           * conceal with an inter block. Interpolate MVs from neighboring
           * MBs.
           *
           * Note that for the first mb with corrupt residual in a frame,
           * we might not discover that before decoding the residual. That
           * happens after this check, and therefore no inter concealment
           * will be done.
           */
          vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols);
        }
      }
#endif

      xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
      xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
      xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;

      /* propagate errors from reference frames */
      xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];

      if (xd->corrupted) {
        // Move the current decoding macroblock to the end of the row for all
        // rows assigned to this thread, so that other threads won't be left
        // waiting on this one.
        for (; mb_row < pc->mb_rows;
             mb_row += (pbi->decoding_thread_count + 1)) {
          current_mb_col = &pbi->mt_current_mb_col[mb_row];
          vpx_atomic_store_release(current_mb_col, pc->mb_cols + nsync);
        }
        vpx_internal_error(&xd->error_info, VPX_CODEC_CORRUPT_FRAME,
                           "Corrupted reference frame");
      }

      if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) {
        const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame;
        xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset;
        xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset;
        xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset;
      } else {
        // ref_frame is INTRA_FRAME, pre buffer should not be used.
        xd->pre.y_buffer = 0;
        xd->pre.u_buffer = 0;
        xd->pre.v_buffer = 0;
      }
      mt_decode_macroblock(pbi, xd, 0);

      xd->left_available = 1;

      /* check if the boolean decoder has suffered an error */
      xd->corrupted |= vp8dx_bool_error(xd->current_bc);

      xd->recon_above[0] += 16;
      xd->recon_above[1] += 8;
      xd->recon_above[2] += 8;

      if (!pbi->common.filter_level) {
        xd->recon_left[0] += 16;
        xd->recon_left[1] += 8;
        xd->recon_left[2] += 8;
      }

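      /* With the loop filter on: stash this MB's bottom pixel row and right
       * pixel column (still unfiltered) for the row below and the next MB,
       * then apply the loop filter to this MB in place. */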
      if (pbi->common.filter_level) {
        int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                       xd->mode_info_context->mbmi.mode != SPLITMV &&
                       xd->mode_info_context->mbmi.mb_skip_coeff);

        const int mode_index =
            lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
        const int seg = xd->mode_info_context->mbmi.segment_id;
        const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

        filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

        if (mb_row != pc->mb_rows - 1) {
          /* Save decoded MB last row data for next-row decoding */
          memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16),
                 (xd->dst.y_buffer + 15 * recon_y_stride), 16);
          memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8),
                 (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
          memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8),
                 (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
        }

        /* save left_col for next MB decoding */
        if (mb_col != pc->mb_cols - 1) {
          MODE_INFO *next = xd->mode_info_context + 1;

          if (next->mbmi.ref_frame == INTRA_FRAME) {
            for (i = 0; i < 16; ++i) {
              pbi->mt_yleft_col[mb_row][i] =
                  xd->dst.y_buffer[i * recon_y_stride + 15];
            }
            for (i = 0; i < 8; ++i) {
              pbi->mt_uleft_col[mb_row][i] =
                  xd->dst.u_buffer[i * recon_uv_stride + 7];
              pbi->mt_vleft_col[mb_row][i] =
                  xd->dst.v_buffer[i * recon_uv_stride + 7];
            }
          }
        }

        /* loopfilter on this macroblock. */
        if (filter_level) {
          if (pc->filter_type == NORMAL_LOOPFILTER) {
            loop_filter_info lfi;
            FRAME_TYPE frame_type = pc->frame_type;
            const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
            lfi.mblim = lfi_n->mblim[filter_level];
            lfi.blim = lfi_n->blim[filter_level];
            lfi.lim = lfi_n->lim[filter_level];
            lfi.hev_thr = lfi_n->hev_thr[hev_index];

            if (mb_col > 0)
              vp8_loop_filter_mbv(xd->dst.y_buffer, xd->dst.u_buffer,
                                  xd->dst.v_buffer, recon_y_stride,
                                  recon_uv_stride, &lfi);

            if (!skip_lf)
              vp8_loop_filter_bv(xd->dst.y_buffer, xd->dst.u_buffer,
                                 xd->dst.v_buffer, recon_y_stride,
                                 recon_uv_stride, &lfi);

            /* don't apply across umv border */
            if (mb_row > 0)
              vp8_loop_filter_mbh(xd->dst.y_buffer, xd->dst.u_buffer,
                                  xd->dst.v_buffer, recon_y_stride,
                                  recon_uv_stride, &lfi);

            if (!skip_lf)
              vp8_loop_filter_bh(xd->dst.y_buffer, xd->dst.u_buffer,
                                 xd->dst.v_buffer, recon_y_stride,
                                 recon_uv_stride, &lfi);
          } else {
            if (mb_col > 0)
              vp8_loop_filter_simple_mbv(xd->dst.y_buffer, recon_y_stride,
                                         lfi_n->mblim[filter_level]);

            if (!skip_lf)
              vp8_loop_filter_simple_bv(xd->dst.y_buffer, recon_y_stride,
                                        lfi_n->blim[filter_level]);

            /* don't apply across umv border */
            if (mb_row > 0)
              vp8_loop_filter_simple_mbh(xd->dst.y_buffer, recon_y_stride,
                                         lfi_n->mblim[filter_level]);

            if (!skip_lf)
              vp8_loop_filter_simple_bh(xd->dst.y_buffer, recon_y_stride,
                                        lfi_n->blim[filter_level]);
          }
        }
      }

      recon_yoffset += 16;
      recon_uvoffset += 8;

      ++xd->mode_info_context; /* next mb */

      xd->above_context++;
    }

    /* adjust to the next row of mbs */
    if (pbi->common.filter_level) {
      if (mb_row != pc->mb_rows - 1) {
        int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
        int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1);

        for (i = 0; i < 4; ++i) {
          pbi->mt_yabove_row[mb_row + 1][lasty + i] =
              pbi->mt_yabove_row[mb_row + 1][lasty - 1];
          pbi->mt_uabove_row[mb_row + 1][lastuv + i] =
              pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
          pbi->mt_vabove_row[mb_row + 1][lastuv + i] =
              pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
        }
      }
    } else {
      vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
                        xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
    }

    /* last MB of row is ready just after extension is done */
    vpx_atomic_store_release(current_mb_col, mb_col + nsync);

    ++xd->mode_info_context; /* skip prediction column */
    xd->up_available = 1;

    /* skip the rows decoded by the other threads */
    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
  }

  /* signal end of decoding of current thread for current frame */
  if (last_mb_row + (int)pbi->decoding_thread_count + 1 >= pc->mb_rows)
    vp8_sem_post(&pbi->h_event_end_decoding);
}

583 | | |
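/* Entry point for each worker thread: wait on the per-thread start
 * semaphore, decode the rows assigned to this thread, and repeat until
 * b_multithreaded_rd is cleared at teardown. Decode errors longjmp back
 * here so the end-of-decoding semaphore is still posted. */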
static THREADFN thread_decoding_proc(void *p_data) {
  int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
  VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
  MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
  ENTROPY_CONTEXT_PLANES mb_row_left_context;

  while (1) {
    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break;

    if (vp8_sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
      if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) {
        break;
      } else {
        MACROBLOCKD *xd = &mbrd->mbd;
        xd->left_context = &mb_row_left_context;
        if (setjmp(xd->error_info.jmp)) {
          xd->error_info.setjmp = 0;
          // Signal the end of decoding for the current thread.
          vp8_sem_post(&pbi->h_event_end_decoding);
          continue;
        }
        xd->error_info.setjmp = 1;
        mt_decode_mb_rows(pbi, xd, ithread + 1);
        xd->error_info.setjmp = 0;
      }
    }
  }

  return THREAD_EXIT_SUCCESS;
}

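/* Spawn the worker threads. The thread count is the smallest of
 * pbi->max_threads, the number of available cores, and 8 (the maximum
 * number of token partitions); the calling thread is one of them, so
 * core_count - 1 workers are created. */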
void vp8_decoder_create_threads(VP8D_COMP *pbi) {
  int core_count = 0;
  unsigned int ithread;

  vpx_atomic_init(&pbi->b_multithreaded_rd, 0);
  pbi->allocated_decoding_thread_count = 0;

  /* limit decoding threads to the max number of token partitions */
  core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

  /* limit decoding threads to the available cores */
  if (core_count > pbi->common.processor_core_count) {
    core_count = pbi->common.processor_core_count;
  }

  if (core_count > 1) {
    vpx_atomic_init(&pbi->b_multithreaded_rd, 1);
    pbi->decoding_thread_count = core_count - 1;

    CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
    CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
    CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
    CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

    if (vp8_sem_init(&pbi->h_event_end_decoding, 0, 0)) {
      vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                         "Failed to initialize semaphore");
    }

    for (ithread = 0; ithread < pbi->decoding_thread_count; ++ithread) {
      if (vp8_sem_init(&pbi->h_event_start_decoding[ithread], 0, 0)) break;

      vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);

      pbi->de_thread_data[ithread].ithread = ithread;
      pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
      pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread];

      if (pthread_create(&pbi->h_decoding_thread[ithread], 0,
                         thread_decoding_proc,
                         &pbi->de_thread_data[ithread])) {
        vp8_sem_destroy(&pbi->h_event_start_decoding[ithread]);
        break;
      }
    }

    pbi->allocated_decoding_thread_count = ithread;
    if (pbi->allocated_decoding_thread_count !=
        (int)pbi->decoding_thread_count) {
      /* the remainder of cleanup cases will be handled in
       * vp8_decoder_remove_threads(). */
      if (pbi->allocated_decoding_thread_count == 0) {
        vp8_sem_destroy(&pbi->h_event_end_decoding);
      }
      vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                         "Failed to create threads");
    }
  }
}

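/* Free the per-row synchronization array and the above-row/left-column
 * scratch buffers. mb_rows must match the row count the buffers were
 * allocated with, which can differ from the current frame size across a
 * resolution change. */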
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
  int i;

  vpx_free(pbi->mt_current_mb_col);
  pbi->mt_current_mb_col = NULL;

  /* Free above_row buffers. */
  if (pbi->mt_yabove_row) {
    for (i = 0; i < mb_rows; ++i) {
      vpx_free(pbi->mt_yabove_row[i]);
      pbi->mt_yabove_row[i] = NULL;
    }
    vpx_free(pbi->mt_yabove_row);
    pbi->mt_yabove_row = NULL;
  }

  if (pbi->mt_uabove_row) {
    for (i = 0; i < mb_rows; ++i) {
      vpx_free(pbi->mt_uabove_row[i]);
      pbi->mt_uabove_row[i] = NULL;
    }
    vpx_free(pbi->mt_uabove_row);
    pbi->mt_uabove_row = NULL;
  }

  if (pbi->mt_vabove_row) {
    for (i = 0; i < mb_rows; ++i) {
      vpx_free(pbi->mt_vabove_row[i]);
      pbi->mt_vabove_row[i] = NULL;
    }
    vpx_free(pbi->mt_vabove_row);
    pbi->mt_vabove_row = NULL;
  }

  /* Free left_col buffers. */
  if (pbi->mt_yleft_col) {
    for (i = 0; i < mb_rows; ++i) {
      vpx_free(pbi->mt_yleft_col[i]);
      pbi->mt_yleft_col[i] = NULL;
    }
    vpx_free(pbi->mt_yleft_col);
    pbi->mt_yleft_col = NULL;
  }

  if (pbi->mt_uleft_col) {
    for (i = 0; i < mb_rows; ++i) {
      vpx_free(pbi->mt_uleft_col[i]);
      pbi->mt_uleft_col[i] = NULL;
    }
    vpx_free(pbi->mt_uleft_col);
    pbi->mt_uleft_col = NULL;
  }

  if (pbi->mt_vleft_col) {
    for (i = 0; i < mb_rows; ++i) {
      vpx_free(pbi->mt_vleft_col[i]);
      pbi->mt_vleft_col[i] = NULL;
    }
    vpx_free(pbi->mt_vleft_col);
    pbi->mt_vleft_col = NULL;
  }
}

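/* (Re)allocate the multithreading scratch buffers for the current frame
 * size: one atomic progress counter, one padded above-row line per plane,
 * and one left-column array per plane, for every macroblock row. The
 * sync_range (how far a row must trail the row above before spinning) is
 * scaled with frame width to amortize the synchronization cost; for
 * example, a 1920-wide stream gets a sync_range of 16. */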
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
  VP8_COMMON *const pc = &pbi->common;
  int i;
  int uv_width;

  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
    vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

    /* our internal buffers are always multiples of 16 */
    if ((width & 0xf) != 0) width += 16 - (width & 0xf);

    if (width < 640) {
      pbi->sync_range = 1;
    } else if (width <= 1280) {
      pbi->sync_range = 8;
    } else if (width <= 2560) {
      pbi->sync_range = 16;
    } else {
      pbi->sync_range = 32;
    }

    uv_width = width >> 1;

    /* Allocate a vpx_atomic_int for each mb row. */
    CHECK_MEM_ERROR(&pc->error, pbi->mt_current_mb_col,
                    vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));
    for (i = 0; i < pc->mb_rows; ++i)
      vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);

    /* Allocate memory for above_row buffers. */
    CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i) {
      CHECK_MEM_ERROR(&pc->error, pbi->mt_yabove_row[i],
                      vpx_memalign(16, sizeof(unsigned char) *
                                           (width + (VP8BORDERINPIXELS << 1))));
      vp8_zero_array(pbi->mt_yabove_row[i], width + (VP8BORDERINPIXELS << 1));
    }

    CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i) {
      CHECK_MEM_ERROR(&pc->error, pbi->mt_uabove_row[i],
                      vpx_memalign(16, sizeof(unsigned char) *
                                           (uv_width + VP8BORDERINPIXELS)));
      vp8_zero_array(pbi->mt_uabove_row[i], uv_width + VP8BORDERINPIXELS);
    }

    CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i) {
      CHECK_MEM_ERROR(&pc->error, pbi->mt_vabove_row[i],
                      vpx_memalign(16, sizeof(unsigned char) *
                                           (uv_width + VP8BORDERINPIXELS)));
      vp8_zero_array(pbi->mt_vabove_row[i], uv_width + VP8BORDERINPIXELS);
    }

    /* Allocate memory for left_col buffers. */
    CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
      CHECK_MEM_ERROR(&pc->error, pbi->mt_yleft_col[i],
                      vpx_calloc(sizeof(unsigned char) * 16, 1));

    CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
      CHECK_MEM_ERROR(&pc->error, pbi->mt_uleft_col[i],
                      vpx_calloc(sizeof(unsigned char) * 8, 1));

    CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
      CHECK_MEM_ERROR(&pc->error, pbi->mt_vleft_col[i],
                      vpx_calloc(sizeof(unsigned char) * 8, 1));
  }
}

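/* Tear down the worker threads: clear b_multithreaded_rd, wake every worker
 * so it can observe the flag and exit, then join the threads and release the
 * semaphores, thread arrays, and scratch buffers. */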
void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
  /* shutdown MB decoding threads */
  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
    int i;
    vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0);

    /* allow all threads to exit */
    for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
      vp8_sem_post(&pbi->h_event_start_decoding[i]);
      pthread_join(pbi->h_decoding_thread[i], NULL);
    }

    for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
      vp8_sem_destroy(&pbi->h_event_start_decoding[i]);
    }

    if (pbi->allocated_decoding_thread_count) {
      vp8_sem_destroy(&pbi->h_event_end_decoding);
    }

    vpx_free(pbi->h_decoding_thread);
    pbi->h_decoding_thread = NULL;

    vpx_free(pbi->h_event_start_decoding);
    pbi->h_event_start_decoding = NULL;

    vpx_free(pbi->mb_row_di);
    pbi->mb_row_di = NULL;

    vpx_free(pbi->de_thread_data);
    pbi->de_thread_data = NULL;

    vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
  }
}

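/* Frame-level entry point for multithreaded decoding. Primes the border
 * pixels (127 above the frame, 129 left of it, matching VP8's
 * intra-prediction edge convention), kicks off the workers, decodes this
 * thread's own share of rows, and then waits for every thread, itself
 * included, to post h_event_end_decoding. */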
int vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {
  VP8_COMMON *pc = &pbi->common;
  unsigned int i;
  int j;

  int filter_level = pc->filter_level;
  YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];

  if (filter_level) {
    /* Set above_row buffer to 127 for decoding the first MB row */
    memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127,
           yv12_fb_new->y_width + 5);
    memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127,
           (yv12_fb_new->y_width >> 1) + 5);
    memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127,
           (yv12_fb_new->y_width >> 1) + 5);

    for (j = 1; j < pc->mb_rows; ++j) {
      memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS - 1, (unsigned char)129,
             1);
      memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1,
             (unsigned char)129, 1);
      memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1,
             (unsigned char)129, 1);
    }

    /* Set left_col to 129 initially */
    for (j = 0; j < pc->mb_rows; ++j) {
      memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
      memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
      memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
    }

    /* Initialize the loop filter for this frame. */
    vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
  } else {
    vp8_setup_intra_recon_top_line(yv12_fb_new);
  }

  setup_decoding_thread_data(pbi, xd, pbi->mb_row_di,
                             pbi->decoding_thread_count);

  for (i = 0; i < pbi->decoding_thread_count; ++i) {
    vp8_sem_post(&pbi->h_event_start_decoding[i]);
  }

  if (setjmp(xd->error_info.jmp)) {
    xd->error_info.setjmp = 0;
    xd->corrupted = 1;
    // Wait for the other threads to finish. This prevents them from decoding
    // the current frame while the main thread starts decoding the next one,
    // which would be a data race.
    for (i = 0; i < pbi->decoding_thread_count; ++i)
      vp8_sem_wait(&pbi->h_event_end_decoding);
    return -1;
  }

  xd->error_info.setjmp = 1;
  mt_decode_mb_rows(pbi, xd, 0);
  xd->error_info.setjmp = 0;

  for (i = 0; i < pbi->decoding_thread_count + 1; ++i)
    vp8_sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */

  return 0;
}