Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/media/libvpx/libvpx/vp8/decoder/threading.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "vpx_config.h"
12
#include "vp8_rtcd.h"
13
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
14
#include <unistd.h>
15
#endif
16
#include "onyxd_int.h"
17
#include "vpx_mem/vpx_mem.h"
18
#include "vp8/common/threading.h"
19
20
#include "vp8/common/loopfilter.h"
21
#include "vp8/common/extend.h"
22
#include "vpx_ports/vpx_timer.h"
23
#include "decoderthreading.h"
24
#include "detokenize.h"
25
#include "vp8/common/reconintra4x4.h"
26
#include "vp8/common/reconinter.h"
27
#include "vp8/common/reconintra.h"
28
#include "vp8/common/setupintrarecon.h"
29
#if CONFIG_ERROR_CONCEALMENT
30
#include "error_concealment.h"
31
#endif
32
33
0
/* Allocates an array of (n) elements, each sizeof(*(p)) bytes, through
 * vpx_calloc and hands the result to CHECK_MEM_ERROR together with (p).
 * (p) is expanded twice (once inside sizeof). */
#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
34
/* Aligned variant of CALLOC_ARRAY: requests room for (n) elements of type
 * *(p) from vpx_memalign with alignment (algn), passes the result through
 * CHECK_MEM_ERROR, then clears the whole array with memset.
 * (p) and (n) are expanded more than once, so pass side-effect-free
 * arguments. */
#define CALLOC_ARRAY_ALIGNED(p, n, algn)                            \
  do {                                                              \
    CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \
    memset((p), 0, sizeof(*(p)) * (n));                             \
  } while (0)
39
40
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
41
0
                                       MB_ROW_DEC *mbrd, int count) {
42
0
  VP8_COMMON *const pc = &pbi->common;
43
0
  int i;
44
0
45
0
  for (i = 0; i < count; ++i) {
46
0
    MACROBLOCKD *mbd = &mbrd[i].mbd;
47
0
    mbd->subpixel_predict = xd->subpixel_predict;
48
0
    mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
49
0
    mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
50
0
    mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
51
0
52
0
    mbd->frame_type = pc->frame_type;
53
0
    mbd->pre = xd->pre;
54
0
    mbd->dst = xd->dst;
55
0
56
0
    mbd->segmentation_enabled = xd->segmentation_enabled;
57
0
    mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
58
0
    memcpy(mbd->segment_feature_data, xd->segment_feature_data,
59
0
           sizeof(xd->segment_feature_data));
60
0
61
0
    /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
62
0
    memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
63
0
    /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
64
0
    memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
65
0
    /*unsigned char mode_ref_lf_delta_enabled;
66
0
    unsigned char mode_ref_lf_delta_update;*/
67
0
    mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
68
0
    mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
69
0
70
0
    mbd->current_bc = &pbi->mbc[0];
71
0
72
0
    memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
73
0
    memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
74
0
    memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
75
0
    memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
76
0
77
0
    mbd->fullpixel_mask = 0xffffffff;
78
0
79
0
    if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;
80
0
  }
81
0
82
0
  for (i = 0; i < pc->mb_rows; ++i)
83
0
    vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1);
84
0
}
85
86
/* Reconstructs the macroblock currently addressed by xd.
 *
 * Sequence:
 *   1. Decode the tokens, or reset the token context when the macroblock is
 *      flagged as having no coefficients.
 *   2. Re-initialise the dequantizer when segmentation is enabled.
 *   3. With CONFIG_ERROR_CONCEALMENT: drop the residual of macroblocks that
 *      are considered corrupt.
 *   4. Build the prediction (intra, per-4x4 intra for B_PRED, or inter).
 *   5. Dequantize / inverse-transform the residual and add it.
 *
 * pbi    - decoder instance.
 * xd     - per-thread macroblock descriptor.
 * mb_idx - macroblock index; only read when CONFIG_ERROR_CONCEALMENT is on.
 */
static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                 unsigned int mb_idx) {
  MB_PREDICTION_MODE mode;
  int blk;
#if CONFIG_ERROR_CONCEALMENT
  int corruption_detected = 0;
#else
  (void)mb_idx;
#endif

  if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
    if (!vp8dx_bool_error(xd->current_bc)) {
      const int token_total = vp8_decode_mb_tokens(pbi, xd);

      /* No tokens at all: treat the macroblock as skipped so that the loop
       * filter skips it too. */
      xd->mode_info_context->mbmi.mb_skip_coeff = (token_total == 0);
    }
  } else {
    vp8_reset_mb_tokens_context(xd);
  }

  mode = xd->mode_info_context->mbmi.mode;

  if (xd->segmentation_enabled) {
    vp8_mb_init_dequantizer(pbi, xd);
  }

#if CONFIG_ERROR_CONCEALMENT
  if (pbi->ec_active) {
    /* With independent partitions the residual may still be applied even if
     * other partitions of the frame are corrupt. */
    const int discard_residual =
        (!pbi->independent_partitions && pbi->frame_corrupt_residual) ||
        vp8dx_bool_error(xd->current_bc);

    if (mb_idx >= pbi->mvs_corrupt_from_mb || discard_residual) {
      /* Corrupt residual or corrupt modes / motion vectors: reconstruct
       * from the predictor alone. */
      pbi->frame_corrupt_residual = 1;
      memset(xd->qcoeff, 0, sizeof(xd->qcoeff));

      corruption_detected = 1;

      /* Zero the eobs so the B_PRED path below skips its idct as well. */
      memset(xd->eobs, 0, 25);
    }
  }
#endif

  /* Prediction. */
  if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
    vp8_build_inter_predictors_mb(xd);
  } else {
    vp8_build_intra_predictors_mbuv_s(
        xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1],
        xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer,
        xd->dst.v_buffer, xd->dst.uv_stride);

    if (mode == B_PRED) {
      short *const y_dq = xd->dequant_y1;
      const int y_stride = xd->dst.y_stride;
      const int lf_on = (pbi->common.filter_level != 0);

      /* Skipped macroblock: make sure no stale eob triggers an idct. */
      if (xd->mode_info_context->mbmi.mb_skip_coeff) {
        memset(xd->eobs, 0, 25);
      }

      intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

      for (blk = 0; blk < 16; ++blk) {
        BLOCKD *const bd = &xd->block[blk];
        unsigned char *const out = xd->dst.y_buffer + bd->offset;
        const B_PREDICTION_MODE sub_mode =
            xd->mode_info_context->bmi[blk].as_mode;
        unsigned char *above_row;
        unsigned char *left_col;
        int left_step;
        unsigned char corner;

        /* Some sub-modes read 8 pixels above (4 above + 4 above-right).
         * With the loop filter on, the top row of sub-blocks takes its
         * "above" pixels from the saved row buffer instead of the frame. */
        above_row = (blk < 4 && lf_on) ? xd->recon_above[0] + bd->offset
                                       : out - y_stride;

        /* Likewise the left column of sub-blocks takes its "left" pixels
         * from the saved column buffer when the loop filter is on. */
        if (blk % 4 == 0 && lf_on) {
          left_col = xd->recon_left[0] + blk;
          left_step = 1;
        } else {
          left_col = out - 1;
          left_step = y_stride;
        }

        /* Sub-blocks 4, 8 and 12 take the top-left pixel from the saved
         * left column when the loop filter is on. */
        if (blk > 0 && blk % 4 == 0 && lf_on) {
          corner = xd->recon_left[0][blk - 1];
        } else {
          corner = above_row[-1];
        }

        vp8_intra4x4_predict(above_row, left_col, left_step, sub_mode, out,
                             y_stride, corner);

        if (xd->eobs[blk] > 1) {
          vp8_dequant_idct_add(bd->qcoeff, y_dq, out, y_stride);
        } else if (xd->eobs[blk]) {
          vp8_dc_only_idct_add(bd->qcoeff[0] * y_dq[0], out, y_stride, out,
                               y_stride);
          memset(bd->qcoeff, 0, 2 * sizeof(bd->qcoeff[0]));
        }
      }
    } else {
      vp8_build_intra_predictors_mby_s(
          xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0],
          xd->dst.y_buffer, xd->dst.y_stride);
    }
  }

#if CONFIG_ERROR_CONCEALMENT
  if (corruption_detected) {
    return;
  }
#endif

  /* Nothing more to do when there is no residual. */
  if (xd->mode_info_context->mbmi.mb_skip_coeff) {
    return;
  }

  /* Dequantization and inverse transform. B_PRED luma was already handled
   * per sub-block above. */
  if (mode != B_PRED) {
    short *y_dq = xd->dequant_y1;

    if (mode != SPLITMV) {
      BLOCKD *const y2 = &xd->block[24];

      /* Second-order transform on the DC block. */
      if (xd->eobs[24] > 1) {
        vp8_dequantize_b(y2, xd->dequant_y2);

        vp8_short_inv_walsh4x4(&y2->dqcoeff[0], xd->qcoeff);
        memset(y2->qcoeff, 0, 16 * sizeof(y2->qcoeff[0]));
      } else {
        y2->dqcoeff[0] = y2->qcoeff[0] * xd->dequant_y2[0];
        vp8_short_inv_walsh4x4_1(&y2->dqcoeff[0], xd->qcoeff);
        memset(y2->qcoeff, 0, 2 * sizeof(y2->qcoeff[0]));
      }

      /* Switch to the DC-preserving dequant table for the luma blocks. */
      y_dq = xd->dequant_y1_dc;
    }

    vp8_dequant_idct_add_y_block(xd->qcoeff, y_dq, xd->dst.y_buffer,
                                 xd->dst.y_stride, xd->eobs);
  }

  vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
                                xd->dst.u_buffer, xd->dst.v_buffer,
                                xd->dst.uv_stride, xd->eobs + 16);
}
248
249
/* Decodes the macroblock rows assigned to one thread.
 *
 * Starting at start_mb_row, every (pbi->decoding_thread_count + 1)-th row is
 * decoded. Progress within a row is published through
 * pbi->mt_current_mb_col[mb_row]; before decoding certain columns the thread
 * waits on the counter of the row above. When the frame-level loop filter is
 * enabled, each macroblock is filtered right after reconstruction and the
 * unfiltered border pixels needed for intra prediction are saved into the
 * pbi->mt_*above_row / pbi->mt_*left_col buffers first.
 *
 * pbi          - decoder instance.
 * xd           - this thread's macroblock descriptor.
 * start_mb_row - first macroblock row handled by this thread.
 *
 * Posts pbi->h_event_end_decoding if this thread decoded the last row.
 */
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
                              int start_mb_row) {
  const vpx_atomic_int *last_row_current_mb_col;
  vpx_atomic_int *current_mb_col;
  int mb_row;
  VP8_COMMON *pc = &pbi->common;
  const int nsync = pbi->sync_range;
  /* Stand-in counter for "the row above row 0": already past the last
   * column. */
  const vpx_atomic_int first_row_no_sync_above =
      VPX_ATOMIC_INIT(pc->mb_cols + nsync);
  int num_part = 1 << pbi->common.multi_token_partition;
  int last_mb_row = start_mb_row;

  YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
  YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];

  int recon_y_stride = yv12_fb_new->y_stride;
  int recon_uv_stride = yv12_fb_new->uv_stride;

  /* Only entries 1..MAX_REF_FRAMES-1 are filled in; entry INTRA_FRAME is
   * never read (see the pre-buffer setup in the column loop). */
  unsigned char *ref_buffer[MAX_REF_FRAMES][3];
  unsigned char *dst_buffer[3];
  int i;
  int ref_fb_corrupted[MAX_REF_FRAMES];

  ref_fb_corrupted[INTRA_FRAME] = 0;

  for (i = 1; i < MAX_REF_FRAMES; ++i) {
    YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];

    ref_buffer[i][0] = this_fb->y_buffer;
    ref_buffer[i][1] = this_fb->u_buffer;
    ref_buffer[i][2] = this_fb->v_buffer;

    ref_fb_corrupted[i] = this_fb->corrupted;
  }

  dst_buffer[0] = yv12_fb_new->y_buffer;
  dst_buffer[1] = yv12_fb_new->u_buffer;
  dst_buffer[2] = yv12_fb_new->v_buffer;

  xd->up_available = (start_mb_row != 0);

  xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
  xd->mode_info_stride = pc->mode_info_stride;

  for (mb_row = start_mb_row; mb_row < pc->mb_rows;
       mb_row += (pbi->decoding_thread_count + 1)) {
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int filter_level;
    loop_filter_info_n *lfi_n = &pc->lf_info;

    /* save last row processed by this thread */
    last_mb_row = mb_row;
    /* select bool coder for current partition */
    xd->current_bc = &pbi->mbc[mb_row % num_part];

    if (mb_row > 0) {
      last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
    } else {
      last_row_current_mb_col = &first_row_no_sync_above;
    }

    current_mb_col = &pbi->mt_current_mb_col[mb_row];

    recon_yoffset = mb_row * recon_y_stride * 16;
    recon_uvoffset = mb_row * recon_uv_stride * 8;

    /* reset contexts */
    xd->above_context = pc->above_context;
    memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

    xd->left_available = 0;

    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

    if (pbi->common.filter_level) {
      /* Loop filter on: intra prediction reads the saved, unfiltered
       * border pixels instead of the (already filtered) frame. */
      xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32;
      xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16;
      xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16;

      xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
      xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
      xd->recon_left[2] = pbi->mt_vleft_col[mb_row];

      /* TODO: move to outside row loop */
      xd->recon_left_stride[0] = 1;
      xd->recon_left_stride[1] = 1;
    } else {
      /* Loop filter off: predict straight from the frame buffer. */
      xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
      xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
      xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;

      xd->recon_left[0] = xd->recon_above[0] - 1;
      xd->recon_left[1] = xd->recon_above[1] - 1;
      xd->recon_left[2] = xd->recon_above[2] - 1;

      xd->recon_above[0] -= xd->dst.y_stride;
      xd->recon_above[1] -= xd->dst.uv_stride;
      xd->recon_above[2] -= xd->dst.uv_stride;

      /* TODO: move to outside row loop */
      xd->recon_left_stride[0] = xd->dst.y_stride;
      xd->recon_left_stride[1] = xd->dst.uv_stride;

      setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
                             xd->recon_left[2], xd->dst.y_stride,
                             xd->dst.uv_stride);
    }

    for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
      /* Publish progress once every nsync columns. */
      if (((mb_col - 1) % nsync) == 0) {
        vpx_atomic_store_release(current_mb_col, mb_col - 1);
      }

      /* At every nsync-th column (sync_range is a power of two), wait on
       * the progress counter of the row above. */
      if (mb_row && !(mb_col & (nsync - 1))) {
        vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
      }

      /* Distance of MB to the various image edges.
       * These are specified to 8th pel as they are always
       * compared to values that are in 1/8th pel units.
       */
      xd->mb_to_left_edge = -((mb_col * 16) << 3);
      xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

#if CONFIG_ERROR_CONCEALMENT
      {
        int corrupt_residual =
            (!pbi->independent_partitions && pbi->frame_corrupt_residual) ||
            vp8dx_bool_error(xd->current_bc);
        if (pbi->ec_active &&
            (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
            corrupt_residual) {
          /* We have an intra block with corrupt
           * coefficients, better to conceal with an inter
           * block.
           * Interpolate MVs from neighboring MBs
           *
           * Note that for the first mb with corrupt
           * residual in a frame, we might not discover
           * that before decoding the residual. That
           * happens after this check, and therefore no
           * inter concealment will be done.
           */
          vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols);
        }
      }
#endif

      xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
      xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
      xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;

      {
        const int ref = xd->mode_info_context->mbmi.ref_frame;

        if (ref != INTRA_FRAME) {
          xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset;
          xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset;
          xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset;
        } else {
          /* ref_buffer[INTRA_FRAME] is never initialised; reading it (and
           * doing pointer arithmetic on it) is undefined behaviour. Intra
           * macroblocks take the intra-prediction path, so leave the
           * reference pointers cleared. */
          xd->pre.y_buffer = NULL;
          xd->pre.u_buffer = NULL;
          xd->pre.v_buffer = NULL;
        }
      }

      /* propagate errors from reference frames */
      xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];

      mt_decode_macroblock(pbi, xd, 0);

      xd->left_available = 1;

      /* check if the boolean decoder has suffered an error */
      xd->corrupted |= vp8dx_bool_error(xd->current_bc);

      xd->recon_above[0] += 16;
      xd->recon_above[1] += 8;
      xd->recon_above[2] += 8;

      /* With the loop filter on, recon_left points at the fixed per-row
       * column buffers and must not advance. */
      if (!pbi->common.filter_level) {
        xd->recon_left[0] += 16;
        xd->recon_left[1] += 8;
        xd->recon_left[2] += 8;
      }

      if (pbi->common.filter_level) {
        int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                       xd->mode_info_context->mbmi.mode != SPLITMV &&
                       xd->mode_info_context->mbmi.mb_skip_coeff);

        const int mode_index =
            lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
        const int seg = xd->mode_info_context->mbmi.segment_id;
        const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

        filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

        if (mb_row != pc->mb_rows - 1) {
          /* Save decoded MB last row data for next-row decoding */
          memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16),
                 (xd->dst.y_buffer + 15 * recon_y_stride), 16);
          memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8),
                 (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
          memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8),
                 (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
        }

        /* save left_col for next MB decoding */
        if (mb_col != pc->mb_cols - 1) {
          MODE_INFO *next = xd->mode_info_context + 1;

          /* Only an intra neighbour reads the saved column. */
          if (next->mbmi.ref_frame == INTRA_FRAME) {
            for (i = 0; i < 16; ++i) {
              pbi->mt_yleft_col[mb_row][i] =
                  xd->dst.y_buffer[i * recon_y_stride + 15];
            }
            for (i = 0; i < 8; ++i) {
              pbi->mt_uleft_col[mb_row][i] =
                  xd->dst.u_buffer[i * recon_uv_stride + 7];
              pbi->mt_vleft_col[mb_row][i] =
                  xd->dst.v_buffer[i * recon_uv_stride + 7];
            }
          }
        }

        /* loopfilter on this macroblock. */
        if (filter_level) {
          if (pc->filter_type == NORMAL_LOOPFILTER) {
            loop_filter_info lfi;
            FRAME_TYPE frame_type = pc->frame_type;
            const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
            lfi.mblim = lfi_n->mblim[filter_level];
            lfi.blim = lfi_n->blim[filter_level];
            lfi.lim = lfi_n->lim[filter_level];
            lfi.hev_thr = lfi_n->hev_thr[hev_index];

            if (mb_col > 0)
              vp8_loop_filter_mbv(xd->dst.y_buffer, xd->dst.u_buffer,
                                  xd->dst.v_buffer, recon_y_stride,
                                  recon_uv_stride, &lfi);

            if (!skip_lf)
              vp8_loop_filter_bv(xd->dst.y_buffer, xd->dst.u_buffer,
                                 xd->dst.v_buffer, recon_y_stride,
                                 recon_uv_stride, &lfi);

            /* don't apply across umv border */
            if (mb_row > 0)
              vp8_loop_filter_mbh(xd->dst.y_buffer, xd->dst.u_buffer,
                                  xd->dst.v_buffer, recon_y_stride,
                                  recon_uv_stride, &lfi);

            if (!skip_lf)
              vp8_loop_filter_bh(xd->dst.y_buffer, xd->dst.u_buffer,
                                 xd->dst.v_buffer, recon_y_stride,
                                 recon_uv_stride, &lfi);
          } else {
            if (mb_col > 0)
              vp8_loop_filter_simple_mbv(xd->dst.y_buffer, recon_y_stride,
                                         lfi_n->mblim[filter_level]);

            if (!skip_lf)
              vp8_loop_filter_simple_bv(xd->dst.y_buffer, recon_y_stride,
                                        lfi_n->blim[filter_level]);

            /* don't apply across umv border */
            if (mb_row > 0)
              vp8_loop_filter_simple_mbh(xd->dst.y_buffer, recon_y_stride,
                                         lfi_n->mblim[filter_level]);

            if (!skip_lf)
              vp8_loop_filter_simple_bh(xd->dst.y_buffer, recon_y_stride,
                                        lfi_n->blim[filter_level]);
          }
        }
      }

      recon_yoffset += 16;
      recon_uvoffset += 8;

      ++xd->mode_info_context; /* next mb */

      xd->above_context++;
    }

    /* adjust to the next row of mbs */
    if (pbi->common.filter_level) {
      if (mb_row != pc->mb_rows - 1) {
        /* Replicate the last saved pixel 4 positions to the right in the
         * next row's above buffers. */
        int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
        int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1);

        for (i = 0; i < 4; ++i) {
          pbi->mt_yabove_row[mb_row + 1][lasty + i] =
              pbi->mt_yabove_row[mb_row + 1][lasty - 1];
          pbi->mt_uabove_row[mb_row + 1][lastuv + i] =
              pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
          pbi->mt_vabove_row[mb_row + 1][lastuv + i] =
              pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
        }
      }
    } else {
      vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
                        xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
    }

    /* last MB of row is ready just after extension is done */
    vpx_atomic_store_release(current_mb_col, mb_col + nsync);

    ++xd->mode_info_context; /* skip prediction column */
    xd->up_available = 1;

    /* since we have multithread */
    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
  }

  /* signal end of frame decoding if this thread processed the last mb_row */
  if (last_mb_row == (pc->mb_rows - 1)) sem_post(&pbi->h_event_end_decoding);
}
563
564
0
static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
565
0
  int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
566
0
  VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
567
0
  MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
568
0
  ENTROPY_CONTEXT_PLANES mb_row_left_context;
569
0
570
0
  while (1) {
571
0
    if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break;
572
0
573
0
    if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
574
0
      if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) {
575
0
        break;
576
0
      } else {
577
0
        MACROBLOCKD *xd = &mbrd->mbd;
578
0
        xd->left_context = &mb_row_left_context;
579
0
580
0
        mt_decode_mb_rows(pbi, xd, ithread + 1);
581
0
      }
582
0
    }
583
0
  }
584
0
585
0
  return 0;
586
0
}
587
588
0
/* Creates the worker threads used for multi-threaded decoding.
 *
 * The thread budget is pbi->max_threads, capped at 8 and at
 * pbi->common.processor_core_count. With a budget of one or less nothing is
 * created and pbi->b_multithreaded_rd stays 0. Otherwise budget - 1 workers
 * are started, each with its own start semaphore, plus one shared
 * end-of-frame semaphore.
 *
 * Failures are reported through vpx_internal_error(). The number of workers
 * actually started is kept in pbi->allocated_decoding_thread_count so that
 * vp8_decoder_remove_threads() can clean up a partial set.
 */
void vp8_decoder_create_threads(VP8D_COMP *pbi) {
  int core_count = 0;
  unsigned int started = 0;

  vpx_atomic_init(&pbi->b_multithreaded_rd, 0);
  pbi->allocated_decoding_thread_count = 0;

  /* limit decoding threads to the max number of token partitions */
  core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

  /* limit decoding threads to the available cores */
  if (core_count > pbi->common.processor_core_count) {
    core_count = pbi->common.processor_core_count;
  }

  /* Single-threaded decoding: nothing to set up. */
  if (core_count <= 1) return;

  vpx_atomic_init(&pbi->b_multithreaded_rd, 1);
  pbi->decoding_thread_count = core_count - 1;

  CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
  CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
  CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
  CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

  if (sem_init(&pbi->h_event_end_decoding, 0, 0)) {
    vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                       "Failed to initialize semaphore");
  }

  while (started < pbi->decoding_thread_count) {
    if (sem_init(&pbi->h_event_start_decoding[started], 0, 0)) break;

    vp8_setup_block_dptrs(&pbi->mb_row_di[started].mbd);

    pbi->de_thread_data[started].ithread = started;
    pbi->de_thread_data[started].ptr1 = (void *)pbi;
    pbi->de_thread_data[started].ptr2 = (void *)&pbi->mb_row_di[started];

    if (pthread_create(&pbi->h_decoding_thread[started], 0,
                       thread_decoding_proc, &pbi->de_thread_data[started])) {
      /* This worker never ran, so its semaphore is released here. */
      sem_destroy(&pbi->h_event_start_decoding[started]);
      break;
    }

    ++started;
  }

  pbi->allocated_decoding_thread_count = started;
  if (pbi->allocated_decoding_thread_count !=
      (int)pbi->decoding_thread_count) {
    /* the remainder of cleanup cases will be handled in
     * vp8_decoder_remove_threads(). */
    if (pbi->allocated_decoding_thread_count == 0) {
      sem_destroy(&pbi->h_event_end_decoding);
    }
    vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                       "Failed to create threads");
  }
}
646
647
0
/* Releases one table of per-row buffers: each of the mb_rows row buffers in
 * *rows, then the pointer table itself, leaving *rows NULL. Does nothing
 * when *rows is already NULL. */
static void mt_free_row_buffers(unsigned char ***rows, int mb_rows) {
  int r;

  if (*rows == NULL) return;

  for (r = 0; r < mb_rows; ++r) {
    vpx_free((*rows)[r]);
    (*rows)[r] = NULL;
  }
  vpx_free(*rows);
  *rows = NULL;
}

/* Frees the temporary buffers used by the multi-threaded decoder: the
 * per-row progress counters plus the Y/U/V above-row and left-column
 * buffers.
 *
 * pbi     - decoder instance owning the buffers.
 * mb_rows - number of row buffers held by each table (the row count that
 *           was in effect when they were allocated).
 */
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
  vpx_free(pbi->mt_current_mb_col);
  pbi->mt_current_mb_col = NULL;

  /* Free above_row buffers. */
  mt_free_row_buffers(&pbi->mt_yabove_row, mb_rows);
  mt_free_row_buffers(&pbi->mt_uabove_row, mb_rows);
  mt_free_row_buffers(&pbi->mt_vabove_row, mb_rows);

  /* Free left_col buffers. */
  mt_free_row_buffers(&pbi->mt_yleft_col, mb_rows);
  mt_free_row_buffers(&pbi->mt_uleft_col, mb_rows);
  mt_free_row_buffers(&pbi->mt_vleft_col, mb_rows);
}
709
710
0
/* (Re)allocates the temporary buffers used by the multi-threaded decoder.
 *
 * Does nothing unless pbi->b_multithreaded_rd is set. Otherwise the buffers
 * of the previous configuration are released, pbi->sync_range is chosen
 * from the 16-aligned frame width, and one progress counter, three
 * above-row buffers and three left-column buffers are allocated per
 * macroblock row.
 *
 * pbi          - decoder instance.
 * width        - frame width in pixels; rounded up to a multiple of 16.
 * prev_mb_rows - row count the existing buffers were allocated for.
 *
 * Allocation failures are reported through CHECK_MEM_ERROR.
 */
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
  VP8_COMMON *const pc = &pbi->common;
  int i;
  int uv_width;

  if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) {
    size_t y_row_bytes;
    size_t uv_row_bytes;

    vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

    /* our internal buffers are always multiples of 16 */
    if ((width & 0xf) != 0) width += 16 - (width & 0xf);

    /* Wider frames synchronise with the row above less often. */
    if (width < 640) {
      pbi->sync_range = 1;
    } else if (width <= 1280) {
      pbi->sync_range = 8;
    } else if (width <= 2560) {
      pbi->sync_range = 16;
    } else {
      pbi->sync_range = 32;
    }

    uv_width = width >> 1;

    y_row_bytes = sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1));
    uv_row_bytes = sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS);

    /* Allocate a vpx_atomic_int for each mb row. */
    CHECK_MEM_ERROR(pbi->mt_current_mb_col,
                    vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));
    for (i = 0; i < pc->mb_rows; ++i)
      vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);

    /* Allocate memory for above_row buffers.
     * vpx_memalign does not clear the memory and the per-frame setup only
     * writes part of each row, so every row is zeroed here to keep the
     * decoder from reading indeterminate bytes. */
    CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i) {
      CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16, y_row_bytes));
      memset(pbi->mt_yabove_row[i], 0, y_row_bytes);
    }

    CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i) {
      CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16, uv_row_bytes));
      memset(pbi->mt_uabove_row[i], 0, uv_row_bytes);
    }

    CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i) {
      CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16, uv_row_bytes));
      memset(pbi->mt_vabove_row[i], 0, uv_row_bytes);
    }

    /* Allocate memory for left_col buffers. */
    CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
      CHECK_MEM_ERROR(pbi->mt_yleft_col[i],
                      vpx_calloc(sizeof(unsigned char) * 16, 1));

    CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
      CHECK_MEM_ERROR(pbi->mt_uleft_col[i],
                      vpx_calloc(sizeof(unsigned char) * 8, 1));

    CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
    for (i = 0; i < pc->mb_rows; ++i)
      CHECK_MEM_ERROR(pbi->mt_vleft_col[i],
                      vpx_calloc(sizeof(unsigned char) * 8, 1));
  }
}
778
779
0
/* Stops the decoding worker threads and frees everything that
 * vp8_decoder_create_threads() and vp8mt_alloc_temp_buffers() set up.
 * Does nothing when multi-threaded decoding is not active.
 */
void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
  int t;

  if (!vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) return;

  /* Clear the flag first: a worker that wakes up sees it and exits. */
  vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0);

  /* Wake each started worker and wait for it to finish. */
  for (t = 0; t < pbi->allocated_decoding_thread_count; ++t) {
    sem_post(&pbi->h_event_start_decoding[t]);
    pthread_join(pbi->h_decoding_thread[t], NULL);
  }

  for (t = 0; t < pbi->allocated_decoding_thread_count; ++t) {
    sem_destroy(&pbi->h_event_start_decoding[t]);
  }

  /* With zero workers started, the creation path already destroyed the
   * end-of-frame semaphore. */
  if (pbi->allocated_decoding_thread_count) {
    sem_destroy(&pbi->h_event_end_decoding);
  }

  vpx_free(pbi->h_decoding_thread);
  pbi->h_decoding_thread = NULL;

  vpx_free(pbi->h_event_start_decoding);
  pbi->h_event_start_decoding = NULL;

  vpx_free(pbi->mb_row_di);
  pbi->mb_row_di = NULL;

  vpx_free(pbi->de_thread_data);
  pbi->de_thread_data = NULL;

  vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
}
814
815
0
/* Decodes all macroblock rows of the current frame using the worker threads.
 *
 * Prepares the intra-prediction borders (the saved row/column buffers when
 * the loop filter is on, the frame's top line otherwise), copies the
 * frame-level state into every worker's MACROBLOCKD, wakes the workers,
 * decodes this thread's own rows starting at row 0, and finally waits for
 * the end-of-frame semaphore.
 */
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {
  VP8_COMMON *pc = &pbi->common;
  unsigned int worker;
  int row;

  int filter_level = pc->filter_level;
  YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];

  if (!filter_level) {
    vp8_setup_intra_recon_top_line(yv12_fb_new);
  } else {
    /* Offset of the byte just left of the first pixel in each above row. */
    const int y_edge = VP8BORDERINPIXELS - 1;
    const int uv_edge = (VP8BORDERINPIXELS >> 1) - 1;

    /* Set above_row buffer to 127 for decoding first MB row */
    memset(pbi->mt_yabove_row[0] + y_edge, 127, yv12_fb_new->y_width + 5);
    memset(pbi->mt_uabove_row[0] + uv_edge, 127,
           (yv12_fb_new->y_width >> 1) + 5);
    memset(pbi->mt_vabove_row[0] + uv_edge, 127,
           (yv12_fb_new->y_width >> 1) + 5);

    for (row = 0; row < pc->mb_rows; ++row) {
      /* Every row but the first gets 129 in the byte left of its first
       * above pixel. */
      if (row > 0) {
        memset(pbi->mt_yabove_row[row] + y_edge, (unsigned char)129, 1);
        memset(pbi->mt_uabove_row[row] + uv_edge, (unsigned char)129, 1);
        memset(pbi->mt_vabove_row[row] + uv_edge, (unsigned char)129, 1);
      }

      /* Set left_col to 129 initially */
      memset(pbi->mt_yleft_col[row], (unsigned char)129, 16);
      memset(pbi->mt_uleft_col[row], (unsigned char)129, 8);
      memset(pbi->mt_vleft_col[row], (unsigned char)129, 8);
    }

    /* Initialize the loop filter for this frame. */
    vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
  }

  setup_decoding_thread_data(pbi, xd, pbi->mb_row_di,
                             pbi->decoding_thread_count);

  /* Release the workers, then decode this thread's share of the rows. */
  for (worker = 0; worker < pbi->decoding_thread_count; ++worker) {
    sem_post(&pbi->h_event_start_decoding[worker]);
  }

  mt_decode_mb_rows(pbi, xd, 0);

  sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
}