Coverage Report

Created: 2026-06-30 07:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwebp/src/dec/frame_dec.c
Line
Count
Source
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// Frame-reconstruction function. Memory allocation.
11
//
12
// Author: Skal (pascal.massimino@gmail.com)
13
14
#include <assert.h>
15
#include <stdlib.h>
16
#include <string.h>
17
18
#include "src/dec/common_dec.h"
19
#include "src/dec/vp8_dec.h"
20
#include "src/dec/vp8i_dec.h"
21
#include "src/dec/webpi_dec.h"
22
#include "src/dsp/dsp.h"
23
#include "src/utils/random_utils.h"
24
#include "src/utils/thread_utils.h"
25
#include "src/utils/utils.h"
26
#include "src/webp/decode.h"
27
#include "src/webp/types.h"
28
29
WEBP_ASSUME_UNSAFE_INDEXABLE_ABI
30
31
//------------------------------------------------------------------------------
32
// Main reconstruction function.
33
34
static const uint16_t kScan[16] = {
35
    0 + 0 * BPS,  4 + 0 * BPS,  8 + 0 * BPS,  12 + 0 * BPS,
36
    0 + 4 * BPS,  4 + 4 * BPS,  8 + 4 * BPS,  12 + 4 * BPS,
37
    0 + 8 * BPS,  4 + 8 * BPS,  8 + 8 * BPS,  12 + 8 * BPS,
38
    0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS};
39
40
27.1k
static int CheckMode(int mb_x, int mb_y, int mode) {
41
27.1k
  if (mode == B_DC_PRED) {
42
12.6k
    if (mb_x == 0) {
43
3.13k
      return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
44
9.50k
    } else {
45
9.50k
      return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
46
9.50k
    }
47
12.6k
  }
48
14.4k
  return mode;
49
27.1k
}
50
51
462k
// Copies four bytes (32 bits) from 'src' to 'dst'.
static void Copy32b(uint8_t* const dst, const uint8_t* const src) {
  WEBP_UNSAFE_MEMCPY(dst, src, sizeof(uint32_t));
}
54
55
// Adds the residual of one 4x4 block to 'dst'. The two most significant bits
// of 'bits' select the inverse transform applied to the coefficients 'src':
//   3 -> VP8Transform (with 0 as last argument)
//   2 -> VP8TransformAC3
//   1 -> VP8TransformDC
//   0 -> nothing to add, 'dst' is left untouched.
static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
                                    uint8_t* const dst) {
  const uint32_t selector = bits >> 30;
  if (selector == 3) {
    VP8Transform(src, dst, 0);
  } else if (selector == 2) {
    VP8TransformAC3(src, dst);
  } else if (selector == 1) {
    VP8TransformDC(src, dst);
  }
}
71
72
// Adds the residual of one chroma plane to 'dst'. Only the low 8 bits of
// 'bits' are examined: nothing is done when they are all zero, the full
// transform is used when any bit of the 0xaa mask is set, and the DC-only
// transform is used otherwise.
static void DoUVTransform(uint32_t bits, const int16_t* const src,
                          uint8_t* const dst) {
  if ((bits & 0xff) == 0) {  // no non-zero coeff at all
    return;
  }
  if ((bits & 0xaa) != 0) {    // some non-zero AC coefficient
    VP8TransformUV(src, dst);  // note we don't use the AC3 variant for U/V
  } else {
    VP8TransformDCUV(src, dst);
  }
}
82
83
static void ReconstructRow(const VP8Decoder* const dec,
84
4.37k
                           const VP8ThreadContext* ctx) {
85
4.37k
  int j;
86
4.37k
  int mb_x;
87
4.37k
  const int mb_y = ctx->mb_y;
88
4.37k
  const int cache_id = ctx->id;
89
4.37k
  uint8_t* const y_dst = dec->yuv_b + Y_OFF;
90
4.37k
  uint8_t* const u_dst = dec->yuv_b + U_OFF;
91
4.37k
  uint8_t* const v_dst = dec->yuv_b + V_OFF;
92
93
  // Initialize left-most block.
94
74.3k
  for (j = 0; j < 16; ++j) {
95
70.0k
    y_dst[j * BPS - 1] = 129;
96
70.0k
  }
97
39.3k
  for (j = 0; j < 8; ++j) {
98
35.0k
    u_dst[j * BPS - 1] = 129;
99
35.0k
    v_dst[j * BPS - 1] = 129;
100
35.0k
  }
101
102
  // Init top-left sample on left column too.
103
4.37k
  if (mb_y > 0) {
104
3.60k
    y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
105
3.60k
  } else {
106
    // we only need to do this init once at block (0,0).
107
    // Afterward, it remains valid for the whole topmost row.
108
768
    WEBP_UNSAFE_MEMSET(y_dst - BPS - 1, 127, 16 + 4 + 1);
109
768
    WEBP_UNSAFE_MEMSET(u_dst - BPS - 1, 127, 8 + 1);
110
768
    WEBP_UNSAFE_MEMSET(v_dst - BPS - 1, 127, 8 + 1);
111
768
  }
112
113
  // Reconstruct one row.
114
21.9k
  for (mb_x = 0; mb_x < dec->mb_w; ++mb_x) {
115
17.5k
    const VP8MBData* const block = ctx->mb_data + mb_x;
116
117
    // Rotate in the left samples from previously decoded block. We move four
118
    // pixels at a time for alignment reason, and because of in-loop filter.
119
17.5k
    if (mb_x > 0) {
120
237k
      for (j = -1; j < 16; ++j) {
121
224k
        Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
122
224k
      }
123
132k
      for (j = -1; j < 8; ++j) {
124
118k
        Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
125
118k
        Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
126
118k
      }
127
13.2k
    }
128
17.5k
    {
129
      // bring top samples into the cache
130
17.5k
      VP8TopSamples* const top_yuv = dec->yuv_t + mb_x;
131
17.5k
      const int16_t* const coeffs = block->coeffs;
132
17.5k
      uint32_t bits = block->non_zero_y;
133
17.5k
      int n;
134
135
17.5k
      if (mb_y > 0) {
136
13.9k
        WEBP_UNSAFE_MEMCPY(y_dst - BPS, top_yuv[0].y, 16);
137
13.9k
        WEBP_UNSAFE_MEMCPY(u_dst - BPS, top_yuv[0].u, 8);
138
13.9k
        WEBP_UNSAFE_MEMCPY(v_dst - BPS, top_yuv[0].v, 8);
139
13.9k
      }
140
141
      // predict and add residuals
142
17.5k
      if (block->is_i4x4) {  // 4x4
143
8.06k
        uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
144
145
8.06k
        if (mb_y > 0) {
146
5.80k
          if (mb_x >= dec->mb_w - 1) {  // on rightmost border
147
1.72k
            WEBP_UNSAFE_MEMSET(top_right, top_yuv[0].y[15], sizeof(*top_right));
148
4.08k
          } else {
149
4.08k
            WEBP_UNSAFE_MEMCPY(top_right, top_yuv[1].y, sizeof(*top_right));
150
4.08k
          }
151
5.80k
        }
152
        // replicate the top-right pixels below
153
8.06k
        top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
154
155
        // predict and add residuals for all 4x4 blocks in turn.
156
137k
        for (n = 0; n < 16; ++n, bits <<= 2) {
157
128k
          uint8_t* const dst = y_dst + kScan[n];
158
128k
          VP8PredLuma4[block->imodes[n]](dst);
159
128k
          DoTransform(bits, coeffs + n * 16, dst);
160
128k
        }
161
9.52k
      } else {  // 16x16
162
9.52k
        const int pred_func = CheckMode(mb_x, mb_y, block->imodes[0]);
163
9.52k
        VP8PredLuma16[pred_func](y_dst);
164
9.52k
        if (bits != 0) {
165
65.4k
          for (n = 0; n < 16; ++n, bits <<= 2) {
166
61.5k
            DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
167
61.5k
          }
168
3.84k
        }
169
9.52k
      }
170
17.5k
      {
171
        // Chroma
172
17.5k
        const uint32_t bits_uv = block->non_zero_uv;
173
17.5k
        const int pred_func = CheckMode(mb_x, mb_y, block->uvmode);
174
17.5k
        VP8PredChroma8[pred_func](u_dst);
175
17.5k
        VP8PredChroma8[pred_func](v_dst);
176
17.5k
        DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
177
17.5k
        DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
178
17.5k
      }
179
180
      // stash away top samples for next block
181
17.5k
      if (mb_y < dec->mb_h - 1) {
182
16.4k
        WEBP_UNSAFE_MEMCPY(top_yuv[0].y, y_dst + 15 * BPS, 16);
183
16.4k
        WEBP_UNSAFE_MEMCPY(top_yuv[0].u, u_dst + 7 * BPS, 8);
184
16.4k
        WEBP_UNSAFE_MEMCPY(top_yuv[0].v, v_dst + 7 * BPS, 8);
185
16.4k
      }
186
17.5k
    }
187
    // Transfer reconstructed samples from yuv_b cache to final destination.
188
17.5k
    {
189
17.5k
      const int y_offset = cache_id * 16 * dec->cache_y_stride;
190
17.5k
      const int uv_offset = cache_id * 8 * dec->cache_uv_stride;
191
17.5k
      uint8_t* const y_out = dec->cache_y + mb_x * 16 + y_offset;
192
17.5k
      uint8_t* const u_out = dec->cache_u + mb_x * 8 + uv_offset;
193
17.5k
      uint8_t* const v_out = dec->cache_v + mb_x * 8 + uv_offset;
194
298k
      for (j = 0; j < 16; ++j) {
195
281k
        WEBP_UNSAFE_MEMCPY(y_out + j * dec->cache_y_stride, y_dst + j * BPS,
196
281k
                           16);
197
281k
      }
198
158k
      for (j = 0; j < 8; ++j) {
199
140k
        WEBP_UNSAFE_MEMCPY(u_out + j * dec->cache_uv_stride, u_dst + j * BPS,
200
140k
                           8);
201
140k
        WEBP_UNSAFE_MEMCPY(v_out + j * dec->cache_uv_stride, v_dst + j * BPS,
202
140k
                           8);
203
140k
      }
204
17.5k
    }
205
17.5k
  }
206
4.37k
}
207
208
//------------------------------------------------------------------------------
209
// Filtering
210
211
// kFilterExtraRows[filter_type] = number of extra lines that must be kept in
// the cache across a macroblock boundary for the given filtering type.
// Simple filter:  up to 2 luma samples are read and 1 is written.
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
//                 U/V, so it's 8 samples total (because of the 2x upsampling).
static const uint8_t kFilterExtraRows[3] = {
    0,  // no filtering
    2,  // simple filter
    8   // complex filter
};
217
218
14.6k
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
219
14.6k
  const VP8ThreadContext* const ctx = &dec->thread_ctx;
220
14.6k
  const int cache_id = ctx->id;
221
14.6k
  const int y_bps = dec->cache_y_stride;
222
14.6k
  const VP8FInfo* const f_info = ctx->f_info + mb_x;
223
14.6k
  uint8_t* const y_dst = dec->cache_y + cache_id * 16 * y_bps + mb_x * 16;
224
14.6k
  const int ilevel = f_info->f_ilevel;
225
14.6k
  const int limit = f_info->f_limit;
226
14.6k
  if (limit == 0) {
227
2.02k
    return;
228
2.02k
  }
229
14.6k
  assert(limit >= 3);
230
12.6k
  if (dec->filter_type == 1) {  // simple
231
7.27k
    if (mb_x > 0) {
232
5.28k
      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
233
5.28k
    }
234
7.27k
    if (f_info->f_inner) {
235
5.25k
      VP8SimpleHFilter16i(y_dst, y_bps, limit);
236
5.25k
    }
237
7.27k
    if (mb_y > 0) {
238
5.94k
      VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
239
5.94k
    }
240
7.27k
    if (f_info->f_inner) {
241
5.25k
      VP8SimpleVFilter16i(y_dst, y_bps, limit);
242
5.25k
    }
243
7.27k
  } else {  // complex
244
5.38k
    const int uv_bps = dec->cache_uv_stride;
245
5.38k
    uint8_t* const u_dst = dec->cache_u + cache_id * 8 * uv_bps + mb_x * 8;
246
5.38k
    uint8_t* const v_dst = dec->cache_v + cache_id * 8 * uv_bps + mb_x * 8;
247
5.38k
    const int hev_thresh = f_info->hev_thresh;
248
5.38k
    if (mb_x > 0) {
249
4.19k
      VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
250
4.19k
      VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
251
4.19k
    }
252
5.38k
    if (f_info->f_inner) {
253
3.57k
      VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
254
3.57k
      VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
255
3.57k
    }
256
5.38k
    if (mb_y > 0) {
257
4.47k
      VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
258
4.47k
      VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
259
4.47k
    }
260
5.38k
    if (f_info->f_inner) {
261
3.57k
      VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
262
3.57k
      VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
263
3.57k
    }
264
5.38k
  }
265
12.6k
}
266
267
// Filter the decoded macroblock row (if needed)
268
3.52k
static void FilterRow(const VP8Decoder* const dec) {
269
3.52k
  int mb_x;
270
3.52k
  const int mb_y = dec->thread_ctx.mb_y;
271
3.52k
  assert(dec->thread_ctx.filter_row);
272
18.2k
  for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {
273
14.6k
    DoFilter(dec, mb_x, mb_y);
274
14.6k
  }
275
3.52k
}
276
277
//------------------------------------------------------------------------------
278
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
279
280
876
// Fills dec->fstrengths[segment][is_i4x4] for every segment and for both
// macroblock kinds (index 0: i16x16, index 1: i4x4). Does nothing when
// filtering is disabled (dec->filter_type <= 0).
// For an entry whose clamped level is 0, only 'f_limit' (set to 0, meaning
// "no filtering") and 'f_inner' are written.
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
  const VP8FilterHeader* const hdr = &dec->filter_hdr;
  int seg;

  if (dec->filter_type <= 0) {
    return;
  }
  for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
    int is_i4x4;
    // First, compute the initial level: the frame level, or the per-segment
    // strength (added to the frame level unless it is an absolute value).
    int seg_level = hdr->level;
    if (dec->segment_hdr.use_segment) {
      seg_level = dec->segment_hdr.filter_strength[seg];
      if (!dec->segment_hdr.absolute_delta) {
        seg_level += hdr->level;
      }
    }
    for (is_i4x4 = 0; is_i4x4 <= 1; ++is_i4x4) {
      VP8FInfo* const info = &dec->fstrengths[seg][is_i4x4];
      int level = seg_level;
      if (hdr->use_lf_delta) {
        level += hdr->ref_lf_delta[0];
        if (is_i4x4) {
          level += hdr->mode_lf_delta[0];
        }
      }
      // clamp to [0, 63]
      if (level < 0) {
        level = 0;
      } else if (level > 63) {
        level = 63;
      }
      if (level == 0) {
        info->f_limit = 0;  // no filtering
      } else {
        int ilevel = level;
        int hev_thresh = 0;
        if (hdr->sharpness > 0) {
          ilevel >>= (hdr->sharpness > 4) ? 2 : 1;
          if (ilevel > 9 - hdr->sharpness) {
            ilevel = 9 - hdr->sharpness;
          }
        }
        if (ilevel < 1) {
          ilevel = 1;
        }
        if (level >= 40) {
          hev_thresh = 2;
        } else if (level >= 15) {
          hev_thresh = 1;
        }
        info->f_ilevel = ilevel;
        info->f_limit = 2 * level + ilevel;
        info->hev_thresh = hev_thresh;
      }
      info->f_inner = is_i4x4;
    }
  }
}
330
331
//------------------------------------------------------------------------------
332
// Dithering
333
334
// minimal amp that will provide a non-zero dithering effect
335
0
#define MIN_DITHER_AMP 4

// Number of entries of kQuantToDitherAmp[].
#define DITHER_AMP_TAB_SIZE 12
// Dithering amplitude factor, looked up with a segment's 'uv_quant' value.
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
    // roughly, it's dqm->uv_mat[1]
    8, 7, 6, 4,  //
    4, 2, 2, 2,  //
    1, 1, 1, 1};
341
342
void VP8InitDithering(const WebPDecoderOptions* const options,
343
876
                      VP8Decoder* const dec) {
344
876
  assert(dec != NULL);
345
876
  if (options != NULL) {
346
876
    const int d = options->dithering_strength;
347
876
    const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
348
876
    const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
349
876
    if (f > 0) {
350
0
      int s;
351
0
      int all_amp = 0;
352
0
      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
353
0
        VP8QuantMatrix* const dqm = &dec->dqm[s];
354
0
        if (dqm->uv_quant < DITHER_AMP_TAB_SIZE) {
355
0
          const int idx = (dqm->uv_quant < 0) ? 0 : dqm->uv_quant;
356
0
          dqm->dither = (f * kQuantToDitherAmp[idx]) >> 3;
357
0
        }
358
0
        all_amp |= dqm->dither;
359
0
      }
360
0
      if (all_amp != 0) {
361
0
        VP8InitRandom(&dec->dithering_rg, 1.0f);
362
0
        dec->dither = 1;
363
0
      }
364
0
    }
365
    // potentially allow alpha dithering
366
876
    dec->alpha_dithering = options->alpha_dithering_strength;
367
876
    if (dec->alpha_dithering > 100) {
368
0
      dec->alpha_dithering = 100;
369
876
    } else if (dec->alpha_dithering < 0) {
370
0
      dec->alpha_dithering = 0;
371
0
    }
372
876
  }
373
876
}
374
375
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
376
0
static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
377
0
  uint8_t dither[64];
378
0
  int i;
379
0
  for (i = 0; i < 8 * 8; ++i) {
380
0
    dither[i] = VP8RandomBits2(rg, VP8_DITHER_AMP_BITS + 1, amp);
381
0
  }
382
0
  VP8DitherCombine8x8(dither, dst, bps);
383
0
}
384
385
0
// Dithers the U and V planes of the cached macroblock row, for the columns
// in [dec->tl_mb_x, dec->br_mb_x). A macroblock is left untouched when its
// 'dither' amplitude is below MIN_DITHER_AMP. Must only be called when
// dec->dither is set.
static void DitherRow(VP8Decoder* const dec) {
  const VP8ThreadContext* const ctx = &dec->thread_ctx;
  const int uv_bps = dec->cache_uv_stride;
  const int row_offset = ctx->id * 8 * uv_bps;
  int mb_x;
  assert(dec->dither);
  for (mb_x = dec->tl_mb_x; mb_x < dec->br_mb_x; ++mb_x) {
    const VP8MBData* const data = ctx->mb_data + mb_x;
    if (data->dither < MIN_DITHER_AMP) {
      continue;
    }
    {
      uint8_t* const u_dst = dec->cache_u + row_offset + mb_x * 8;
      uint8_t* const v_dst = dec->cache_v + row_offset + mb_x * 8;
      Dither8x8(&dec->dithering_rg, u_dst, uv_bps, data->dither);
      Dither8x8(&dec->dithering_rg, v_dst, uv_bps, data->dither);
    }
  }
}
401
402
//------------------------------------------------------------------------------
403
// This function is called after a row of macroblocks is finished decoding.
404
// It also takes into account the following restrictions:
405
//  * In case of in-loop filtering, we must hold off sending some of the bottom
406
//    pixels as they are yet unfiltered. They will be when the next macroblock
407
//    row is decoded. Meanwhile, we must preserve them by rotating them in the
408
//    cache area. This doesn't hold for the very bottom row of the uncropped
409
//    picture of course.
410
//  * we must clip the remaining pixels against the cropping area. The VP8Io
411
//    struct must have the following fields set correctly before calling put():
//    y, u, v, a, mb_y, mb_w and mb_h.
412
413
8.75k
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16)  // vertical position of a MB
414
415
// Finalize and transmit a complete row. Return false in case of user-abort.
416
4.37k
static int FinishRow(void* arg1, void* arg2) {
417
4.37k
  VP8Decoder* const dec = (VP8Decoder*)arg1;
418
4.37k
  VP8Io* const io = (VP8Io*)arg2;
419
4.37k
  int ok = 1;
420
4.37k
  const VP8ThreadContext* const ctx = &dec->thread_ctx;
421
4.37k
  const int cache_id = ctx->id;
422
4.37k
  const int extra_y_rows = kFilterExtraRows[dec->filter_type];
423
4.37k
  const int ysize = extra_y_rows * dec->cache_y_stride;
424
4.37k
  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride;
425
4.37k
  const int y_offset = cache_id * 16 * dec->cache_y_stride;
426
4.37k
  const int uv_offset = cache_id * 8 * dec->cache_uv_stride;
427
4.37k
  uint8_t* const ydst = dec->cache_y - ysize + y_offset;
428
4.37k
  uint8_t* const udst = dec->cache_u - uvsize + uv_offset;
429
4.37k
  uint8_t* const vdst = dec->cache_v - uvsize + uv_offset;
430
4.37k
  const int mb_y = ctx->mb_y;
431
4.37k
  const int is_first_row = (mb_y == 0);
432
4.37k
  const int is_last_row = (mb_y >= dec->br_mb_y - 1);
433
434
4.37k
  if (dec->mt_method == 2) {
435
0
    ReconstructRow(dec, ctx);
436
0
  }
437
438
4.37k
  if (ctx->filter_row) {
439
3.52k
    FilterRow(dec);
440
3.52k
  }
441
442
4.37k
  if (dec->dither) {
443
0
    DitherRow(dec);
444
0
  }
445
446
4.37k
  if (io->put != NULL) {
447
4.37k
    int y_start = MACROBLOCK_VPOS(mb_y);
448
4.37k
    int y_end = MACROBLOCK_VPOS(mb_y + 1);
449
4.37k
    if (!is_first_row) {
450
3.60k
      y_start -= extra_y_rows;
451
3.60k
      io->y = ydst;
452
3.60k
      io->u = udst;
453
3.60k
      io->v = vdst;
454
3.60k
    } else {
455
768
      io->y = dec->cache_y + y_offset;
456
768
      io->u = dec->cache_u + uv_offset;
457
768
      io->v = dec->cache_v + uv_offset;
458
768
    }
459
460
4.37k
    if (!is_last_row) {
461
3.99k
      y_end -= extra_y_rows;
462
3.99k
    }
463
4.37k
    if (y_end > io->crop_bottom) {
464
284
      y_end = io->crop_bottom;  // make sure we don't overflow on last row.
465
284
    }
466
    // If dec->alpha_data is not NULL, we have some alpha plane present.
467
4.37k
    io->a = NULL;
468
4.37k
    if (dec->alpha_data != NULL && y_start < y_end) {
469
768
      io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
470
768
      if (io->a == NULL) {
471
78
        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
472
78
                           "Could not decode alpha data.");
473
78
      }
474
768
    }
475
4.29k
    if (y_start < io->crop_top) {
476
0
      const int delta_y = io->crop_top - y_start;
477
0
      y_start = io->crop_top;
478
0
      assert(!(delta_y & 1));
479
0
      io->y += dec->cache_y_stride * delta_y;
480
0
      io->u += dec->cache_uv_stride * (delta_y >> 1);
481
0
      io->v += dec->cache_uv_stride * (delta_y >> 1);
482
0
      if (io->a != NULL) {
483
0
        io->a += io->width * delta_y;
484
0
      }
485
0
    }
486
4.29k
    if (y_start < y_end) {
487
4.29k
      io->y += io->crop_left;
488
4.29k
      io->u += io->crop_left >> 1;
489
4.29k
      io->v += io->crop_left >> 1;
490
4.29k
      if (io->a != NULL) {
491
690
        io->a += io->crop_left;
492
690
      }
493
4.29k
      io->mb_y = y_start - io->crop_top;
494
4.29k
      io->mb_w = io->crop_right - io->crop_left;
495
4.29k
      io->mb_h = y_end - y_start;
496
4.29k
      ok = io->put(io);
497
4.29k
    }
498
4.29k
  }
499
  // rotate top samples if needed
500
4.29k
  if (cache_id + 1 == dec->num_caches) {
501
4.29k
    if (!is_last_row) {
502
3.93k
      WEBP_UNSAFE_MEMCPY(dec->cache_y - ysize, ydst + 16 * dec->cache_y_stride,
503
3.93k
                         ysize);
504
3.93k
      WEBP_UNSAFE_MEMCPY(dec->cache_u - uvsize, udst + 8 * dec->cache_uv_stride,
505
3.93k
                         uvsize);
506
3.93k
      WEBP_UNSAFE_MEMCPY(dec->cache_v - uvsize, vdst + 8 * dec->cache_uv_stride,
507
3.93k
                         uvsize);
508
3.93k
    }
509
4.29k
  }
510
511
4.29k
  return ok;
512
4.37k
}
513
514
#undef MACROBLOCK_VPOS
515
516
//------------------------------------------------------------------------------
517
518
4.37k
// Processes the macroblock row dec->mb_y once its data has been parsed.
// Without threads (dec->mt_method == 0) the row is reconstructed and finished
// right away and the result of FinishRow() is returned.
// Otherwise the previous worker job is waited for, the thread context is
// updated for the new row, and a new job is launched; the returned value then
// reflects the outcome of the previous job's synchronization.
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
  VP8ThreadContext* const ctx = &dec->thread_ctx;
  const int filter_row = (dec->filter_type > 0) &&
                         (dec->mb_y >= dec->tl_mb_y) &&
                         (dec->mb_y <= dec->br_mb_y);
  int ok = 1;

  if (dec->mt_method == 0) {
    // ctx->id and ctx->f_info are already set
    ctx->mb_y = dec->mb_y;
    ctx->filter_row = filter_row;
    ReconstructRow(dec, ctx);
    return FinishRow(dec, io);
  }
  {
    WebPWorker* const worker = &dec->worker;
    // Finish previous job *before* updating context
    ok &= WebPGetWorkerInterface()->Sync(worker);
    assert(worker->status == OK);
    if (!ok) {
      return ok;
    }
    // spawn a new deblocking/output job
    ctx->io = *io;
    ctx->id = dec->cache_id;
    ctx->mb_y = dec->mb_y;
    ctx->filter_row = filter_row;
    if (dec->mt_method == 2) {  // swap macroblock data
      VP8MBData* const previous = ctx->mb_data;
      ctx->mb_data = dec->mb_data;
      dec->mb_data = previous;
    } else {
      // perform reconstruction directly in main thread
      ReconstructRow(dec, ctx);
    }
    if (filter_row) {  // swap filter info
      VP8FInfo* const previous = ctx->f_info;
      ctx->f_info = dec->f_info;
      dec->f_info = previous;
    }
    // (reconstruct)+filter in parallel
    WebPGetWorkerInterface()->Launch(worker);
    // move on to the next cache slot, wrapping around
    dec->cache_id += 1;
    if (dec->cache_id == dec->num_caches) {
      dec->cache_id = 0;
    }
  }
  return ok;
}
562
563
//------------------------------------------------------------------------------
564
// Finish setting up the decoding parameter once user's setup() is called.
565
566
876
// Finishes the decoder setup once the user's setup() hook has run: honours
// io->bypass_filtering, derives the macroblock window
// [tl_mb_x, br_mb_x) x [tl_mb_y, br_mb_y) that needs in-loop filtering from
// the crop rectangle of 'io', and precomputes the filter strengths.
// Returns VP8_STATUS_OK, or the decoder status set by VP8SetError() when
// io->setup() fails.
VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
  // Call setup() first. This may trigger additional decoding features on 'io'.
  // Note: Afterward, we must call teardown() no matter what.
  if (io->setup != NULL && !io->setup(io)) {
    VP8SetError(dec, VP8_STATUS_INVALID_PARAM, "Frame setup failed");
    return dec->status;
  }

  // Disable filtering per user request
  if (io->bypass_filtering) {
    dec->filter_type = 0;
  }

  // Define the area where we can skip in-loop filtering, in case of cropping.
  //
  // 'Simple' filter reads two luma samples outside of the macroblock
  // and filters one. It doesn't filter the chroma samples. Hence, we can
  // avoid doing the in-loop filtering before crop_top/crop_left position.
  // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
  // Means: there's a dependency chain that goes all the way up to the
  // top-left corner of the picture (MB #0). We must filter all the previous
  // macroblocks.
  {
    const int extra_pixels = kFilterExtraRows[dec->filter_type];
    if (dec->filter_type == 2) {
      // For complex filter, we need to preserve the dependency chain.
      dec->tl_mb_x = 0;
      dec->tl_mb_y = 0;
    } else {
      // For simple filter, we can filter only the cropped region.
      // We include 'extra_pixels' on the other side of the boundary, since
      // vertical or horizontal filtering of the previous macroblock can
      // modify some abutting pixels.
      // The positions are clamped to zero *before* being converted to
      // macroblock units: right-shifting a negative value has an
      // implementation-defined result.
      const int left = io->crop_left - extra_pixels;
      const int top = io->crop_top - extra_pixels;
      dec->tl_mb_x = (left > 0) ? (left >> 4) : 0;
      dec->tl_mb_y = (top > 0) ? (top >> 4) : 0;
    }
    // We need some 'extra' pixels on the right/bottom.
    dec->br_mb_y = (io->crop_bottom + 15 + extra_pixels) >> 4;
    dec->br_mb_x = (io->crop_right + 15 + extra_pixels) >> 4;
    if (dec->br_mb_x > dec->mb_w) {
      dec->br_mb_x = dec->mb_w;
    }
    if (dec->br_mb_y > dec->mb_h) {
      dec->br_mb_y = dec->mb_h;
    }
  }
  PrecomputeFilterStrengths(dec);
  return VP8_STATUS_OK;
}
617
618
876
// Ends the decoding of a frame: waits for the worker when threads are in use
// (dec->mt_method > 0), then calls the user's teardown() hook, if any.
// Returns the result of the worker synchronization, or 1 without threads.
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
  const int ok = (dec->mt_method > 0)
                     ? WebPGetWorkerInterface()->Sync(&dec->worker)
                     : 1;

  if (io->teardown != NULL) {
    io->teardown(io);
  }
  return ok;
}
629
630
//------------------------------------------------------------------------------
631
// For multi-threaded decoding we need to use 3 rows of 16 pixels as a delay
// line.
632
//
633
// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
634
// immediately, and needs to wait for first few rows of the next macroblock to
635
// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
636
// on strength).
637
// With two threads, the vertical positions of the rows being decoded are:
638
// Decode:  [ 0..15][16..31][32..47][48..63][64..79][...
639
// Deblock:         [ 0..11][12..27][28..43][44..59][...
640
// If we use two threads and two caches of 16 pixels, the sequence would be:
641
// Decode:  [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
642
// Deblock:         [ 0..11][12..27!!][-4..11][12..27][...
643
// The problem occurs during row [12..15!!] that both the decoding and
644
// deblocking threads are writing simultaneously.
645
// With 3 cache lines, one gets a safe write pattern:
646
// Decode:  [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
647
// Deblock:         [ 0..11][12..27][28..43][-4..11][12..27][28...
648
// Note that multi-threaded output _without_ deblocking can make use of two
649
// cache lines of 16 pixels only, since there's no lagging behind. The decoding
650
// and output process have non-concurrent writing:
651
// Decode:  [ 0..15][16..31][ 0..15][16..31][...
652
// io->put:         [ 0..15][16..31][ 0..15][...
653
654
0
#define MT_CACHE_LINES 3
655
876
#define ST_CACHE_LINES 1  // 1 cache row only for single-threaded case
656
657
// Initialize multi/single-thread worker
658
876
static int InitThreadContext(VP8Decoder* const dec) {
659
876
  dec->cache_id = 0;
660
876
  if (dec->mt_method > 0) {
661
0
    WebPWorker* const worker = &dec->worker;
662
0
    if (!WebPGetWorkerInterface()->Reset(worker)) {
663
0
      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
664
0
                         "thread initialization failed.");
665
0
    }
666
0
    worker->data1 = dec;
667
0
    worker->data2 = (void*)&dec->thread_ctx.io;
668
0
    worker->hook = FinishRow;
669
0
    dec->num_caches =
670
0
        (dec->filter_type > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
671
876
  } else {
672
876
    dec->num_caches = ST_CACHE_LINES;
673
876
  }
674
876
  return 1;
675
876
}
676
677
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
678
                       const WebPHeaderStructure* const headers, int width,
679
876
                       int height) {
680
876
  if (options == NULL || options->use_threads == 0) {
681
876
    return 0;
682
876
  }
683
0
  (void)headers;
684
0
  (void)width;
685
0
  (void)height;
686
0
  assert(headers == NULL || !headers->is_lossless);
687
0
#if defined(WEBP_USE_THREAD)
688
0
  if (width >= MIN_WIDTH_FOR_THREADS) return 2;
689
0
#endif
690
0
  return 0;
691
0
}
692
693
#undef MT_CACHE_LINES
694
#undef ST_CACHE_LINES
695
696
//------------------------------------------------------------------------------
697
// Memory setup
698
699
876
// Makes sure dec->mem is large enough for all the per-frame work areas and
// carves them out of it, in this order: intra modes of the top row, top
// samples, macroblock info, filter info, the (aligned) yuv_b work area,
// macroblock data, the row cache and the alpha plane.
// The buffer is only reallocated when the current one is too small.
// dec->num_caches must be set beforehand.
// Returns 1 on success and a false value on size overflow or allocation
// failure.
static int AllocateMemory(VP8Decoder* const dec) {
  const int num_caches = dec->num_caches;
  const int mb_w = dec->mb_w;
  const int extra_rows = kFilterExtraRows[dec->filter_type];
  // Some areas are doubled so that pointers can be swapped between threads.
  const int f_info_rows = (dec->mt_method > 0) ? 2 : 1;
  const int mb_data_rows = (dec->mt_method == 2) ? 2 : 1;
  // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
  const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
  const size_t top_size = sizeof(VP8TopSamples) * mb_w;
  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
  const size_t f_info_size =
      (dec->filter_type > 0) ? mb_w * f_info_rows * sizeof(VP8FInfo) : 0;
  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b);
  const size_t mb_data_size = mb_data_rows * mb_w * sizeof(*dec->mb_data);
  const size_t cache_height = (16 * num_caches + extra_rows) * 3 / 2;
  const size_t cache_size = top_size * cache_height;
  // alpha_size is the only one that scales as width x height.
  const uint64_t alpha_size =
      (dec->alpha_data != NULL)
          ? (uint64_t)dec->pic_hdr.width * dec->pic_hdr.height
          : 0ULL;
  const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size +
                          mb_info_size + f_info_size + yuv_size + mb_data_size +
                          cache_size + alpha_size + WEBP_ALIGN_CST;
  uint8_t* mem;

  if (!CheckSizeOverflow(needed)) {
    return 0;  // check for overflow
  }
  if (needed > dec->mem_size) {
    WebPSafeFree(dec->mem);
    dec->mem_size = 0;
    dec->mem = WebPSafeMalloc(needed, sizeof(uint8_t));
    if (dec->mem == NULL) {
      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                         "no memory during frame initialization.");
    }
    // down-cast is ok, thanks to WebPSafeMalloc() above.
    dec->mem_size = (size_t)needed;
  }

  mem = (uint8_t*)dec->mem;

  // intra modes of the top row
  dec->intra_t = mem;
  mem += intra_pred_mode_size;

  // top samples
  dec->yuv_t = (VP8TopSamples*)mem;
  mem += top_size;

  // macroblock info; entry -1 is valid and holds the 'left' info
  dec->mb_info = ((VP8MB*)mem) + 1;
  mem += mb_info_size;

  // filter info
  dec->f_info = f_info_size ? (VP8FInfo*)mem : NULL;
  mem += f_info_size;
  dec->thread_ctx.id = 0;
  dec->thread_ctx.f_info = dec->f_info;
  if (dec->filter_type > 0 && dec->mt_method > 0) {
    // secondary cache line. The deblocking process needs to make use of the
    // filtering strength from previous macroblock row, while the new ones
    // are being decoded in parallel. We'll just swap the pointers.
    dec->thread_ctx.f_info += mb_w;
  }

  // yuv_b work area
  mem = (uint8_t*)WEBP_ALIGN(mem);
  assert((yuv_size & WEBP_ALIGN_CST) == 0);
  dec->yuv_b = mem;
  mem += yuv_size;

  // macroblock data
  dec->mb_data = (VP8MBData*)mem;
  dec->thread_ctx.mb_data = (VP8MBData*)mem;
  if (dec->mt_method == 2) {
    dec->thread_ctx.mb_data += mb_w;
  }
  mem += mb_data_size;

  // row cache: each plane is preceded by room for the extra (delayed) rows
  dec->cache_y_stride = 16 * mb_w;
  dec->cache_uv_stride = 8 * mb_w;
  {
    const int extra_y = extra_rows * dec->cache_y_stride;
    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride;
    dec->cache_y = mem + extra_y;
    dec->cache_u =
        dec->cache_y + 16 * num_caches * dec->cache_y_stride + extra_uv;
    dec->cache_v =
        dec->cache_u + 8 * num_caches * dec->cache_uv_stride + extra_uv;
    dec->cache_id = 0;
  }
  mem += cache_size;

  // alpha plane
  dec->alpha_plane = alpha_size ? mem : NULL;
  mem += alpha_size;
  assert(mem <= (uint8_t*)dec->mem + dec->mem_size);

  // note: left/top-info is initialized once for all.
  WEBP_UNSAFE_MEMSET(dec->mb_info - 1, 0, mb_info_size);
  VP8InitScanline(dec);  // initialize left too.

  // initialize top
  WEBP_UNSAFE_MEMSET(dec->intra_t, B_DC_PRED, intra_pred_mode_size);

  return 1;
}
801
802
876
// Prepares 'io' for a new frame: output position reset to row 0, sample
// pointers and strides taken from the decoder's row cache, no alpha plane.
static void InitIo(VP8Decoder* const dec, VP8Io* io) {
  io->mb_y = 0;

  // luma
  io->y = dec->cache_y;
  io->y_stride = dec->cache_y_stride;

  // chroma
  io->u = dec->cache_u;
  io->v = dec->cache_v;
  io->uv_stride = dec->cache_uv_stride;

  // alpha
  io->a = NULL;
}
812
813
876
// Sets up everything needed to decode a frame: thread context, memory, 'io'
// and the DSP function pointers. Returns 0 as soon as a step fails, 1
// otherwise.
int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
  // InitThreadContext() must come first: it sets dec->num_caches, which
  // AllocateMemory() relies on.
  if (!InitThreadContext(dec) || !AllocateMemory(dec)) {
    return 0;
  }
  InitIo(dec, io);
  VP8DspInit();  // Init critical function pointers and look-up tables.
  return 1;
}
820
821
//------------------------------------------------------------------------------