Coverage Report

Created: 2026-06-15 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/libde265/libde265/image.cc
Line
Count
Source
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "image.h"
22
#include "decctx.h"
23
24
#include <atomic>
25
26
#include <stdlib.h>
27
#include <string.h>
28
#include <assert.h>
29
30
#include <limits>
31
32
33
#ifdef HAVE_MALLOC_H
34
#include <malloc.h>
35
#endif
36
37
#ifdef HAVE_SSE4_1
38
// SSE code processes 128bit per iteration and thus might read more data
39
// than is later actually used.
40
0
#define MEMORY_PADDING  16
41
#else
42
#define MEMORY_PADDING  0
43
#endif
44
45
0
#define STANDARD_ALIGNMENT 16
46
47
#if defined(__MINGW32__)
48
#define ALLOC_ALIGNED(alignment, size)         __mingw_aligned_malloc((size), (alignment))
49
#define FREE_ALIGNED(mem)                      __mingw_aligned_free((mem))
50
#elif defined(_MSC_VER)
51
#define ALLOC_ALIGNED(alignment, size)         _aligned_malloc((size), (alignment))
52
#define FREE_ALIGNED(mem)                      _aligned_free((mem))
53
#elif defined(HAVE_POSIX_MEMALIGN)
54
0
// Allocate `size` bytes whose address is a multiple of `alignment`, using
// posix_memalign(). Returns nullptr when posix_memalign() reports an error.
// Memory obtained here is released with FREE_ALIGNED() (plain free()).
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
    void *block = nullptr;
    const int rc = posix_memalign(&block, alignment, size);
    return (rc == 0) ? block : nullptr;
}
61
0
#define FREE_ALIGNED(mem)                      free((mem))
62
#else
63
#define ALLOC_ALIGNED(alignment, size)      memalign((alignment), (size))
64
#define FREE_ALIGNED(mem)                   free((mem))
65
#endif
66
67
0
#define ALLOC_ALIGNED_16(size)              ALLOC_ALIGNED(16, size)
68
69
// Allocate the pixel buffer for plane `cIdx` of `img` and register it with the
// image via set_image_plane().
//
// The row stride is the plane width rounded up to STANDARD_ALIGNMENT and the
// buffer holds stride*height bytes plus MEMORY_PADDING.
//
// inputdata   - optional source pixels; when non-null they are copied into the
//               new plane.
// inputstride - distance in bytes between two rows of `inputdata`. If it equals
//               the plane stride, the whole plane is copied at once; otherwise
//               rows are copied one by one. Non-positive strides copy nothing.
// userdata    - stored alongside the plane through set_image_plane().
//
// Returns the newly allocated plane, or nullptr if the allocation failed (in
// which case the image is left untouched).
LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx,
                                           void* inputdata, int inputstride, void *userdata)
{
  int alignment = STANDARD_ALIGNMENT;
  uint32_t stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment;
  uint32_t height = img->get_height(cIdx);

  // size computed in size_t: stride*height can exceed UINT32_MAX for large planes
  uint8_t* p = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(stride) * height + MEMORY_PADDING));

  if (p==nullptr) { return nullptr; }

  img->set_image_plane(cIdx, p, stride, userdata);

  // copy input data if provided

  if (inputdata != nullptr) {
    if (inputstride == static_cast<int>(stride)) {
      memcpy(p, inputdata, static_cast<size_t>(stride) * height);
    }
    else if (inputstride > 0) {
      const uint8_t* src = static_cast<const uint8_t*>(inputdata);
      const size_t src_stride = static_cast<size_t>(inputstride);

      // Never write more than one destination row per source row: an input
      // stride larger than the plane stride would otherwise run past the end of
      // each row and, on the last row, past the end of the allocation.
      const size_t row_bytes = (src_stride < stride) ? src_stride : stride;

      for (uint32_t y=0;y<height;y++) {
        memcpy(p + static_cast<size_t>(y) * stride,
               src + src_stride * y,
               row_bytes);
      }
    }
  }

  return p;
}
100
101
102
// Free the buffer of plane `cIdx` of `img` with FREE_ALIGNED().
// The plane must be present (asserted). The pointer stored in the image is not
// reset by this function.
LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx)
{
  uint8_t* const p = img->get_image_plane(cIdx);

  assert(p);

  FREE_ALIGNED(p);
}
108
109
110
static int  de265_image_get_buffer(de265_decoder_context* ctx,
111
                                   de265_image_spec* spec, de265_image* img, void* userdata)
112
0
{
113
0
  const uint32_t rawChromaWidth  = spec->width  / img->SubWidthC;
114
0
  const uint32_t rawChromaHeight = spec->height / img->SubHeightC;
115
116
0
  uint32_t luma_stride   = (spec->width    + spec->alignment-1) / spec->alignment * spec->alignment;
117
0
  uint32_t chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment;
118
119
0
  assert(img->BitDepth_Y >= 8 && img->BitDepth_Y <= 16);
120
0
  assert(img->BitDepth_C >= 8 && img->BitDepth_C <= 16);
121
122
0
  uint32_t luma_bpl   = luma_stride   * ((img->BitDepth_Y+7)/8);
123
0
  uint32_t chroma_bpl = chroma_stride * ((img->BitDepth_C+7)/8);
124
125
0
  uint32_t luma_height   = spec->height;
126
0
  uint32_t chroma_height = rawChromaHeight;
127
128
0
  bool alloc_failed = false;
129
130
  // Compute the plane sizes in size_t. Each operand fits in uint32_t, but the
131
  // height * bytes-per-line product can exceed UINT32_MAX for large frames, so
132
  // the multiplication must be done in 64 bits. Computing it in 32 bits wraps
133
  // the allocation size to a small value while fill_image() later writes the
134
  // real (size_t) size -> heap buffer overflow (GHSA-vv8h-932h-7r86).
135
0
  uint8_t* p[3] = { nullptr,nullptr,nullptr };
136
0
  p[0] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(luma_height) * luma_bpl + MEMORY_PADDING));
137
0
  if (p[0]==nullptr) { alloc_failed=true; }
138
139
0
  if (img->get_chroma_format() != de265_chroma_mono) {
140
0
    p[1] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(chroma_height) * chroma_bpl + MEMORY_PADDING));
141
0
    p[2] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(chroma_height) * chroma_bpl + MEMORY_PADDING));
142
143
0
    if (p[1]==nullptr || p[2]==nullptr) { alloc_failed=true; }
144
0
  }
145
0
  else {
146
0
    p[1] = nullptr;
147
0
    p[2] = nullptr;
148
0
    chroma_stride = 0;
149
0
  }
150
151
0
  if (alloc_failed) {
152
0
    for (int i=0;i<3;i++)
153
0
      if (p[i]) {
154
0
        FREE_ALIGNED(p[i]);
155
0
      }
156
157
0
    return 0;
158
0
  }
159
160
0
  img->set_image_plane(0, p[0], luma_stride, nullptr);
161
0
  img->set_image_plane(1, p[1], chroma_stride, nullptr);
162
0
  img->set_image_plane(2, p[2], chroma_stride, nullptr);
163
164
0
  img->fill_image(0,0,0);
165
166
0
  return 1;
167
0
}
168
169
static void de265_image_release_buffer(de265_decoder_context* ctx,
170
                                       de265_image* img, void* userdata)
171
0
{
172
0
  for (int i=0;i<3;i++) {
173
0
    uint8_t* p = img->get_image_plane(i);
174
0
    if (p) {
175
0
      FREE_ALIGNED(p);
176
0
    }
177
0
  }
178
0
}
179
180
181
// Built-in allocation callbacks (get_buffer, release_buffer). alloc_image()
// selects these whenever no decoder context is set or custom allocation
// functions are not requested.
de265_image_allocation de265_image::default_image_allocation = {
  de265_image_get_buffer, de265_image_release_buffer
};
185
186
187
void de265_image::set_image_plane(int cIdx, uint8_t* mem, ptrdiff_t stride, void *userdata)
188
0
{
189
0
  pixels[cIdx] = mem;
190
0
  plane_user_data[cIdx] = userdata;
191
192
0
  if (cIdx==0) { this->stride        = stride; }
193
0
  else         { this->chroma_stride = stride; }
194
0
}
195
196
197
0
// Compiler-generated default constructor. Initial member values come from the
// class declaration, which is not part of this file.
de265_image::de265_image() = default;
198
199
200
// (Re-)initialize this image for a picture of w x h luma samples.
//
// Any previously held pixel data and slice headers are released first. The
// function then
//  - assigns a fresh image ID and stores the context, pts and user data,
//  - derives chroma plane dimensions and subsampling factors from `c`,
//  - applies the conformance window of `sps` (no cropping when `sps` is null),
//  - takes the bit depths from `sps` (8 bit when `sps` is null),
//  - obtains pixel buffers through the allocation callbacks (the decoder
//    context's callbacks if `dctx` is set and `useCustomAllocFunc` is true,
//    otherwise the built-in default), and
//  - if `allocMetadata` is set, allocates the per-picture decoding metadata
//    arrays sized from `sps` (which must then be non-null).
//
// Returns DE265_OK on success,
//         DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE if the conformance window
//           leaves no visible picture area,
//         DE265_ERROR_OUT_OF_MEMORY if a pixel or metadata allocation failed.
de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c,
                                     std::shared_ptr<const seq_parameter_set> sps, bool allocMetadata,
                                     decoder_context* dctx,
                                     //encoder_context* ectx,
                                     de265_PTS pts, void* user_data,
                                     bool useCustomAllocFunc)
{
  if (allocMetadata) { assert(sps); }

  if (sps) { this->sps = sps; }

  release(); /* TODO: review code for efficient allocation when arrays are already
                allocated to the requested size. Without the release, the old image-data
                will not be freed. */

  static std::atomic<uint32_t> s_next_image_ID(0);
  ID = s_next_image_ID++;
  removed_at_picture_id = std::numeric_limits<uint32_t>::max();

  decctx = dctx;
  //encctx = ectx;

  // --- allocate image buffer ---

  chroma_format= c;

  width = w;
  height = h;
  chroma_width = w;
  chroma_height= h;

  this->user_data = user_data;
  this->pts = pts;

  de265_image_spec spec;

  // Scale factors that convert the conformance window offsets to luma samples.
  uint8_t WinUnitX, WinUnitY;

  switch (chroma_format) {
  case de265_chroma_420:
    spec.format = de265_image_format_YUV420P8;
    chroma_width  = (chroma_width +1)/2;
    chroma_height = (chroma_height+1)/2;
    SubWidthC  = 2;
    SubHeightC = 2;
    WinUnitX = 2;
    WinUnitY = 2;
    break;

  case de265_chroma_422:
    spec.format = de265_image_format_YUV422P8;
    chroma_width = (chroma_width+1)/2;
    SubWidthC  = 2;
    SubHeightC = 1;
    WinUnitX = 2;
    WinUnitY = 1;
    break;

  case de265_chroma_444:
    spec.format = de265_image_format_YUV444P8;
    SubWidthC  = 1;
    SubHeightC = 1;
    WinUnitX = 1;
    WinUnitY = 1;
    break;

  case de265_chroma_mono:
    spec.format = de265_image_format_mono8;
    chroma_width = 0;
    chroma_height= 0;
    SubWidthC  = 1;
    SubHeightC = 1;
    WinUnitX = 1;
    WinUnitY = 1;
    break;

  default:
    assert(0);
    WinUnitX = WinUnitY = 0;
    break;
  }

  if (chroma_format != de265_chroma_mono && sps) {
    assert(sps->SubWidthC  == SubWidthC);
    assert(sps->SubHeightC == SubHeightC);
  }

  spec.width  = w;
  spec.height = h;
  spec.alignment = STANDARD_ALIGNMENT;


  // conformance window cropping

  int left   = sps ? sps->conf_win_left_offset : 0;
  int right  = sps ? sps->conf_win_right_offset : 0;
  int top    = sps ? sps->conf_win_top_offset : 0;
  int bottom = sps ? sps->conf_win_bottom_offset : 0;

  // reject windows that crop away the whole picture
  if ((left+right)*WinUnitX >= width) {
    return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE;
  }

  if ((top+bottom)*WinUnitY >= height) {
    return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE;
  }

  width_confwin = width - (left+right)*WinUnitX;
  height_confwin= height- (top+bottom)*WinUnitY;
  chroma_width_confwin = chroma_width -left-right;
  chroma_height_confwin= chroma_height-top-bottom;

  spec.crop_left  = left *WinUnitX;
  spec.crop_right = right*WinUnitX;
  spec.crop_top   = top   *WinUnitY;
  spec.crop_bottom= bottom*WinUnitY;

  spec.visible_width = width_confwin;
  spec.visible_height= height_confwin;


  BitDepth_Y = (sps==nullptr) ? 8 : sps->BitDepth_Y;
  BitDepth_C = (sps==nullptr) ? 8 : sps->BitDepth_C;

  bpp_shift[0] = (BitDepth_Y <= 8) ? 0 : 1;
  bpp_shift[1] = (BitDepth_C <= 8) ? 0 : 1;
  bpp_shift[2] = bpp_shift[1];


  // allocate memory and set conformance window pointers

  void* alloc_userdata = nullptr;
  if (decctx) alloc_userdata = decctx->param_image_allocation_userdata;
  // if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // actually not needed

  /*
  if (encctx && useCustomAllocFunc) {
    encoder_image_release_func = encctx->release_func;

    // if we do not provide a release function, use our own

    if (encoder_image_release_func == nullptr) {
      image_allocation_functions = de265_image::default_image_allocation;
    }
    else {
      image_allocation_functions.get_buffer     = nullptr;
      image_allocation_functions.release_buffer = nullptr;
    }
  }
  else*/ if (decctx && useCustomAllocFunc) {
    image_allocation_functions = decctx->param_image_allocation_functions;
  }
  else {
    image_allocation_functions = de265_image::default_image_allocation;
  }

  bool mem_alloc_success = true;

  if (image_allocation_functions.get_buffer != nullptr) {
    mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this,
                                                              alloc_userdata);

    // Check for memory shortage before touching the plane pointers: after a
    // failed allocation pixels[] holds no valid buffers, and offsetting them
    // would be pointer arithmetic on null pointers.

    if (!mem_alloc_success)
      {
        return DE265_ERROR_OUT_OF_MEMORY;
      }

    // NOTE(review): these offsets are in samples while pixels[] is a byte
    // pointer; for planes with 2 bytes per sample this looks like it should be
    // scaled by the sample size - confirm against the users of pixels_confwin.
    pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride;

    if (chroma_format != de265_chroma_mono) {
      pixels_confwin[1] = pixels[1] + left + top*chroma_stride;
      pixels_confwin[2] = pixels[2] + left + top*chroma_stride;
    }
    else {
      pixels_confwin[1] = nullptr;
      pixels_confwin[2] = nullptr;
    }
  }

  //alloc_functions = *allocfunc;
  //alloc_userdata  = userdata;

  // --- allocate decoding info arrays ---

  if (allocMetadata) {
    // intra pred mode

    mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
                                             sps->Log2MinPUSize);

    mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
                                              sps->Log2MinPUSize);

    // cb info

    mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY,
                                       sps->Log2MinCbSizeY);

    // pb info (stored in units of 4x4 luma samples)

    int puWidth  = sps->PicWidthInMinCbsY  << (sps->Log2MinCbSizeY -2);
    int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2);

    mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2);


    // tu info

    mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY,
                                       sps->Log2MinTrafoSize);

    // deblk info (stored in units of 4x4 luma samples)

    int deblk_w = (sps->pic_width_in_luma_samples +3)/4;
    int deblk_h = (sps->pic_height_in_luma_samples+3)/4;

    mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2);

    // CTB info: only reallocated when the CTB grid changed

    if (ctb_info.width_in_units  != sps->PicWidthInCtbsY  ||
        ctb_info.height_in_units != sps->PicHeightInCtbsY ||
        ctb_info.log2unitSize    != sps->Log2CtbSizeY)
      {
        // Reset the pointer right away so that it never dangles (the destructor
        // deletes it again) should the allocations below throw.
        delete[] ctb_progress;
        ctb_progress = nullptr;

        mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY,
                                            sps->Log2CtbSizeY);

        ctb_progress = new de265_progress_lock[ ctb_info.data_size ];
      }


    // check for memory shortage

    if (!mem_alloc_success)
      {
        return DE265_ERROR_OUT_OF_MEMORY;
      }
  }

  return DE265_OK;
}
447
448
449
// Releases pixel data and slice headers via release() and then frees the
// per-CTB progress locks.
de265_image::~de265_image()
{
  release();

  // free progress locks (delete[] of a null pointer is a no-op)

  delete[] ctb_progress;
}
459
460
461
void de265_image::release()
462
0
{
463
  // free image memory
464
465
0
  if (pixels[0])
466
0
    {
467
      /*
468
      if (encoder_image_release_func != nullptr) {
469
        encoder_image_release_func(encctx, this,
470
                                   encctx->param_image_allocation_userdata);
471
      }
472
0
      else*/ {
473
0
        image_allocation_functions.release_buffer(decctx, this,
474
0
                                                  decctx ?
475
0
                                                  decctx->param_image_allocation_userdata :
476
0
                                                  nullptr);
477
0
      }
478
479
0
      for (int i=0;i<3;i++)
480
0
        {
481
0
          pixels[i] = nullptr;
482
0
          pixels_confwin[i] = nullptr;
483
0
        }
484
0
    }
485
486
  // free slices
487
488
0
  for (size_t i=0;i<slices.size();i++) {
489
0
    delete slices[i];
490
0
  }
491
0
  slices.clear();
492
0
}
493
494
495
void de265_image::fill_plane(int channel, int value)
496
0
{
497
0
  int bytes_per_pixel = get_bytes_per_pixel(channel);
498
0
  assert(value >= 0); // needed for the shift operation in the check below
499
500
  // Each plane is allocated with MEMORY_PADDING trailing bytes for safe SSE overread; the
501
  // memsets below cover that padding too so it never contains uninitialized heap data.
502
0
  const size_t plane_bytes =
503
0
      (channel == 0 ? static_cast<size_t>(stride) * height
504
0
                    : static_cast<size_t>(chroma_stride) * chroma_height)
505
0
      * bytes_per_pixel;
506
507
0
  if (bytes_per_pixel == 1) {
508
0
    memset(pixels[channel], value, plane_bytes + MEMORY_PADDING);
509
0
  }
510
0
  else if ((value >> 8) == (value & 0xFF)) {
511
0
    assert(bytes_per_pixel == 2);
512
513
    // if we fill the same byte value to all bytes, we can still use memset()
514
0
    memset(pixels[channel], 0, plane_bytes + MEMORY_PADDING);
515
0
  }
516
0
  else {
517
0
    assert(bytes_per_pixel == 2);
518
0
    uint16_t v = value;
519
520
0
    if (channel==0) {
521
      // copy value into first row
522
0
      for (int x = 0; x < width; x++) {
523
0
        *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v;
524
0
      }
525
526
      // copy first row into remaining rows
527
0
      for (int y = 1; y < height; y++) {
528
0
        memcpy(pixels[channel] + y * stride * 2, pixels[channel], chroma_width * 2);
529
0
      }
530
0
    }
531
0
    else {
532
      // copy value into first row
533
0
      for (int x = 0; x < chroma_width; x++) {
534
0
        *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v;
535
0
      }
536
537
      // copy first row into remaining rows
538
0
      for (int y = 1; y < chroma_height; y++) {
539
0
        memcpy(pixels[channel] + y * chroma_stride * 2, pixels[channel], chroma_width * 2);
540
0
      }
541
0
    }
542
543
0
#if MEMORY_PADDING > 0
544
0
    memset(pixels[channel] + plane_bytes, 0, MEMORY_PADDING);
545
0
#endif
546
0
  }
547
0
}
548
549
550
void de265_image::fill_image(int y,int cb,int cr)
551
0
{
552
0
  if (pixels[0]) {
553
0
    fill_plane(0, y);
554
0
  }
555
556
0
  if (pixels[1]) {
557
0
    fill_plane(1, cb);
558
0
  }
559
560
0
  if (pixels[2]) {
561
0
    fill_plane(2, cr);
562
0
  }
563
0
}
564
565
566
// Make this image a copy of the pixel data of `src`.
//
// The image is (re-)allocated with the size, chroma format, SPS, decoder
// context, pts and user data of `src`, without metadata arrays and using the
// default allocation functions; then all lines are copied.
//
// Returns the error reported by alloc_image(); nothing is copied on failure.
de265_error de265_image::copy_image(const de265_image* src)
{
  /* TODO: actually, since we allocate the image only for internal purpose, we
     do not have to call the external allocation routines for this. However, then
     we have to track for each image how to release it again.
     Another option would be to save the copied data not in a de265_image at all.
  */

  const de265_error result = alloc_image(src->width, src->height, src->chroma_format,
                                         src->sps, false,
                                         src->decctx, /*src->encctx,*/
                                         src->pts, src->user_data, false);

  if (result == DE265_OK) {
    copy_lines_from(src, 0, src->height);
  }

  return result;
}
584
585
586
// end = last line + 1
587
void de265_image::copy_lines_from(const de265_image* src, int first, int end)
588
0
{
589
0
  if (end > src->height) end=src->height;
590
591
0
  assert(first % 2 == 0);
592
0
  assert(end   % 2 == 0);
593
594
0
  int luma_bpp   = (sps->BitDepth_Y+7)/8;
595
0
  int chroma_bpp = (sps->BitDepth_C+7)/8;
596
597
0
  if (src->stride == stride) {
598
0
    memcpy(pixels[0]      + first*stride * luma_bpp,
599
0
           src->pixels[0] + first*src->stride * luma_bpp,
600
0
           (end-first)*stride * luma_bpp);
601
0
  }
602
0
  else {
603
0
    for (int yp=first;yp<end;yp++) {
604
0
      memcpy(pixels[0]+yp*stride * luma_bpp,
605
0
             src->pixels[0]+yp*src->stride * luma_bpp,
606
0
             src->width * luma_bpp);
607
0
    }
608
0
  }
609
610
0
  int first_chroma = first / src->SubHeightC;
611
0
  int end_chroma   = end   / src->SubHeightC;
612
613
0
  if (src->chroma_format != de265_chroma_mono) {
614
0
    if (src->chroma_stride == chroma_stride) {
615
0
      memcpy(pixels[1]      + first_chroma*chroma_stride * chroma_bpp,
616
0
             src->pixels[1] + first_chroma*chroma_stride * chroma_bpp,
617
0
             (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
618
0
      memcpy(pixels[2]      + first_chroma*chroma_stride * chroma_bpp,
619
0
             src->pixels[2] + first_chroma*chroma_stride * chroma_bpp,
620
0
             (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
621
0
    }
622
0
    else {
623
0
      for (int y=first_chroma;y<end_chroma;y++) {
624
0
        memcpy(pixels[1]+y*chroma_stride * chroma_bpp,
625
0
               src->pixels[1]+y*src->chroma_stride * chroma_bpp,
626
0
               src->chroma_width * chroma_bpp);
627
0
        memcpy(pixels[2]+y*chroma_stride * chroma_bpp,
628
0
               src->pixels[2]+y*src->chroma_stride * chroma_bpp,
629
0
               src->chroma_width * chroma_bpp);
630
0
      }
631
0
    }
632
0
  }
633
0
}
634
635
636
// Swap the pixel buffers of this image and `b`: plane pointers, conformance
// window pointers, per-plane user data, both strides, and the allocation
// functions, so each buffer is later released through the functions it was
// obtained with. No other member is exchanged.
void de265_image::exchange_pixel_data_with(de265_image& b)
{
  std::swap(image_allocation_functions, b.image_allocation_functions);
  std::swap(stride, b.stride);
  std::swap(chroma_stride, b.chroma_stride);

  for (int plane=0; plane<3; plane++) {
    std::swap(pixels[plane],          b.pixels[plane]);
    std::swap(pixels_confwin[plane],  b.pixels_confwin[plane]);
    std::swap(plane_user_data[plane], b.plane_user_data[plane]);
  }
}
648
649
650
void de265_image::thread_start(int nThreads)
651
0
{
652
0
  std::unique_lock<std::mutex> lock(mutex);
653
654
  //printf("nThreads before: %d %d\n",nThreadsQueued, nThreadsTotal);
655
656
0
  nThreadsQueued += nThreads;
657
0
  nThreadsTotal += nThreads;
658
659
  //printf("nThreads after: %d %d\n",nThreadsQueued, nThreadsTotal);
660
0
}
661
662
void de265_image::thread_run(const thread_task* task)
663
0
{
664
0
  std::unique_lock<std::mutex> lock(mutex);
665
666
  //printf("run thread %s\n", task->name().c_str());
667
668
0
  nThreadsQueued--;
669
0
  nThreadsRunning++;
670
0
}
671
672
void de265_image::thread_blocks()
673
0
{
674
0
  std::unique_lock<std::mutex> lock(mutex);
675
676
0
  nThreadsRunning--;
677
0
  nThreadsBlocked++;
678
0
}
679
680
void de265_image::thread_unblocks()
681
0
{
682
0
  std::unique_lock<std::mutex> lock(mutex);
683
684
0
  nThreadsBlocked--;
685
0
  nThreadsRunning++;
686
0
}
687
688
void de265_image::thread_finishes(const thread_task* task)
689
0
{
690
  //printf("finish thread %s\n", task->name().c_str());
691
692
0
  std::unique_lock<std::mutex> lock(mutex);
693
694
0
  nThreadsRunning--;
695
0
  nThreadsFinished++;
696
0
  assert(nThreadsRunning >= 0);
697
698
0
  if (nThreadsFinished==nThreadsTotal) {
699
0
    finished_cond.notify_all();
700
0
  }
701
0
}
702
703
// Convenience overload taking CTB coordinates: converts (ctbx, ctby) into a
// raster-scan CTB address using the picture width in CTBs from the SPS and
// forwards to the address-based overload.
void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress)
{
  const int ctbsPerRow = sps->PicWidthInCtbsY;
  const int ctbAddrRS  = ctbx + ctbsPerRow*ctby;

  wait_for_progress(task, ctbAddrRS, progress);
}
709
710
// Block the calling task until the CTB at raster-scan address `ctbAddrRS` has
// reached at least `progress`.
//
// Returns immediately when `task` is null or the progress is already reached.
// While waiting, the task is marked Blocked and counted as blocked; afterwards
// it is marked Running again.
void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress)
{
  if (task==nullptr) { return; }

  de265_progress_lock& ctb_lock = ctb_progress[ctbAddrRS];

  // fast path: nothing to wait for
  if (ctb_lock.get_progress() >= progress) {
    return;
  }

  thread_blocks();

  assert(task!=nullptr);
  task->state = thread_task::Blocked;

  /* TODO: check whether we are the first blocked task in the list.
     If we are, we have to conceal input errors.
     Simplest concealment: do not block.
  */

  ctb_lock.wait_for_progress(progress);

  task->state = thread_task::Running;
  thread_unblocks();
}
731
732
733
void de265_image::wait_for_completion()
734
0
{
735
0
  std::unique_lock<std::mutex> lock(mutex);
736
737
0
  while (nThreadsFinished!=nThreadsTotal) {
738
0
    finished_cond.wait(lock);
739
0
  }
740
0
}
741
742
bool de265_image::debug_is_completed() const
743
0
{
744
0
  return nThreadsFinished==nThreadsTotal;
745
0
}
746
747
748
749
void de265_image::clear_metadata()
750
0
{
751
  // TODO: maybe we could avoid the memset by ensuring that all data is written to
752
  // during decoding (especially log2CbSize), but it is unlikely to be faster than the memset.
753
754
0
  cb_info.clear();
755
0
  intraPredMode.clear();
756
  //tu_info.clear();  // done on the fly
757
0
  ctb_info.clear();
758
0
  deblk_info.clear();
759
760
  // --- reset CTB progresses ---
761
762
0
  for (int i=0;i<ctb_info.data_size;i++) {
763
0
    ctb_progress[i].reset(CTB_PROGRESS_NONE);
764
0
  }
765
0
}
766
767
768
void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const PBMotion& mv)
769
0
{
770
0
  int log2PuSize = 2;
771
772
0
  int xPu = x >> log2PuSize;
773
0
  int yPu = y >> log2PuSize;
774
0
  int wPu = nPbW >> log2PuSize;
775
0
  int hPu = nPbH >> log2PuSize;
776
777
0
  int stride = pb_info.width_in_units;
778
779
0
  for (int pby=0;pby<hPu;pby++)
780
0
    for (int pbx=0;pbx<wPu;pbx++)
781
0
      {
782
0
        pb_info[ xPu+pbx + (yPu+pby)*stride ] = mv;
783
0
      }
784
0
}
785
786
787
bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const
788
0
{
789
0
  if (xN<0 || yN<0) return false;
790
0
  if (xN>=sps->pic_width_in_luma_samples ||
791
0
      yN>=sps->pic_height_in_luma_samples) return false;
792
793
0
  int minBlockAddrN = pps->scan->MinTbAddrZS[ (xN>>sps->Log2MinTrafoSize) +
794
0
                                        (yN>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ];
795
0
  int minBlockAddrCurr = pps->scan->MinTbAddrZS[ (xCurr>>sps->Log2MinTrafoSize) +
796
0
                                           (yCurr>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ];
797
798
0
  if (minBlockAddrN > minBlockAddrCurr) return false;
799
800
0
  int xCurrCtb = xCurr >> sps->Log2CtbSizeY;
801
0
  int yCurrCtb = yCurr >> sps->Log2CtbSizeY;
802
0
  int xNCtb = xN >> sps->Log2CtbSizeY;
803
0
  int yNCtb = yN >> sps->Log2CtbSizeY;
804
805
0
  if (get_SliceAddrRS(xCurrCtb,yCurrCtb) !=
806
0
      get_SliceAddrRS(xNCtb,   yNCtb)) {
807
0
    return false;
808
0
  }
809
810
0
  if (pps->scan->TileIdRS[xCurrCtb + yCurrCtb*sps->PicWidthInCtbsY] !=
811
0
      pps->scan->TileIdRS[xNCtb    + yNCtb   *sps->PicWidthInCtbsY]) {
812
0
    return false;
813
0
  }
814
815
0
  return true;
816
0
}
817
818
819
bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP,
820
                                     int nPbW, int nPbH, int partIdx, int xN,int yN) const
821
0
{
822
0
  logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH);
823
824
0
  int sameCb = (xC <= xN && xN < xC+nCbS &&
825
0
                yC <= yN && yN < yC+nCbS);
826
827
0
  bool availableN;
828
829
0
  if (!sameCb) {
830
0
    availableN = available_zscan(xP,yP,xN,yN);
831
0
  }
832
0
  else {
833
0
    availableN = !(nPbW<<1 == nCbS && nPbH<<1 == nCbS &&  // NxN
834
0
                   partIdx==1 &&
835
0
                   yN >= yC+nPbH && xN < xC+nPbW);  // xN/yN inside partIdx 2
836
0
  }
837
838
0
  if (availableN && get_pred_mode(xN,yN) == MODE_INTRA) {
839
0
    availableN = false;
840
0
  }
841
842
0
  return availableN;
843
0
}