Coverage Report

Created: 2026-04-01 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libde265/libde265/image.cc
Line
Count
Source
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "image.h"
22
#include "decctx.h"
23
24
#include <atomic>
25
26
#include <stdlib.h>
27
#include <string.h>
28
#include <assert.h>
29
30
#include <limits>
31
32
33
#ifdef HAVE_MALLOC_H
34
#include <malloc.h>
35
#endif
36
37
#ifdef HAVE_SSE4_1
38
// SSE code processes 128bit per iteration and thus might read more data
39
// than is later actually used.
40
#define MEMORY_PADDING  16
41
#else
42
#define MEMORY_PADDING  0
43
#endif
44
45
13.8k
#define STANDARD_ALIGNMENT 16
46
47
#ifdef HAVE___MINGW_ALIGNED_MALLOC
48
#define ALLOC_ALIGNED(alignment, size)         __mingw_aligned_malloc((size), (alignment))
49
#define FREE_ALIGNED(mem)                      __mingw_aligned_free((mem))
50
#elif _WIN32
51
#define ALLOC_ALIGNED(alignment, size)         _aligned_malloc((size), (alignment))
52
#define FREE_ALIGNED(mem)                      _aligned_free((mem))
53
#elif defined(HAVE_POSIX_MEMALIGN)
54
36.0k
// Thin wrapper around posix_memalign(): returns a block of 'size' bytes aligned
// to 'alignment', or nullptr if posix_memalign() reports an error.
// The memory has to be released with FREE_ALIGNED().
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
    void *aligned_block = nullptr;
    const int rc = posix_memalign(&aligned_block, alignment, size);
    return (rc == 0) ? aligned_block : nullptr;
}
61
36.0k
#define FREE_ALIGNED(mem)                      free((mem))
62
#else
63
#define ALLOC_ALIGNED(alignment, size)      memalign((alignment), (size))
64
#define FREE_ALIGNED(mem)                   free((mem))
65
#endif
66
67
36.0k
#define ALLOC_ALIGNED_16(size)              ALLOC_ALIGNED(16, size)
68
69
/* Allocate the pixel memory for plane 'cIdx' of 'img' and register it with the
   image through set_image_plane().

   The row stride is the plane width rounded up to STANDARD_ALIGNMENT and the
   buffer holds stride*height bytes (plus MEMORY_PADDING).
   NOTE(review): this is one byte per sample; confirm whether images with more
   than 8 bits per sample can reach this function.

   If 'inputdata' is not nullptr, it is copied into the new plane. 'inputstride'
   is the distance in bytes between two input rows. At most 'stride' bytes are
   copied per row, so a wider input never writes beyond the destination row.

   Returns the allocated plane, or nullptr if the allocation failed (in which
   case the image is left untouched). */
LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx,
                                           void* inputdata, int inputstride, void *userdata)
{
  const int alignment = STANDARD_ALIGNMENT;
  const int stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment;
  const int height = img->get_height(cIdx);

  // compute the buffer size in size_t so that large planes cannot overflow 'int'
  const size_t plane_size = static_cast<size_t>(stride) * static_cast<size_t>(height);

  uint8_t* p = static_cast<uint8_t*>(ALLOC_ALIGNED_16(plane_size + MEMORY_PADDING));

  if (p==nullptr) { return nullptr; }

  img->set_image_plane(cIdx, p, stride, userdata);

  // copy input data if provided

  if (inputdata != nullptr) {
    const uint8_t* src = static_cast<const uint8_t*>(inputdata);

    if (inputstride == stride) {
      memcpy(p, src, plane_size);
    }
    else {
      // Never copy more than one destination row can hold. The previous code
      // copied 'inputstride' bytes and overflowed the buffer for inputstride > stride.
      const int row_bytes = (inputstride < stride) ? inputstride : stride;

      if (row_bytes > 0) {
        for (int y=0;y<height;y++) {
          memcpy(p   + static_cast<size_t>(y) * static_cast<size_t>(stride),
                 src + static_cast<size_t>(y) * static_cast<size_t>(inputstride),
                 static_cast<size_t>(row_bytes));
        }
      }
    }
  }

  return p;
}
97
98
99
/* Release the pixel memory of plane 'cIdx' of 'img' with FREE_ALIGNED().
   The plane pointer stored in the image is not modified by this function. */
LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx)
{
  uint8_t* const p = img->get_image_plane(cIdx);

  // the plane is expected to have been allocated before
  assert(p);

  FREE_ALIGNED(p);
}
105
106
107
static int  de265_image_get_buffer(de265_decoder_context* ctx,
108
                                   de265_image_spec* spec, de265_image* img, void* userdata)
109
13.8k
{
110
13.8k
  const int rawChromaWidth  = spec->width  / img->SubWidthC;
111
13.8k
  const int rawChromaHeight = spec->height / img->SubHeightC;
112
113
13.8k
  int luma_stride   = (spec->width    + spec->alignment-1) / spec->alignment * spec->alignment;
114
13.8k
  int chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment;
115
116
13.8k
  assert(img->BitDepth_Y >= 8 && img->BitDepth_Y <= 16);
117
13.8k
  assert(img->BitDepth_C >= 8 && img->BitDepth_C <= 16);
118
119
13.8k
  int luma_bpl   = luma_stride   * ((img->BitDepth_Y+7)/8);
120
13.8k
  int chroma_bpl = chroma_stride * ((img->BitDepth_C+7)/8);
121
122
13.8k
  int luma_height   = spec->height;
123
13.8k
  int chroma_height = rawChromaHeight;
124
125
13.8k
  bool alloc_failed = false;
126
127
13.8k
  uint8_t* p[3] = { 0,0,0 };
128
13.8k
  p[0] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(luma_height   * luma_bpl   + MEMORY_PADDING));
129
13.8k
  if (p[0]==nullptr) { alloc_failed=true; }
130
131
13.8k
  if (img->get_chroma_format() != de265_chroma_mono) {
132
11.0k
    p[1] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING));
133
11.0k
    p[2] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING));
134
135
11.0k
    if (p[1]==nullptr || p[2]==nullptr) { alloc_failed=true; }
136
11.0k
  }
137
2.81k
  else {
138
2.81k
    p[1] = nullptr;
139
2.81k
    p[2] = nullptr;
140
2.81k
    chroma_stride = 0;
141
2.81k
  }
142
143
13.8k
  if (alloc_failed) {
144
0
    for (int i=0;i<3;i++)
145
0
      if (p[i]) {
146
0
        FREE_ALIGNED(p[i]);
147
0
      }
148
149
0
    return 0;
150
0
  }
151
152
13.8k
  img->set_image_plane(0, p[0], luma_stride, nullptr);
153
13.8k
  img->set_image_plane(1, p[1], chroma_stride, nullptr);
154
13.8k
  img->set_image_plane(2, p[2], chroma_stride, nullptr);
155
156
13.8k
  img->fill_image(0,0,0);
157
158
13.8k
  return 1;
159
13.8k
}
160
161
static void de265_image_release_buffer(de265_decoder_context* ctx,
162
                                       de265_image* img, void* userdata)
163
13.8k
{
164
55.5k
  for (int i=0;i<3;i++) {
165
41.6k
    uint8_t* p = img->get_image_plane(i);
166
41.6k
    if (p) {
167
36.0k
      FREE_ALIGNED(p);
168
36.0k
    }
169
41.6k
  }
170
13.8k
}
171
172
173
// Allocation callbacks that alloc_image() falls back to when no custom
// allocation functions are used.
de265_image_allocation de265_image::default_image_allocation = {
  de265_image_get_buffer,      // allocates the planes with ALLOC_ALIGNED_16()
  de265_image_release_buffer   // frees them again with FREE_ALIGNED()
};
177
178
179
void de265_image::set_image_plane(int cIdx, uint8_t* mem, int stride, void *userdata)
180
41.6k
{
181
41.6k
  pixels[cIdx] = mem;
182
41.6k
  plane_user_data[cIdx] = userdata;
183
184
41.6k
  if (cIdx==0) { this->stride        = stride; }
185
27.7k
  else         { this->chroma_stride = stride; }
186
41.6k
}
187
188
189
15.2k
// Compiler-generated default constructor; no pixel memory or metadata is
// allocated here (that happens in alloc_image()).
// NOTE(review): presumably the members get their initial values from in-class
// initializers in image.h -- confirm there.
de265_image::de265_image() = default;
190
191
192
/* (Re)initialize this image for a picture of w x h luma samples with chroma
   format 'c'.

   Steps:
     - release() the pixel memory and slices currently held,
     - assign a new unique image ID,
     - derive the chroma plane size, the subsampling factors and the
       conformance window (cropping) from 'c' and 'sps',
     - obtain the pixel memory through the allocation callbacks (the ones of
       the decoder context if 'dctx' is set and 'useCustomAllocFunc' is true,
       the default ones otherwise),
     - if 'allocMetadata' is set, allocate the per-block decoding metadata
       arrays sized according to 'sps' (which must not be null then).

   Without 'sps', a bit depth of 8 and no cropping are assumed.

   Returns DE265_OK on success,
           DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE if the conformance window
             leaves no visible picture area,
           DE265_ERROR_OUT_OF_MEMORY if any allocation failed. */
de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c,
                                     std::shared_ptr<const seq_parameter_set> sps, bool allocMetadata,
                                     decoder_context* dctx,
                                     //encoder_context* ectx,
                                     de265_PTS pts, void* user_data,
                                     bool useCustomAllocFunc)
{
  if (allocMetadata) { assert(sps); }

  if (sps) { this->sps = sps; }

  release(); /* TODO: review code for efficient allocation when arrays are already
                allocated to the requested size. Without the release, the old image-data
                will not be freed. */

  // process-wide counter, so that every allocated image gets a distinct ID
  static std::atomic<uint32_t> s_next_image_ID(0);
  ID = s_next_image_ID++;
  removed_at_picture_id = std::numeric_limits<uint32_t>::max();

  decctx = dctx;
  //encctx = ectx;

  // --- allocate image buffer ---

  chroma_format= c;

  width = w;
  height = h;
  chroma_width = w;
  chroma_height= h;

  this->user_data = user_data;
  this->pts = pts;

  de265_image_spec spec;

  // unit (in luma samples) of the conformance window offsets
  uint8_t WinUnitX, WinUnitY;

  switch (chroma_format) {
  case de265_chroma_420:
    spec.format = de265_image_format_YUV420P8;
    chroma_width  = (chroma_width +1)/2;
    chroma_height = (chroma_height+1)/2;
    SubWidthC  = 2;
    SubHeightC = 2;
    WinUnitX = 2;
    WinUnitY = 2;
    break;

  case de265_chroma_422:
    spec.format = de265_image_format_YUV422P8;
    chroma_width = (chroma_width+1)/2;
    SubWidthC  = 2;
    SubHeightC = 1;
    WinUnitX = 2;
    WinUnitY = 1;
    break;

  case de265_chroma_444:
    spec.format = de265_image_format_YUV444P8;
    SubWidthC  = 1;
    SubHeightC = 1;
    WinUnitX = 1;
    WinUnitY = 1;
    break;

  case de265_chroma_mono:
    spec.format = de265_image_format_mono8;
    chroma_width = 0;
    chroma_height= 0;
    SubWidthC  = 1;
    SubHeightC = 1;
    WinUnitX = 1;
    WinUnitY = 1;
    break;

  default:
    assert(false);
    WinUnitX = WinUnitY = 0;
    break;
  }

  if (chroma_format != de265_chroma_mono && sps) {
    assert(sps->SubWidthC  == SubWidthC);
    assert(sps->SubHeightC == SubHeightC);
  }

  spec.width  = w;
  spec.height = h;
  spec.alignment = STANDARD_ALIGNMENT;


  // conformance window cropping

  int left   = sps ? sps->conf_win_left_offset : 0;
  int right  = sps ? sps->conf_win_right_offset : 0;
  int top    = sps ? sps->conf_win_top_offset : 0;
  int bottom = sps ? sps->conf_win_bottom_offset : 0;

  // the cropping must leave at least one visible column and row

  if ((left+right)*WinUnitX >= width) {
    return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE;
  }

  if ((top+bottom)*WinUnitY >= height) {
    return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE;
  }

  width_confwin = width - (left+right)*WinUnitX;
  height_confwin= height- (top+bottom)*WinUnitY;
  chroma_width_confwin = chroma_width -left-right;
  chroma_height_confwin= chroma_height-top-bottom;

  spec.crop_left  = left *WinUnitX;
  spec.crop_right = right*WinUnitX;
  spec.crop_top   = top   *WinUnitY;
  spec.crop_bottom= bottom*WinUnitY;

  spec.visible_width = width_confwin;
  spec.visible_height= height_confwin;


  BitDepth_Y = (sps==nullptr) ? 8 : sps->BitDepth_Y;
  BitDepth_C = (sps==nullptr) ? 8 : sps->BitDepth_C;

  // 0 for one byte per sample, 1 for two bytes per sample
  bpp_shift[0] = (BitDepth_Y <= 8) ? 0 : 1;
  bpp_shift[1] = (BitDepth_C <= 8) ? 0 : 1;
  bpp_shift[2] = bpp_shift[1];


  // allocate memory and set conformance window pointers

  void* alloc_userdata = nullptr;
  if (decctx) alloc_userdata = decctx->param_image_allocation_userdata;
  // if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // actually not needed

  if (decctx && useCustomAllocFunc) {
    image_allocation_functions = decctx->param_image_allocation_functions;
  }
  else {
    image_allocation_functions = de265_image::default_image_allocation;
  }

  bool mem_alloc_success = true;

  if (image_allocation_functions.get_buffer != nullptr) {
    mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this,
                                                              alloc_userdata);

    // Check for memory shortage BEFORE touching the plane pointers: after a
    // failed allocation they are not valid and must not be used to derive the
    // conformance window pointers.

    if (!mem_alloc_success) {
      return DE265_ERROR_OUT_OF_MEMORY;
    }

    // NOTE(review): the offsets are added to uint8_t pointers without scaling
    // by the sample size; confirm that this is intended for bit depths > 8.

    pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride;

    if (chroma_format != de265_chroma_mono) {
      pixels_confwin[1] = pixels[1] + left + top*chroma_stride;
      pixels_confwin[2] = pixels[2] + left + top*chroma_stride;
    }
    else {
      pixels_confwin[1] = nullptr;
      pixels_confwin[2] = nullptr;
    }
  }

  // --- allocate decoding info arrays ---

  if (allocMetadata) {
    // intra pred mode

    mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
                                             sps->Log2MinPUSize);

    mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
                                              sps->Log2MinPUSize);

    // cb info

    mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY,
                                       sps->Log2MinCbSizeY);

    // pb info (stored in units of 4x4 luma samples)

    int puWidth  = sps->PicWidthInMinCbsY  << (sps->Log2MinCbSizeY -2);
    int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2);

    mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2);


    // tu info

    mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY,
                                       sps->Log2MinTrafoSize);

    // deblk info (stored in units of 4x4 luma samples)

    int deblk_w = (sps->pic_width_in_luma_samples +3)/4;
    int deblk_h = (sps->pic_height_in_luma_samples+3)/4;

    mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2);

    // CTB info (only reallocated when the CTB layout changed)

    if (ctb_info.width_in_units  != sps->PicWidthInCtbsY  ||
        ctb_info.height_in_units != sps->PicHeightInCtbsY ||
        ctb_info.log2unitSize    != sps->Log2CtbSizeY)
      {
        delete[] ctb_progress;
        ctb_progress = nullptr;  // do not keep a dangling pointer if the code below throws

        mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY,
                                            sps->Log2CtbSizeY);

        ctb_progress = new de265_progress_lock[ ctb_info.data_size ];
      }


    // check for memory shortage

    if (!mem_alloc_success)
      {
        return DE265_ERROR_OUT_OF_MEMORY;
      }
  }

  return DE265_OK;
}
439
440
441
// Releases the pixel memory and slices (see release()) and frees the
// per-CTB progress locks.
de265_image::~de265_image()
{
  release();

  // free progress locks (delete[] on a null pointer is a no-op)
  delete[] ctb_progress;
}
451
452
453
void de265_image::release()
454
29.1k
{
455
  // free image memory
456
457
29.1k
  if (pixels[0])
458
13.8k
    {
459
      /*
460
      if (encoder_image_release_func != nullptr) {
461
        encoder_image_release_func(encctx, this,
462
                                   encctx->param_image_allocation_userdata);
463
      }
464
13.8k
      else*/ {
465
13.8k
        image_allocation_functions.release_buffer(decctx, this,
466
13.8k
                                                  decctx ?
467
13.8k
                                                  decctx->param_image_allocation_userdata :
468
13.8k
                                                  nullptr);
469
13.8k
      }
470
471
55.5k
      for (int i=0;i<3;i++)
472
41.6k
        {
473
41.6k
          pixels[i] = nullptr;
474
41.6k
          pixels_confwin[i] = nullptr;
475
41.6k
        }
476
13.8k
    }
477
478
  // free slices
479
480
34.9k
  for (size_t i=0;i<slices.size();i++) {
481
5.77k
    delete slices[i];
482
5.77k
  }
483
29.1k
  slices.clear();
484
29.1k
}
485
486
487
void de265_image::fill_plane(int channel, int value)
488
48.5k
{
489
48.5k
  int bytes_per_pixel = get_bytes_per_pixel(channel);
490
48.5k
  assert(value >= 0); // needed for the shift operation in the check below
491
492
48.5k
  if (bytes_per_pixel == 1) {
493
33.6k
    if (channel==0) {
494
13.1k
      memset(pixels[channel], value, stride * height);
495
13.1k
    }
496
20.4k
    else {
497
20.4k
      memset(pixels[channel], value, chroma_stride * chroma_height);
498
20.4k
    }
499
33.6k
  }
500
14.9k
  else if ((value >> 8) == (value & 0xFF)) {
501
11.0k
    assert(bytes_per_pixel == 2);
502
503
    // if we fill the same byte value to all bytes, we can still use memset()
504
11.0k
    if (channel==0) {
505
4.43k
      memset(pixels[channel], 0, stride * height * bytes_per_pixel);
506
4.43k
    }
507
6.59k
    else {
508
6.59k
      memset(pixels[channel], 0, chroma_stride * chroma_height * bytes_per_pixel);
509
6.59k
    }
510
11.0k
  }
511
3.89k
  else {
512
3.89k
    assert(bytes_per_pixel == 2);
513
3.89k
    uint16_t v = value;
514
515
3.89k
    if (channel==0) {
516
      // copy value into first row
517
481k
      for (int x = 0; x < width; x++) {
518
479k
        *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v;
519
479k
      }
520
521
      // copy first row into remaining rows
522
203k
      for (int y = 1; y < height; y++) {
523
201k
        memcpy(pixels[channel] + y * stride * 2, pixels[channel], chroma_width * 2);
524
201k
      }
525
1.55k
    }
526
2.34k
    else {
527
      // copy value into first row
528
412k
      for (int x = 0; x < chroma_width; x++) {
529
410k
        *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v;
530
410k
      }
531
532
      // copy first row into remaining rows
533
277k
      for (int y = 1; y < chroma_height; y++) {
534
274k
        memcpy(pixels[channel] + y * chroma_stride * 2, pixels[channel], chroma_width * 2);
535
274k
      }
536
2.34k
    }
537
3.89k
  }
538
48.5k
}
539
540
541
void de265_image::fill_image(int y,int cb,int cr)
542
19.1k
{
543
19.1k
  if (pixels[0]) {
544
19.1k
    fill_plane(0, y);
545
19.1k
  }
546
547
19.1k
  if (pixels[1]) {
548
14.7k
    fill_plane(1, cb);
549
14.7k
  }
550
551
19.1k
  if (pixels[2]) {
552
14.7k
    fill_plane(2, cr);
553
14.7k
  }
554
19.1k
}
555
556
557
/* Turn this image into a copy of the pixel data of 'src': the image is
   reallocated with the geometry, SPS, decoder context, PTS and user data of
   'src' (without metadata and always with the default allocation functions),
   then all lines are copied.

   Returns the error of alloc_image() if the allocation failed, DE265_OK otherwise. */
de265_error de265_image::copy_image(const de265_image* src)
{
  /* TODO: actually, since we allocate the image only for internal purpose, we
     do not have to call the external allocation routines for this. However, then
     we have to track for each image how to release it again.
     Another option would be to save the copied data not in a de265_image at all.
  */

  const de265_error alloc_result =
    alloc_image(src->width, src->height, src->chroma_format, src->sps, false,
                src->decctx, /*src->encctx,*/ src->pts, src->user_data, false);

  if (alloc_result == DE265_OK) {
    copy_lines_from(src, 0, src->height);
  }

  return alloc_result;
}
575
576
577
// end = last line + 1
578
void de265_image::copy_lines_from(const de265_image* src, int first, int end)
579
23.4k
{
580
23.4k
  if (end > src->height) end=src->height;
581
582
23.4k
  assert(first % 2 == 0);
583
23.4k
  assert(end   % 2 == 0);
584
585
23.4k
  int luma_bpp   = (sps->BitDepth_Y+7)/8;
586
23.4k
  int chroma_bpp = (sps->BitDepth_C+7)/8;
587
588
23.4k
  if (src->stride == stride) {
589
23.4k
    memcpy(pixels[0]      + first*stride * luma_bpp,
590
23.4k
           src->pixels[0] + first*src->stride * luma_bpp,
591
23.4k
           (end-first)*stride * luma_bpp);
592
23.4k
  }
593
0
  else {
594
0
    for (int yp=first;yp<end;yp++) {
595
0
      memcpy(pixels[0]+yp*stride * luma_bpp,
596
0
             src->pixels[0]+yp*src->stride * luma_bpp,
597
0
             src->width * luma_bpp);
598
0
    }
599
0
  }
600
601
23.4k
  int first_chroma = first / src->SubHeightC;
602
23.4k
  int end_chroma   = end   / src->SubHeightC;
603
604
23.4k
  if (src->chroma_format != de265_chroma_mono) {
605
11.1k
    if (src->chroma_stride == chroma_stride) {
606
11.1k
      memcpy(pixels[1]      + first_chroma*chroma_stride * chroma_bpp,
607
11.1k
             src->pixels[1] + first_chroma*chroma_stride * chroma_bpp,
608
11.1k
             (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
609
11.1k
      memcpy(pixels[2]      + first_chroma*chroma_stride * chroma_bpp,
610
11.1k
             src->pixels[2] + first_chroma*chroma_stride * chroma_bpp,
611
11.1k
             (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
612
11.1k
    }
613
0
    else {
614
0
      for (int y=first_chroma;y<end_chroma;y++) {
615
0
        memcpy(pixels[1]+y*chroma_stride * chroma_bpp,
616
0
               src->pixels[1]+y*src->chroma_stride * chroma_bpp,
617
0
               src->chroma_width * chroma_bpp);
618
0
        memcpy(pixels[2]+y*chroma_stride * chroma_bpp,
619
0
               src->pixels[2]+y*src->chroma_stride * chroma_bpp,
620
0
               src->chroma_width * chroma_bpp);
621
0
      }
622
0
    }
623
11.1k
  }
624
23.4k
}
625
626
627
/* Swap the pixel storage of this image with that of 'b': plane pointers,
   conformance window pointers, per-plane user data, both strides and the
   allocation functions that are needed to release the memory again.
   Nothing else (sizes, metadata, ...) is exchanged. */
void de265_image::exchange_pixel_data_with(de265_image& b)
{
  // layout and ownership information
  std::swap(image_allocation_functions, b.image_allocation_functions);
  std::swap(stride, b.stride);
  std::swap(chroma_stride, b.chroma_stride);

  // per-plane data
  for (int plane=0; plane<3; plane++) {
    std::swap(plane_user_data[plane], b.plane_user_data[plane]);
    std::swap(pixels_confwin[plane], b.pixels_confwin[plane]);
    std::swap(pixels[plane], b.pixels[plane]);
  }
}
639
640
641
void de265_image::thread_start(int nThreads)
642
13.4k
{
643
13.4k
  std::unique_lock<std::mutex> lock(mutex);
644
645
  //printf("nThreads before: %d %d\n",nThreadsQueued, nThreadsTotal);
646
647
13.4k
  nThreadsQueued += nThreads;
648
13.4k
  nThreadsTotal += nThreads;
649
650
  //printf("nThreads after: %d %d\n",nThreadsQueued, nThreadsTotal);
651
13.4k
}
652
653
void de265_image::thread_run(const thread_task* task)
654
82.0k
{
655
82.0k
  std::unique_lock<std::mutex> lock(mutex);
656
657
  //printf("run thread %s\n", task->name().c_str());
658
659
82.0k
  nThreadsQueued--;
660
82.0k
  nThreadsRunning++;
661
82.0k
}
662
663
void de265_image::thread_blocks()
664
0
{
665
0
  std::unique_lock<std::mutex> lock(mutex);
666
667
0
  nThreadsRunning--;
668
0
  nThreadsBlocked++;
669
0
}
670
671
void de265_image::thread_unblocks()
672
0
{
673
0
  std::unique_lock<std::mutex> lock(mutex);
674
675
0
  nThreadsBlocked--;
676
0
  nThreadsRunning++;
677
0
}
678
679
void de265_image::thread_finishes(const thread_task* task)
680
82.0k
{
681
  //printf("finish thread %s\n", task->name().c_str());
682
683
82.0k
  std::unique_lock<std::mutex> lock(mutex);
684
685
82.0k
  nThreadsRunning--;
686
82.0k
  nThreadsFinished++;
687
82.0k
  assert(nThreadsRunning >= 0);
688
689
82.0k
  if (nThreadsFinished==nThreadsTotal) {
690
5.70k
    finished_cond.notify_all();
691
5.70k
  }
692
82.0k
}
693
694
// Convenience overload: converts the CTB coordinates (ctbx, ctby) into a
// raster-scan CTB address and forwards to the address-based overload.
void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress)
{
  const int ctbsPerRow = sps->PicWidthInCtbsY;
  const int ctbAddrRS  = ctbsPerRow*ctby + ctbx;

  wait_for_progress(task, ctbAddrRS, progress);
}
700
701
/* Block the calling task until the CTB with raster-scan address 'ctbAddrRS'
   has reached at least 'progress'.

   Does nothing when 'task' is nullptr or when the progress has already been
   reached. While waiting, the task is marked as Blocked and the image's thread
   counters are updated accordingly. */
void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress)
{
  if (task==nullptr) { return; }

  de265_progress_lock& ctbLock = ctb_progress[ctbAddrRS];

  if (ctbLock.get_progress() >= progress) {
    return;   // nothing to wait for
  }

  thread_blocks();

  assert(task!=nullptr);
  task->state = thread_task::Blocked;

  /* TODO: check whether we are the first blocked task in the list.
     If we are, we have to conceal input errors.
     Simplest concealment: do not block.
  */

  ctbLock.wait_for_progress(progress);

  task->state = thread_task::Running;
  thread_unblocks();
}
722
723
724
void de265_image::wait_for_completion()
725
9.31k
{
726
9.31k
  std::unique_lock<std::mutex> lock(mutex);
727
728
14.9k
  while (nThreadsFinished!=nThreadsTotal) {
729
5.67k
    finished_cond.wait(lock);
730
5.67k
  }
731
9.31k
}
732
733
bool de265_image::debug_is_completed() const
734
0
{
735
0
  return nThreadsFinished==nThreadsTotal;
736
0
}
737
738
739
740
void de265_image::clear_metadata()
741
5.06k
{
742
  // TODO: maybe we could avoid the memset by ensuring that all data is written to
743
  // during decoding (especially log2CbSize), but it is unlikely to be faster than the memset.
744
745
5.06k
  cb_info.clear();
746
5.06k
  intraPredMode.clear();
747
  //tu_info.clear();  // done on the fly
748
5.06k
  ctb_info.clear();
749
5.06k
  deblk_info.clear();
750
751
  // --- reset CTB progresses ---
752
753
315k
  for (int i=0;i<ctb_info.data_size;i++) {
754
310k
    ctb_progress[i].reset(CTB_PROGRESS_NONE);
755
310k
  }
756
5.06k
}
757
758
759
void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const PBMotion& mv)
760
427k
{
761
427k
  int log2PuSize = 2;
762
763
427k
  int xPu = x >> log2PuSize;
764
427k
  int yPu = y >> log2PuSize;
765
427k
  int wPu = nPbW >> log2PuSize;
766
427k
  int hPu = nPbH >> log2PuSize;
767
768
427k
  int stride = pb_info.width_in_units;
769
770
1.36M
  for (int pby=0;pby<hPu;pby++)
771
3.48M
    for (int pbx=0;pbx<wPu;pbx++)
772
2.55M
      {
773
2.55M
        pb_info[ xPu+pbx + (yPu+pby)*stride ] = mv;
774
2.55M
      }
775
427k
}
776
777
778
bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const
779
3.53M
{
780
3.53M
  if (xN<0 || yN<0) return false;
781
2.85M
  if (xN>=sps->pic_width_in_luma_samples ||
782
2.83M
      yN>=sps->pic_height_in_luma_samples) return false;
783
784
2.80M
  int minBlockAddrN = pps->MinTbAddrZS[ (xN>>sps->Log2MinTrafoSize) +
785
2.80M
                                        (yN>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ];
786
2.80M
  int minBlockAddrCurr = pps->MinTbAddrZS[ (xCurr>>sps->Log2MinTrafoSize) +
787
2.80M
                                           (yCurr>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ];
788
789
2.80M
  if (minBlockAddrN > minBlockAddrCurr) return false;
790
791
2.61M
  int xCurrCtb = xCurr >> sps->Log2CtbSizeY;
792
2.61M
  int yCurrCtb = yCurr >> sps->Log2CtbSizeY;
793
2.61M
  int xNCtb = xN >> sps->Log2CtbSizeY;
794
2.61M
  int yNCtb = yN >> sps->Log2CtbSizeY;
795
796
2.61M
  if (get_SliceAddrRS(xCurrCtb,yCurrCtb) !=
797
2.61M
      get_SliceAddrRS(xNCtb,   yNCtb)) {
798
24.0k
    return false;
799
24.0k
  }
800
801
2.59M
  if (pps->TileIdRS[xCurrCtb + yCurrCtb*sps->PicWidthInCtbsY] !=
802
2.59M
      pps->TileIdRS[xNCtb    + yNCtb   *sps->PicWidthInCtbsY]) {
803
2.00k
    return false;
804
2.00k
  }
805
806
2.59M
  return true;
807
2.59M
}
808
809
810
bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP,
811
                                     int nPbW, int nPbH, int partIdx, int xN,int yN) const
812
1.41M
{
813
1.41M
  logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH);
814
815
1.41M
  int sameCb = (xC <= xN && xN < xC+nCbS &&
816
460k
                yC <= yN && yN < yC+nCbS);
817
818
1.41M
  bool availableN;
819
820
1.41M
  if (!sameCb) {
821
1.34M
    availableN = available_zscan(xP,yP,xN,yN);
822
1.34M
  }
823
68.3k
  else {
824
68.3k
    availableN = !(nPbW<<1 == nCbS && nPbH<<1 == nCbS &&  // NxN
825
3.22k
                   partIdx==1 &&
826
956
                   yN >= yC+nPbH && xN < xC+nPbW);  // xN/yN inside partIdx 2
827
68.3k
  }
828
829
1.41M
  if (availableN && get_pred_mode(xN,yN) == MODE_INTRA) {
830
31.9k
    availableN = false;
831
31.9k
  }
832
833
1.41M
  return availableN;
834
1.41M
}