/work/libde265/libde265/image.cc
Line | Count | Source |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "image.h" |
22 | | #include "decctx.h" |
23 | | |
24 | | #include <atomic> |
25 | | |
26 | | #include <stdlib.h> |
27 | | #include <string.h> |
28 | | #include <assert.h> |
29 | | |
30 | | #include <limits> |
31 | | |
32 | | |
33 | | #ifdef HAVE_MALLOC_H |
34 | | #include <malloc.h> |
35 | | #endif |
36 | | |
37 | | #ifdef HAVE_SSE4_1 |
38 | | // SSE code processes 128bit per iteration and thus might read more data |
39 | | // than is later actually used. |
40 | 0 | #define MEMORY_PADDING 16 |
41 | | #else |
42 | | #define MEMORY_PADDING 0 |
43 | | #endif |
44 | | |
45 | 0 | #define STANDARD_ALIGNMENT 16 |
46 | | |
47 | | #if defined(__MINGW32__) |
48 | | #define ALLOC_ALIGNED(alignment, size) __mingw_aligned_malloc((size), (alignment)) |
49 | | #define FREE_ALIGNED(mem) __mingw_aligned_free((mem)) |
50 | | #elif defined(_MSC_VER) |
51 | | #define ALLOC_ALIGNED(alignment, size) _aligned_malloc((size), (alignment)) |
52 | | #define FREE_ALIGNED(mem) _aligned_free((mem)) |
53 | | #elif defined(HAVE_POSIX_MEMALIGN) |
// Allocate `size` bytes aligned to `alignment` via posix_memalign().
// Returns nullptr when posix_memalign() reports an error.
// Memory obtained here is released with FREE_ALIGNED().
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
  void *ptr = nullptr;
  const int rc = posix_memalign(&ptr, alignment, size);
  return (rc == 0) ? ptr : nullptr;
}
61 | 0 | #define FREE_ALIGNED(mem) free((mem)) |
62 | | #else |
63 | | #define ALLOC_ALIGNED(alignment, size) memalign((alignment), (size)) |
64 | | #define FREE_ALIGNED(mem) free((mem)) |
65 | | #endif |
66 | | |
67 | 0 | #define ALLOC_ALIGNED_16(size) ALLOC_ALIGNED(16, size) |
68 | | |
69 | | LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx, |
70 | | void* inputdata, int inputstride, void *userdata) |
71 | 0 | { |
72 | 0 | int alignment = STANDARD_ALIGNMENT; |
73 | 0 | uint32_t stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment; |
74 | 0 | uint32_t height = img->get_height(cIdx); |
75 | | |
76 | | // size computed in size_t: stride*height can exceed UINT32_MAX for large planes |
77 | 0 | uint8_t* p = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(stride) * height + MEMORY_PADDING)); |
78 | |
|
79 | 0 | if (p==nullptr) { return nullptr; } |
80 | | |
81 | 0 | img->set_image_plane(cIdx, p, stride, userdata); |
82 | | |
83 | | // copy input data if provided |
84 | |
|
85 | 0 | if (inputdata != nullptr) { |
86 | 0 | if (inputstride == static_cast<int>(stride)) { |
87 | 0 | memcpy(p, inputdata, static_cast<size_t>(stride) * height); |
88 | 0 | } |
89 | 0 | else { |
90 | 0 | for (uint32_t y=0;y<height;y++) { |
91 | 0 | memcpy(p + static_cast<size_t>(y) * stride, |
92 | 0 | static_cast<char*>(inputdata) + static_cast<size_t>(inputstride) * y, |
93 | 0 | inputstride); |
94 | 0 | } |
95 | 0 | } |
96 | 0 | } |
97 | |
|
98 | 0 | return p; |
99 | 0 | } |
100 | | |
101 | | |
102 | | LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx) |
103 | 0 | { |
104 | 0 | uint8_t* p = img->get_image_plane(cIdx); |
105 | 0 | assert(p); |
106 | 0 | FREE_ALIGNED(p); |
107 | 0 | } |
108 | | |
109 | | |
110 | | static int de265_image_get_buffer(de265_decoder_context* ctx, |
111 | | de265_image_spec* spec, de265_image* img, void* userdata) |
112 | 0 | { |
113 | 0 | const uint32_t rawChromaWidth = spec->width / img->SubWidthC; |
114 | 0 | const uint32_t rawChromaHeight = spec->height / img->SubHeightC; |
115 | |
|
116 | 0 | uint32_t luma_stride = (spec->width + spec->alignment-1) / spec->alignment * spec->alignment; |
117 | 0 | uint32_t chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment; |
118 | |
|
119 | 0 | assert(img->BitDepth_Y >= 8 && img->BitDepth_Y <= 16); |
120 | 0 | assert(img->BitDepth_C >= 8 && img->BitDepth_C <= 16); |
121 | | |
122 | 0 | uint32_t luma_bpl = luma_stride * ((img->BitDepth_Y+7)/8); |
123 | 0 | uint32_t chroma_bpl = chroma_stride * ((img->BitDepth_C+7)/8); |
124 | |
|
125 | 0 | uint32_t luma_height = spec->height; |
126 | 0 | uint32_t chroma_height = rawChromaHeight; |
127 | |
|
128 | 0 | bool alloc_failed = false; |
129 | | |
130 | | // Compute the plane sizes in size_t. Each operand fits in uint32_t, but the |
131 | | // height * bytes-per-line product can exceed UINT32_MAX for large frames, so |
132 | | // the multiplication must be done in 64 bits. Computing it in 32 bits wraps |
133 | | // the allocation size to a small value while fill_image() later writes the |
134 | | // real (size_t) size -> heap buffer overflow (GHSA-vv8h-932h-7r86). |
135 | 0 | uint8_t* p[3] = { nullptr,nullptr,nullptr }; |
136 | 0 | p[0] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(luma_height) * luma_bpl + MEMORY_PADDING)); |
137 | 0 | if (p[0]==nullptr) { alloc_failed=true; } |
138 | |
|
139 | 0 | if (img->get_chroma_format() != de265_chroma_mono) { |
140 | 0 | p[1] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(chroma_height) * chroma_bpl + MEMORY_PADDING)); |
141 | 0 | p[2] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(static_cast<size_t>(chroma_height) * chroma_bpl + MEMORY_PADDING)); |
142 | |
|
143 | 0 | if (p[1]==nullptr || p[2]==nullptr) { alloc_failed=true; } |
144 | 0 | } |
145 | 0 | else { |
146 | 0 | p[1] = nullptr; |
147 | 0 | p[2] = nullptr; |
148 | 0 | chroma_stride = 0; |
149 | 0 | } |
150 | |
|
151 | 0 | if (alloc_failed) { |
152 | 0 | for (int i=0;i<3;i++) |
153 | 0 | if (p[i]) { |
154 | 0 | FREE_ALIGNED(p[i]); |
155 | 0 | } |
156 | |
|
157 | 0 | return 0; |
158 | 0 | } |
159 | | |
160 | 0 | img->set_image_plane(0, p[0], luma_stride, nullptr); |
161 | 0 | img->set_image_plane(1, p[1], chroma_stride, nullptr); |
162 | 0 | img->set_image_plane(2, p[2], chroma_stride, nullptr); |
163 | |
|
164 | 0 | img->fill_image(0,0,0); |
165 | |
|
166 | 0 | return 1; |
167 | 0 | } |
168 | | |
169 | | static void de265_image_release_buffer(de265_decoder_context* ctx, |
170 | | de265_image* img, void* userdata) |
171 | 0 | { |
172 | 0 | for (int i=0;i<3;i++) { |
173 | 0 | uint8_t* p = img->get_image_plane(i); |
174 | 0 | if (p) { |
175 | 0 | FREE_ALIGNED(p); |
176 | 0 | } |
177 | 0 | } |
178 | 0 | } |
179 | | |
180 | | |
181 | | de265_image_allocation de265_image::default_image_allocation = { |
182 | | de265_image_get_buffer, |
183 | | de265_image_release_buffer |
184 | | }; |
185 | | |
186 | | |
187 | | void de265_image::set_image_plane(int cIdx, uint8_t* mem, ptrdiff_t stride, void *userdata) |
188 | 0 | { |
189 | 0 | pixels[cIdx] = mem; |
190 | 0 | plane_user_data[cIdx] = userdata; |
191 | |
|
192 | 0 | if (cIdx==0) { this->stride = stride; } |
193 | 0 | else { this->chroma_stride = stride; } |
194 | 0 | } |
195 | | |
196 | | |
197 | 0 | de265_image::de265_image() = default; |
198 | | |
199 | | |
200 | | de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c, |
201 | | std::shared_ptr<const seq_parameter_set> sps, bool allocMetadata, |
202 | | decoder_context* dctx, |
203 | | //encoder_context* ectx, |
204 | | de265_PTS pts, void* user_data, |
205 | | bool useCustomAllocFunc) |
206 | 0 | { |
207 | | //if (allocMetadata) { assert(sps); } |
208 | 0 | if (allocMetadata) { assert(sps); } |
209 | | |
210 | 0 | if (sps) { this->sps = sps; } |
211 | |
|
212 | 0 | release(); /* TODO: review code for efficient allocation when arrays are already |
213 | | allocated to the requested size. Without the release, the old image-data |
214 | | will not be freed. */ |
215 | |
|
216 | 0 | static std::atomic<uint32_t> s_next_image_ID(0); |
217 | 0 | ID = s_next_image_ID++; |
218 | 0 | removed_at_picture_id = std::numeric_limits<uint32_t>::max(); |
219 | |
|
220 | 0 | decctx = dctx; |
221 | | //encctx = ectx; |
222 | | |
223 | | // --- allocate image buffer --- |
224 | |
|
225 | 0 | chroma_format= c; |
226 | |
|
227 | 0 | width = w; |
228 | 0 | height = h; |
229 | 0 | chroma_width = w; |
230 | 0 | chroma_height= h; |
231 | |
|
232 | 0 | this->user_data = user_data; |
233 | 0 | this->pts = pts; |
234 | |
|
235 | 0 | de265_image_spec spec; |
236 | |
|
237 | 0 | uint8_t WinUnitX, WinUnitY; |
238 | |
|
239 | 0 | switch (chroma_format) { |
240 | 0 | case de265_chroma_mono: WinUnitX=1; WinUnitY=1; break; |
241 | 0 | case de265_chroma_420: WinUnitX=2; WinUnitY=2; break; |
242 | 0 | case de265_chroma_422: WinUnitX=2; WinUnitY=1; break; |
243 | 0 | case de265_chroma_444: WinUnitX=1; WinUnitY=1; break; |
244 | 0 | default: |
245 | 0 | assert(0); |
246 | 0 | WinUnitX = WinUnitY = 0; |
247 | 0 | } |
248 | | |
249 | 0 | switch (chroma_format) { |
250 | 0 | case de265_chroma_420: |
251 | 0 | spec.format = de265_image_format_YUV420P8; |
252 | 0 | chroma_width = (chroma_width +1)/2; |
253 | 0 | chroma_height = (chroma_height+1)/2; |
254 | 0 | SubWidthC = 2; |
255 | 0 | SubHeightC = 2; |
256 | 0 | break; |
257 | | |
258 | 0 | case de265_chroma_422: |
259 | 0 | spec.format = de265_image_format_YUV422P8; |
260 | 0 | chroma_width = (chroma_width+1)/2; |
261 | 0 | SubWidthC = 2; |
262 | 0 | SubHeightC = 1; |
263 | 0 | break; |
264 | | |
265 | 0 | case de265_chroma_444: |
266 | 0 | spec.format = de265_image_format_YUV444P8; |
267 | 0 | SubWidthC = 1; |
268 | 0 | SubHeightC = 1; |
269 | 0 | break; |
270 | | |
271 | 0 | case de265_chroma_mono: |
272 | 0 | spec.format = de265_image_format_mono8; |
273 | 0 | chroma_width = 0; |
274 | 0 | chroma_height= 0; |
275 | 0 | SubWidthC = 1; |
276 | 0 | SubHeightC = 1; |
277 | 0 | break; |
278 | | |
279 | 0 | default: |
280 | 0 | assert(false); |
281 | 0 | break; |
282 | 0 | } |
283 | | |
284 | 0 | if (chroma_format != de265_chroma_mono && sps) { |
285 | 0 | assert(sps->SubWidthC == SubWidthC); |
286 | 0 | assert(sps->SubHeightC == SubHeightC); |
287 | 0 | } |
288 | | |
289 | 0 | spec.width = w; |
290 | 0 | spec.height = h; |
291 | 0 | spec.alignment = STANDARD_ALIGNMENT; |
292 | | |
293 | | |
294 | | // conformance window cropping |
295 | |
|
296 | 0 | int left = sps ? sps->conf_win_left_offset : 0; |
297 | 0 | int right = sps ? sps->conf_win_right_offset : 0; |
298 | 0 | int top = sps ? sps->conf_win_top_offset : 0; |
299 | 0 | int bottom = sps ? sps->conf_win_bottom_offset : 0; |
300 | |
|
301 | 0 | if ((left+right)*WinUnitX >= width) { |
302 | 0 | return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE; |
303 | 0 | } |
304 | | |
305 | 0 | if ((top+bottom)*WinUnitY >= height) { |
306 | 0 | return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE; |
307 | 0 | } |
308 | | |
309 | 0 | width_confwin = width - (left+right)*WinUnitX; |
310 | 0 | height_confwin= height- (top+bottom)*WinUnitY; |
311 | 0 | chroma_width_confwin = chroma_width -left-right; |
312 | 0 | chroma_height_confwin= chroma_height-top-bottom; |
313 | |
|
314 | 0 | spec.crop_left = left *WinUnitX; |
315 | 0 | spec.crop_right = right*WinUnitX; |
316 | 0 | spec.crop_top = top *WinUnitY; |
317 | 0 | spec.crop_bottom= bottom*WinUnitY; |
318 | |
|
319 | 0 | spec.visible_width = width_confwin; |
320 | 0 | spec.visible_height= height_confwin; |
321 | | |
322 | |
|
323 | 0 | BitDepth_Y = (sps==nullptr) ? 8 : sps->BitDepth_Y; |
324 | 0 | BitDepth_C = (sps==nullptr) ? 8 : sps->BitDepth_C; |
325 | |
|
326 | 0 | bpp_shift[0] = (BitDepth_Y <= 8) ? 0 : 1; |
327 | 0 | bpp_shift[1] = (BitDepth_C <= 8) ? 0 : 1; |
328 | 0 | bpp_shift[2] = bpp_shift[1]; |
329 | | |
330 | | |
331 | | // allocate memory and set conformance window pointers |
332 | |
|
333 | 0 | void* alloc_userdata = nullptr; |
334 | 0 | if (decctx) alloc_userdata = decctx->param_image_allocation_userdata; |
335 | | // if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // actually not needed |
336 | | |
337 | | /* |
338 | | if (encctx && useCustomAllocFunc) { |
339 | | encoder_image_release_func = encctx->release_func; |
340 | | |
341 | | // if we do not provide a release function, use our own |
342 | | |
343 | | if (encoder_image_release_func == nullptr) { |
344 | | image_allocation_functions = de265_image::default_image_allocation; |
345 | | } |
346 | | else { |
347 | | image_allocation_functions.get_buffer = nullptr; |
348 | | image_allocation_functions.release_buffer = nullptr; |
349 | | } |
350 | | } |
351 | 0 | else*/ if (decctx && useCustomAllocFunc) { |
352 | 0 | image_allocation_functions = decctx->param_image_allocation_functions; |
353 | 0 | } |
354 | 0 | else { |
355 | 0 | image_allocation_functions = de265_image::default_image_allocation; |
356 | 0 | } |
357 | |
|
358 | 0 | bool mem_alloc_success = true; |
359 | |
|
360 | 0 | if (image_allocation_functions.get_buffer != nullptr) { |
361 | 0 | mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this, |
362 | 0 | alloc_userdata); |
363 | |
|
364 | 0 | pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride; |
365 | |
|
366 | 0 | if (chroma_format != de265_chroma_mono) { |
367 | 0 | pixels_confwin[1] = pixels[1] + left + top*chroma_stride; |
368 | 0 | pixels_confwin[2] = pixels[2] + left + top*chroma_stride; |
369 | 0 | } |
370 | 0 | else { |
371 | 0 | pixels_confwin[1] = nullptr; |
372 | 0 | pixels_confwin[2] = nullptr; |
373 | 0 | } |
374 | | |
375 | | // check for memory shortage |
376 | |
|
377 | 0 | if (!mem_alloc_success) |
378 | 0 | { |
379 | 0 | return DE265_ERROR_OUT_OF_MEMORY; |
380 | 0 | } |
381 | 0 | } |
382 | | |
383 | | //alloc_functions = *allocfunc; |
384 | | //alloc_userdata = userdata; |
385 | | |
386 | | // --- allocate decoding info arrays --- |
387 | | |
388 | 0 | if (allocMetadata) { |
389 | | // intra pred mode |
390 | |
|
391 | 0 | mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs, |
392 | 0 | sps->Log2MinPUSize); |
393 | |
|
394 | 0 | mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs, |
395 | 0 | sps->Log2MinPUSize); |
396 | | |
397 | | // cb info |
398 | |
|
399 | 0 | mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY, |
400 | 0 | sps->Log2MinCbSizeY); |
401 | | |
402 | | // pb info |
403 | |
|
404 | 0 | int puWidth = sps->PicWidthInMinCbsY << (sps->Log2MinCbSizeY -2); |
405 | 0 | int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2); |
406 | |
|
407 | 0 | mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2); |
408 | | |
409 | | |
410 | | // tu info |
411 | |
|
412 | 0 | mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY, |
413 | 0 | sps->Log2MinTrafoSize); |
414 | | |
415 | | // deblk info |
416 | |
|
417 | 0 | int deblk_w = (sps->pic_width_in_luma_samples +3)/4; |
418 | 0 | int deblk_h = (sps->pic_height_in_luma_samples+3)/4; |
419 | |
|
420 | 0 | mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2); |
421 | | |
422 | | // CTB info |
423 | |
|
424 | 0 | if (ctb_info.width_in_units != sps->PicWidthInCtbsY || |
425 | 0 | ctb_info.height_in_units != sps->PicHeightInCtbsY || |
426 | 0 | ctb_info.log2unitSize != sps->Log2CtbSizeY) |
427 | 0 | { |
428 | 0 | delete[] ctb_progress; |
429 | |
|
430 | 0 | mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY, |
431 | 0 | sps->Log2CtbSizeY); |
432 | |
|
433 | 0 | ctb_progress = new de265_progress_lock[ ctb_info.data_size ]; |
434 | 0 | } |
435 | | |
436 | | |
437 | | // check for memory shortage |
438 | |
|
439 | 0 | if (!mem_alloc_success) |
440 | 0 | { |
441 | 0 | return DE265_ERROR_OUT_OF_MEMORY; |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | 0 | return DE265_OK; |
446 | 0 | } |
447 | | |
448 | | |
449 | | de265_image::~de265_image() |
450 | 0 | { |
451 | 0 | release(); |
452 | | |
453 | | // free progress locks |
454 | |
|
455 | 0 | if (ctb_progress) { |
456 | 0 | delete[] ctb_progress; |
457 | 0 | } |
458 | 0 | } |
459 | | |
460 | | |
461 | | void de265_image::release() |
462 | 0 | { |
463 | | // free image memory |
464 | |
|
465 | 0 | if (pixels[0]) |
466 | 0 | { |
467 | | /* |
468 | | if (encoder_image_release_func != nullptr) { |
469 | | encoder_image_release_func(encctx, this, |
470 | | encctx->param_image_allocation_userdata); |
471 | | } |
472 | 0 | else*/ { |
473 | 0 | image_allocation_functions.release_buffer(decctx, this, |
474 | 0 | decctx ? |
475 | 0 | decctx->param_image_allocation_userdata : |
476 | 0 | nullptr); |
477 | 0 | } |
478 | |
|
479 | 0 | for (int i=0;i<3;i++) |
480 | 0 | { |
481 | 0 | pixels[i] = nullptr; |
482 | 0 | pixels_confwin[i] = nullptr; |
483 | 0 | } |
484 | 0 | } |
485 | | |
486 | | // free slices |
487 | |
|
488 | 0 | for (size_t i=0;i<slices.size();i++) { |
489 | 0 | delete slices[i]; |
490 | 0 | } |
491 | 0 | slices.clear(); |
492 | 0 | } |
493 | | |
494 | | |
495 | | void de265_image::fill_plane(int channel, int value) |
496 | 0 | { |
497 | 0 | int bytes_per_pixel = get_bytes_per_pixel(channel); |
498 | 0 | assert(value >= 0); // needed for the shift operation in the check below |
499 | | |
500 | | // Each plane is allocated with MEMORY_PADDING trailing bytes for safe SSE overread; the |
501 | | // memsets below cover that padding too so it never contains uninitialized heap data. |
502 | 0 | const size_t plane_bytes = |
503 | 0 | (channel == 0 ? static_cast<size_t>(stride) * height |
504 | 0 | : static_cast<size_t>(chroma_stride) * chroma_height) |
505 | 0 | * bytes_per_pixel; |
506 | |
|
507 | 0 | if (bytes_per_pixel == 1) { |
508 | 0 | memset(pixels[channel], value, plane_bytes + MEMORY_PADDING); |
509 | 0 | } |
510 | 0 | else if ((value >> 8) == (value & 0xFF)) { |
511 | 0 | assert(bytes_per_pixel == 2); |
512 | | |
513 | | // if we fill the same byte value to all bytes, we can still use memset() |
514 | 0 | memset(pixels[channel], 0, plane_bytes + MEMORY_PADDING); |
515 | 0 | } |
516 | 0 | else { |
517 | 0 | assert(bytes_per_pixel == 2); |
518 | 0 | uint16_t v = value; |
519 | |
|
520 | 0 | if (channel==0) { |
521 | | // copy value into first row |
522 | 0 | for (int x = 0; x < width; x++) { |
523 | 0 | *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v; |
524 | 0 | } |
525 | | |
526 | | // copy first row into remaining rows |
527 | 0 | for (int y = 1; y < height; y++) { |
528 | 0 | memcpy(pixels[channel] + y * stride * 2, pixels[channel], chroma_width * 2); |
529 | 0 | } |
530 | 0 | } |
531 | 0 | else { |
532 | | // copy value into first row |
533 | 0 | for (int x = 0; x < chroma_width; x++) { |
534 | 0 | *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v; |
535 | 0 | } |
536 | | |
537 | | // copy first row into remaining rows |
538 | 0 | for (int y = 1; y < chroma_height; y++) { |
539 | 0 | memcpy(pixels[channel] + y * chroma_stride * 2, pixels[channel], chroma_width * 2); |
540 | 0 | } |
541 | 0 | } |
542 | |
|
543 | 0 | #if MEMORY_PADDING > 0 |
544 | 0 | memset(pixels[channel] + plane_bytes, 0, MEMORY_PADDING); |
545 | 0 | #endif |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | | |
550 | | void de265_image::fill_image(int y,int cb,int cr) |
551 | 0 | { |
552 | 0 | if (pixels[0]) { |
553 | 0 | fill_plane(0, y); |
554 | 0 | } |
555 | |
|
556 | 0 | if (pixels[1]) { |
557 | 0 | fill_plane(1, cb); |
558 | 0 | } |
559 | |
|
560 | 0 | if (pixels[2]) { |
561 | 0 | fill_plane(2, cr); |
562 | 0 | } |
563 | 0 | } |
564 | | |
565 | | |
566 | | de265_error de265_image::copy_image(const de265_image* src) |
567 | 0 | { |
568 | | /* TODO: actually, since we allocate the image only for internal purpose, we |
569 | | do not have to call the external allocation routines for this. However, then |
570 | | we have to track for each image how to release it again. |
571 | | Another option would be to safe the copied data not in an de265_image at all. |
572 | | */ |
573 | |
|
574 | 0 | de265_error err = alloc_image(src->width, src->height, src->chroma_format, src->sps, false, |
575 | 0 | src->decctx, /*src->encctx,*/ src->pts, src->user_data, false); |
576 | 0 | if (err != DE265_OK) { |
577 | 0 | return err; |
578 | 0 | } |
579 | | |
580 | 0 | copy_lines_from(src, 0, src->height); |
581 | |
|
582 | 0 | return err; |
583 | 0 | } |
584 | | |
585 | | |
586 | | // end = last line + 1 |
587 | | void de265_image::copy_lines_from(const de265_image* src, int first, int end) |
588 | 0 | { |
589 | 0 | if (end > src->height) end=src->height; |
590 | |
|
591 | 0 | assert(first % 2 == 0); |
592 | 0 | assert(end % 2 == 0); |
593 | | |
594 | 0 | int luma_bpp = (sps->BitDepth_Y+7)/8; |
595 | 0 | int chroma_bpp = (sps->BitDepth_C+7)/8; |
596 | |
|
597 | 0 | if (src->stride == stride) { |
598 | 0 | memcpy(pixels[0] + first*stride * luma_bpp, |
599 | 0 | src->pixels[0] + first*src->stride * luma_bpp, |
600 | 0 | (end-first)*stride * luma_bpp); |
601 | 0 | } |
602 | 0 | else { |
603 | 0 | for (int yp=first;yp<end;yp++) { |
604 | 0 | memcpy(pixels[0]+yp*stride * luma_bpp, |
605 | 0 | src->pixels[0]+yp*src->stride * luma_bpp, |
606 | 0 | src->width * luma_bpp); |
607 | 0 | } |
608 | 0 | } |
609 | |
|
610 | 0 | int first_chroma = first / src->SubHeightC; |
611 | 0 | int end_chroma = end / src->SubHeightC; |
612 | |
|
613 | 0 | if (src->chroma_format != de265_chroma_mono) { |
614 | 0 | if (src->chroma_stride == chroma_stride) { |
615 | 0 | memcpy(pixels[1] + first_chroma*chroma_stride * chroma_bpp, |
616 | 0 | src->pixels[1] + first_chroma*chroma_stride * chroma_bpp, |
617 | 0 | (end_chroma-first_chroma) * chroma_stride * chroma_bpp); |
618 | 0 | memcpy(pixels[2] + first_chroma*chroma_stride * chroma_bpp, |
619 | 0 | src->pixels[2] + first_chroma*chroma_stride * chroma_bpp, |
620 | 0 | (end_chroma-first_chroma) * chroma_stride * chroma_bpp); |
621 | 0 | } |
622 | 0 | else { |
623 | 0 | for (int y=first_chroma;y<end_chroma;y++) { |
624 | 0 | memcpy(pixels[1]+y*chroma_stride * chroma_bpp, |
625 | 0 | src->pixels[1]+y*src->chroma_stride * chroma_bpp, |
626 | 0 | src->chroma_width * chroma_bpp); |
627 | 0 | memcpy(pixels[2]+y*chroma_stride * chroma_bpp, |
628 | 0 | src->pixels[2]+y*src->chroma_stride * chroma_bpp, |
629 | 0 | src->chroma_width * chroma_bpp); |
630 | 0 | } |
631 | 0 | } |
632 | 0 | } |
633 | 0 | } |
634 | | |
635 | | |
636 | | void de265_image::exchange_pixel_data_with(de265_image& b) |
637 | 0 | { |
638 | 0 | for (int i=0;i<3;i++) { |
639 | 0 | std::swap(pixels[i], b.pixels[i]); |
640 | 0 | std::swap(pixels_confwin[i], b.pixels_confwin[i]); |
641 | 0 | std::swap(plane_user_data[i], b.plane_user_data[i]); |
642 | 0 | } |
643 | |
|
644 | 0 | std::swap(stride, b.stride); |
645 | 0 | std::swap(chroma_stride, b.chroma_stride); |
646 | 0 | std::swap(image_allocation_functions, b.image_allocation_functions); |
647 | 0 | } |
648 | | |
649 | | |
650 | | void de265_image::thread_start(int nThreads) |
651 | 0 | { |
652 | 0 | std::unique_lock<std::mutex> lock(mutex); |
653 | | |
654 | | //printf("nThreads before: %d %d\n",nThreadsQueued, nThreadsTotal); |
655 | |
|
656 | 0 | nThreadsQueued += nThreads; |
657 | 0 | nThreadsTotal += nThreads; |
658 | | |
659 | | //printf("nThreads after: %d %d\n",nThreadsQueued, nThreadsTotal); |
660 | 0 | } |
661 | | |
662 | | void de265_image::thread_run(const thread_task* task) |
663 | 0 | { |
664 | 0 | std::unique_lock<std::mutex> lock(mutex); |
665 | | |
666 | | //printf("run thread %s\n", task->name().c_str()); |
667 | |
|
668 | 0 | nThreadsQueued--; |
669 | 0 | nThreadsRunning++; |
670 | 0 | } |
671 | | |
672 | | void de265_image::thread_blocks() |
673 | 0 | { |
674 | 0 | std::unique_lock<std::mutex> lock(mutex); |
675 | |
|
676 | 0 | nThreadsRunning--; |
677 | 0 | nThreadsBlocked++; |
678 | 0 | } |
679 | | |
680 | | void de265_image::thread_unblocks() |
681 | 0 | { |
682 | 0 | std::unique_lock<std::mutex> lock(mutex); |
683 | |
|
684 | 0 | nThreadsBlocked--; |
685 | 0 | nThreadsRunning++; |
686 | 0 | } |
687 | | |
688 | | void de265_image::thread_finishes(const thread_task* task) |
689 | 0 | { |
690 | | //printf("finish thread %s\n", task->name().c_str()); |
691 | |
|
692 | 0 | std::unique_lock<std::mutex> lock(mutex); |
693 | |
|
694 | 0 | nThreadsRunning--; |
695 | 0 | nThreadsFinished++; |
696 | 0 | assert(nThreadsRunning >= 0); |
697 | | |
698 | 0 | if (nThreadsFinished==nThreadsTotal) { |
699 | 0 | finished_cond.notify_all(); |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | | void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress) |
704 | 0 | { |
705 | 0 | const int ctbW = sps->PicWidthInCtbsY; |
706 | |
|
707 | 0 | wait_for_progress(task, ctbx + ctbW*ctby, progress); |
708 | 0 | } |
709 | | |
710 | | void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress) |
711 | 0 | { |
712 | 0 | if (task==nullptr) { return; } |
713 | | |
714 | 0 | de265_progress_lock* progresslock = &ctb_progress[ctbAddrRS]; |
715 | 0 | if (progresslock->get_progress() < progress) { |
716 | 0 | thread_blocks(); |
717 | |
|
718 | 0 | assert(task!=nullptr); |
719 | 0 | task->state = thread_task::Blocked; |
720 | | |
721 | | /* TODO: check whether we are the first blocked task in the list. |
722 | | If we are, we have to conceal input errors. |
723 | | Simplest concealment: do not block. |
724 | | */ |
725 | |
|
726 | 0 | progresslock->wait_for_progress(progress); |
727 | 0 | task->state = thread_task::Running; |
728 | 0 | thread_unblocks(); |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | | |
733 | | void de265_image::wait_for_completion() |
734 | 0 | { |
735 | 0 | std::unique_lock<std::mutex> lock(mutex); |
736 | |
|
737 | 0 | while (nThreadsFinished!=nThreadsTotal) { |
738 | 0 | finished_cond.wait(lock); |
739 | 0 | } |
740 | 0 | } |
741 | | |
742 | | bool de265_image::debug_is_completed() const |
743 | 0 | { |
744 | 0 | return nThreadsFinished==nThreadsTotal; |
745 | 0 | } |
746 | | |
747 | | |
748 | | |
749 | | void de265_image::clear_metadata() |
750 | 0 | { |
751 | | // TODO: maybe we could avoid the memset by ensuring that all data is written to |
752 | | // during decoding (especially log2CbSize), but it is unlikely to be faster than the memset. |
753 | |
|
754 | 0 | cb_info.clear(); |
755 | 0 | intraPredMode.clear(); |
756 | | //tu_info.clear(); // done on the fly |
757 | 0 | ctb_info.clear(); |
758 | 0 | deblk_info.clear(); |
759 | | |
760 | | // --- reset CTB progresses --- |
761 | |
|
762 | 0 | for (int i=0;i<ctb_info.data_size;i++) { |
763 | 0 | ctb_progress[i].reset(CTB_PROGRESS_NONE); |
764 | 0 | } |
765 | 0 | } |
766 | | |
767 | | |
768 | | void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const PBMotion& mv) |
769 | 0 | { |
770 | 0 | int log2PuSize = 2; |
771 | |
|
772 | 0 | int xPu = x >> log2PuSize; |
773 | 0 | int yPu = y >> log2PuSize; |
774 | 0 | int wPu = nPbW >> log2PuSize; |
775 | 0 | int hPu = nPbH >> log2PuSize; |
776 | |
|
777 | 0 | int stride = pb_info.width_in_units; |
778 | |
|
779 | 0 | for (int pby=0;pby<hPu;pby++) |
780 | 0 | for (int pbx=0;pbx<wPu;pbx++) |
781 | 0 | { |
782 | 0 | pb_info[ xPu+pbx + (yPu+pby)*stride ] = mv; |
783 | 0 | } |
784 | 0 | } |
785 | | |
786 | | |
787 | | bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const |
788 | 0 | { |
789 | 0 | if (xN<0 || yN<0) return false; |
790 | 0 | if (xN>=sps->pic_width_in_luma_samples || |
791 | 0 | yN>=sps->pic_height_in_luma_samples) return false; |
792 | | |
793 | 0 | int minBlockAddrN = pps->scan->MinTbAddrZS[ (xN>>sps->Log2MinTrafoSize) + |
794 | 0 | (yN>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ]; |
795 | 0 | int minBlockAddrCurr = pps->scan->MinTbAddrZS[ (xCurr>>sps->Log2MinTrafoSize) + |
796 | 0 | (yCurr>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ]; |
797 | |
|
798 | 0 | if (minBlockAddrN > minBlockAddrCurr) return false; |
799 | | |
800 | 0 | int xCurrCtb = xCurr >> sps->Log2CtbSizeY; |
801 | 0 | int yCurrCtb = yCurr >> sps->Log2CtbSizeY; |
802 | 0 | int xNCtb = xN >> sps->Log2CtbSizeY; |
803 | 0 | int yNCtb = yN >> sps->Log2CtbSizeY; |
804 | |
|
805 | 0 | if (get_SliceAddrRS(xCurrCtb,yCurrCtb) != |
806 | 0 | get_SliceAddrRS(xNCtb, yNCtb)) { |
807 | 0 | return false; |
808 | 0 | } |
809 | | |
810 | 0 | if (pps->scan->TileIdRS[xCurrCtb + yCurrCtb*sps->PicWidthInCtbsY] != |
811 | 0 | pps->scan->TileIdRS[xNCtb + yNCtb *sps->PicWidthInCtbsY]) { |
812 | 0 | return false; |
813 | 0 | } |
814 | | |
815 | 0 | return true; |
816 | 0 | } |
817 | | |
818 | | |
819 | | bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP, |
820 | | int nPbW, int nPbH, int partIdx, int xN,int yN) const |
821 | 0 | { |
822 | 0 | logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH); |
823 | |
|
824 | 0 | int sameCb = (xC <= xN && xN < xC+nCbS && |
825 | 0 | yC <= yN && yN < yC+nCbS); |
826 | |
|
827 | 0 | bool availableN; |
828 | |
|
829 | 0 | if (!sameCb) { |
830 | 0 | availableN = available_zscan(xP,yP,xN,yN); |
831 | 0 | } |
832 | 0 | else { |
833 | 0 | availableN = !(nPbW<<1 == nCbS && nPbH<<1 == nCbS && // NxN |
834 | 0 | partIdx==1 && |
835 | 0 | yN >= yC+nPbH && xN < xC+nPbW); // xN/yN inside partIdx 2 |
836 | 0 | } |
837 | |
|
838 | 0 | if (availableN && get_pred_mode(xN,yN) == MODE_INTRA) { |
839 | 0 | availableN = false; |
840 | 0 | } |
841 | |
|
842 | 0 | return availableN; |
843 | 0 | } |