/work/libde265/libde265/image.cc
Line | Count | Source |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "image.h" |
22 | | #include "decctx.h" |
23 | | #include "en265.h" |
24 | | |
25 | | #include <atomic> |
26 | | |
27 | | #include <stdlib.h> |
28 | | #include <string.h> |
29 | | #include <assert.h> |
30 | | |
31 | | #include <limits> |
32 | | |
33 | | |
34 | | #ifdef HAVE_MALLOC_H |
35 | | #include <malloc.h> |
36 | | #endif |
37 | | |
38 | | #ifdef HAVE_SSE4_1 |
39 | | // SSE code processes 128bit per iteration and thus might read more data |
40 | | // than is later actually used. |
41 | | #define MEMORY_PADDING 16 |
42 | | #else |
43 | | #define MEMORY_PADDING 0 |
44 | | #endif |
45 | | |
46 | 0 | #define STANDARD_ALIGNMENT 16 |
47 | | |
48 | | #ifdef HAVE___MINGW_ALIGNED_MALLOC |
49 | | #define ALLOC_ALIGNED(alignment, size) __mingw_aligned_malloc((size), (alignment)) |
50 | | #define FREE_ALIGNED(mem) __mingw_aligned_free((mem)) |
51 | | #elif _WIN32 |
52 | | #define ALLOC_ALIGNED(alignment, size) _aligned_malloc((size), (alignment)) |
53 | | #define FREE_ALIGNED(mem) _aligned_free((mem)) |
54 | | #elif defined(HAVE_POSIX_MEMALIGN) |
/* Allocates `size` bytes whose address is a multiple of `alignment` using
   posix_memalign(). Returns NULL if posix_memalign() reports an error. */
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
  void *block = NULL;
  const int rc = posix_memalign(&block, alignment, size);
  return (rc == 0) ? block : NULL;
}
62 | 0 | #define FREE_ALIGNED(mem) free((mem)) |
63 | | #else |
64 | | #define ALLOC_ALIGNED(alignment, size) memalign((alignment), (size)) |
65 | | #define FREE_ALIGNED(mem) free((mem)) |
66 | | #endif |
67 | | |
68 | 0 | #define ALLOC_ALIGNED_16(size) ALLOC_ALIGNED(16, size) |
69 | | |
70 | | static const int alignment = 16; |
71 | | |
72 | | LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx, |
73 | | void* inputdata, int inputstride, void *userdata) |
74 | 0 | { |
75 | 0 | int alignment = STANDARD_ALIGNMENT; |
76 | 0 | int stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment; |
77 | 0 | int height = img->get_height(cIdx); |
78 | |
|
79 | 0 | uint8_t* p = (uint8_t *)ALLOC_ALIGNED_16(stride * height + MEMORY_PADDING); |
80 | |
|
81 | 0 | if (p==NULL) { return NULL; } |
82 | | |
83 | 0 | img->set_image_plane(cIdx, p, stride, userdata); |
84 | | |
85 | | // copy input data if provided |
86 | |
|
87 | 0 | if (inputdata != NULL) { |
88 | 0 | if (inputstride == stride) { |
89 | 0 | memcpy(p, inputdata, stride*height); |
90 | 0 | } |
91 | 0 | else { |
92 | 0 | for (int y=0;y<height;y++) { |
93 | 0 | memcpy(p+y*stride, ((char*)inputdata) + inputstride*y, inputstride); |
94 | 0 | } |
95 | 0 | } |
96 | 0 | } |
97 | |
|
98 | 0 | return p; |
99 | 0 | } |
100 | | |
101 | | |
102 | | LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx) |
103 | 0 | { |
104 | 0 | uint8_t* p = (uint8_t*)img->get_image_plane(cIdx); |
105 | 0 | assert(p); |
106 | 0 | FREE_ALIGNED(p); |
107 | 0 | } |
108 | | |
109 | | |
110 | | static int de265_image_get_buffer(de265_decoder_context* ctx, |
111 | | de265_image_spec* spec, de265_image* img, void* userdata) |
112 | 0 | { |
113 | 0 | const int rawChromaWidth = spec->width / img->SubWidthC; |
114 | 0 | const int rawChromaHeight = spec->height / img->SubHeightC; |
115 | |
|
116 | 0 | int luma_stride = (spec->width + spec->alignment-1) / spec->alignment * spec->alignment; |
117 | 0 | int chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment; |
118 | |
|
119 | 0 | assert(img->BitDepth_Y >= 8 && img->BitDepth_Y <= 16); |
120 | 0 | assert(img->BitDepth_C >= 8 && img->BitDepth_C <= 16); |
121 | | |
122 | 0 | int luma_bpl = luma_stride * ((img->BitDepth_Y+7)/8); |
123 | 0 | int chroma_bpl = chroma_stride * ((img->BitDepth_C+7)/8); |
124 | |
|
125 | 0 | int luma_height = spec->height; |
126 | 0 | int chroma_height = rawChromaHeight; |
127 | |
|
128 | 0 | bool alloc_failed = false; |
129 | |
|
130 | 0 | uint8_t* p[3] = { 0,0,0 }; |
131 | 0 | p[0] = (uint8_t *)ALLOC_ALIGNED_16(luma_height * luma_bpl + MEMORY_PADDING); |
132 | 0 | if (p[0]==NULL) { alloc_failed=true; } |
133 | |
|
134 | 0 | if (img->get_chroma_format() != de265_chroma_mono) { |
135 | 0 | p[1] = (uint8_t *)ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING); |
136 | 0 | p[2] = (uint8_t *)ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING); |
137 | |
|
138 | 0 | if (p[1]==NULL || p[2]==NULL) { alloc_failed=true; } |
139 | 0 | } |
140 | 0 | else { |
141 | 0 | p[1] = NULL; |
142 | 0 | p[2] = NULL; |
143 | 0 | chroma_stride = 0; |
144 | 0 | } |
145 | |
|
146 | 0 | if (alloc_failed) { |
147 | 0 | for (int i=0;i<3;i++) |
148 | 0 | if (p[i]) { |
149 | 0 | FREE_ALIGNED(p[i]); |
150 | 0 | } |
151 | |
|
152 | 0 | return 0; |
153 | 0 | } |
154 | | |
155 | 0 | img->set_image_plane(0, p[0], luma_stride, NULL); |
156 | 0 | img->set_image_plane(1, p[1], chroma_stride, NULL); |
157 | 0 | img->set_image_plane(2, p[2], chroma_stride, NULL); |
158 | |
|
159 | 0 | img->fill_image(0,0,0); |
160 | |
|
161 | 0 | return 1; |
162 | 0 | } |
163 | | |
164 | | static void de265_image_release_buffer(de265_decoder_context* ctx, |
165 | | de265_image* img, void* userdata) |
166 | 0 | { |
167 | 0 | for (int i=0;i<3;i++) { |
168 | 0 | uint8_t* p = (uint8_t*)img->get_image_plane(i); |
169 | 0 | if (p) { |
170 | 0 | FREE_ALIGNED(p); |
171 | 0 | } |
172 | 0 | } |
173 | 0 | } |
174 | | |
175 | | |
176 | | de265_image_allocation de265_image::default_image_allocation = { |
177 | | de265_image_get_buffer, |
178 | | de265_image_release_buffer |
179 | | }; |
180 | | |
181 | | |
182 | | void de265_image::set_image_plane(int cIdx, uint8_t* mem, int stride, void *userdata) |
183 | 0 | { |
184 | 0 | pixels[cIdx] = mem; |
185 | 0 | plane_user_data[cIdx] = userdata; |
186 | |
|
187 | 0 | if (cIdx==0) { this->stride = stride; } |
188 | 0 | else { this->chroma_stride = stride; } |
189 | 0 | } |
190 | | |
191 | | |
192 | | de265_image::de265_image() |
193 | 0 | { |
194 | 0 | ID = -1; |
195 | 0 | removed_at_picture_id = 0; // picture not used, so we can assume it has been removed |
196 | |
|
197 | 0 | decctx = NULL; |
198 | | //encctx = NULL; |
199 | | |
200 | | //encoder_image_release_func = NULL; |
201 | | |
202 | | //alloc_functions.get_buffer = NULL; |
203 | | //alloc_functions.release_buffer = NULL; |
204 | |
|
205 | 0 | for (int c=0;c<3;c++) { |
206 | 0 | pixels[c] = NULL; |
207 | 0 | pixels_confwin[c] = NULL; |
208 | 0 | plane_user_data[c] = NULL; |
209 | 0 | } |
210 | |
|
211 | 0 | width=height=0; |
212 | |
|
213 | 0 | pts = 0; |
214 | 0 | user_data = NULL; |
215 | |
|
216 | 0 | ctb_progress = NULL; |
217 | |
|
218 | 0 | integrity = INTEGRITY_NOT_DECODED; |
219 | |
|
220 | 0 | picture_order_cnt_lsb = -1; // undefined |
221 | 0 | PicOrderCntVal = -1; // undefined |
222 | 0 | PicState = UnusedForReference; |
223 | 0 | PicOutputFlag = false; |
224 | |
|
225 | 0 | nThreadsQueued = 0; |
226 | 0 | nThreadsRunning = 0; |
227 | 0 | nThreadsBlocked = 0; |
228 | 0 | nThreadsFinished = 0; |
229 | 0 | nThreadsTotal = 0; |
230 | 0 | } |
231 | | |
232 | | |
233 | | de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c, |
234 | | std::shared_ptr<const seq_parameter_set> sps, bool allocMetadata, |
235 | | decoder_context* dctx, |
236 | | //encoder_context* ectx, |
237 | | de265_PTS pts, void* user_data, |
238 | | bool useCustomAllocFunc) |
239 | 0 | { |
240 | | //if (allocMetadata) { assert(sps); } |
241 | 0 | if (allocMetadata) { assert(sps); } |
242 | | |
243 | 0 | if (sps) { this->sps = sps; } |
244 | |
|
245 | 0 | release(); /* TODO: review code for efficient allocation when arrays are already |
246 | | allocated to the requested size. Without the release, the old image-data |
247 | | will not be freed. */ |
248 | |
|
249 | 0 | static std::atomic<uint32_t> s_next_image_ID(0); |
250 | 0 | ID = s_next_image_ID++; |
251 | 0 | removed_at_picture_id = std::numeric_limits<int32_t>::max(); |
252 | |
|
253 | 0 | decctx = dctx; |
254 | | //encctx = ectx; |
255 | | |
256 | | // --- allocate image buffer --- |
257 | |
|
258 | 0 | chroma_format= c; |
259 | |
|
260 | 0 | width = w; |
261 | 0 | height = h; |
262 | 0 | chroma_width = w; |
263 | 0 | chroma_height= h; |
264 | |
|
265 | 0 | this->user_data = user_data; |
266 | 0 | this->pts = pts; |
267 | |
|
268 | 0 | de265_image_spec spec; |
269 | |
|
270 | 0 | int WinUnitX, WinUnitY; |
271 | |
|
272 | 0 | switch (chroma_format) { |
273 | 0 | case de265_chroma_mono: WinUnitX=1; WinUnitY=1; break; |
274 | 0 | case de265_chroma_420: WinUnitX=2; WinUnitY=2; break; |
275 | 0 | case de265_chroma_422: WinUnitX=2; WinUnitY=1; break; |
276 | 0 | case de265_chroma_444: WinUnitX=1; WinUnitY=1; break; |
277 | 0 | default: |
278 | 0 | assert(0); |
279 | 0 | } |
280 | | |
281 | 0 | switch (chroma_format) { |
282 | 0 | case de265_chroma_420: |
283 | 0 | spec.format = de265_image_format_YUV420P8; |
284 | 0 | chroma_width = (chroma_width +1)/2; |
285 | 0 | chroma_height = (chroma_height+1)/2; |
286 | 0 | SubWidthC = 2; |
287 | 0 | SubHeightC = 2; |
288 | 0 | break; |
289 | | |
290 | 0 | case de265_chroma_422: |
291 | 0 | spec.format = de265_image_format_YUV422P8; |
292 | 0 | chroma_width = (chroma_width+1)/2; |
293 | 0 | SubWidthC = 2; |
294 | 0 | SubHeightC = 1; |
295 | 0 | break; |
296 | | |
297 | 0 | case de265_chroma_444: |
298 | 0 | spec.format = de265_image_format_YUV444P8; |
299 | 0 | SubWidthC = 1; |
300 | 0 | SubHeightC = 1; |
301 | 0 | break; |
302 | | |
303 | 0 | case de265_chroma_mono: |
304 | 0 | spec.format = de265_image_format_mono8; |
305 | 0 | chroma_width = 0; |
306 | 0 | chroma_height= 0; |
307 | 0 | SubWidthC = 1; |
308 | 0 | SubHeightC = 1; |
309 | 0 | break; |
310 | | |
311 | 0 | default: |
312 | 0 | assert(false); |
313 | 0 | break; |
314 | 0 | } |
315 | | |
316 | 0 | if (chroma_format != de265_chroma_mono && sps) { |
317 | 0 | assert(sps->SubWidthC == SubWidthC); |
318 | 0 | assert(sps->SubHeightC == SubHeightC); |
319 | 0 | } |
320 | | |
321 | 0 | spec.width = w; |
322 | 0 | spec.height = h; |
323 | 0 | spec.alignment = STANDARD_ALIGNMENT; |
324 | | |
325 | | |
326 | | // conformance window cropping |
327 | |
|
328 | 0 | int left = sps ? sps->conf_win_left_offset : 0; |
329 | 0 | int right = sps ? sps->conf_win_right_offset : 0; |
330 | 0 | int top = sps ? sps->conf_win_top_offset : 0; |
331 | 0 | int bottom = sps ? sps->conf_win_bottom_offset : 0; |
332 | |
|
333 | 0 | if ((left+right)*WinUnitX >= width) { |
334 | 0 | return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE; |
335 | 0 | } |
336 | | |
337 | 0 | if ((top+bottom)*WinUnitY >= height) { |
338 | 0 | return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE; |
339 | 0 | } |
340 | | |
341 | 0 | width_confwin = width - (left+right)*WinUnitX; |
342 | 0 | height_confwin= height- (top+bottom)*WinUnitY; |
343 | 0 | chroma_width_confwin = chroma_width -left-right; |
344 | 0 | chroma_height_confwin= chroma_height-top-bottom; |
345 | |
|
346 | 0 | spec.crop_left = left *WinUnitX; |
347 | 0 | spec.crop_right = right*WinUnitX; |
348 | 0 | spec.crop_top = top *WinUnitY; |
349 | 0 | spec.crop_bottom= bottom*WinUnitY; |
350 | |
|
351 | 0 | spec.visible_width = width_confwin; |
352 | 0 | spec.visible_height= height_confwin; |
353 | | |
354 | |
|
355 | 0 | BitDepth_Y = (sps==NULL) ? 8 : sps->BitDepth_Y; |
356 | 0 | BitDepth_C = (sps==NULL) ? 8 : sps->BitDepth_C; |
357 | |
|
358 | 0 | bpp_shift[0] = (BitDepth_Y <= 8) ? 0 : 1; |
359 | 0 | bpp_shift[1] = (BitDepth_C <= 8) ? 0 : 1; |
360 | 0 | bpp_shift[2] = bpp_shift[1]; |
361 | | |
362 | | |
363 | | // allocate memory and set conformance window pointers |
364 | |
|
365 | 0 | void* alloc_userdata = NULL; |
366 | 0 | if (decctx) alloc_userdata = decctx->param_image_allocation_userdata; |
367 | | // if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // actually not needed |
368 | | |
369 | | /* |
370 | | if (encctx && useCustomAllocFunc) { |
371 | | encoder_image_release_func = encctx->release_func; |
372 | | |
373 | | // if we do not provide a release function, use our own |
374 | | |
375 | | if (encoder_image_release_func == NULL) { |
376 | | image_allocation_functions = de265_image::default_image_allocation; |
377 | | } |
378 | | else { |
379 | | image_allocation_functions.get_buffer = NULL; |
380 | | image_allocation_functions.release_buffer = NULL; |
381 | | } |
382 | | } |
383 | 0 | else*/ if (decctx && useCustomAllocFunc) { |
384 | 0 | image_allocation_functions = decctx->param_image_allocation_functions; |
385 | 0 | } |
386 | 0 | else { |
387 | 0 | image_allocation_functions = de265_image::default_image_allocation; |
388 | 0 | } |
389 | |
|
390 | 0 | bool mem_alloc_success = true; |
391 | |
|
392 | 0 | if (image_allocation_functions.get_buffer != NULL) { |
393 | 0 | mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this, |
394 | 0 | alloc_userdata); |
395 | |
|
396 | 0 | pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride; |
397 | |
|
398 | 0 | if (chroma_format != de265_chroma_mono) { |
399 | 0 | pixels_confwin[1] = pixels[1] + left + top*chroma_stride; |
400 | 0 | pixels_confwin[2] = pixels[2] + left + top*chroma_stride; |
401 | 0 | } |
402 | 0 | else { |
403 | 0 | pixels_confwin[1] = NULL; |
404 | 0 | pixels_confwin[2] = NULL; |
405 | 0 | } |
406 | | |
407 | | // check for memory shortage |
408 | |
|
409 | 0 | if (!mem_alloc_success) |
410 | 0 | { |
411 | 0 | return DE265_ERROR_OUT_OF_MEMORY; |
412 | 0 | } |
413 | 0 | } |
414 | | |
415 | | //alloc_functions = *allocfunc; |
416 | | //alloc_userdata = userdata; |
417 | | |
418 | | // --- allocate decoding info arrays --- |
419 | | |
420 | 0 | if (allocMetadata) { |
421 | | // intra pred mode |
422 | |
|
423 | 0 | mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs, |
424 | 0 | sps->Log2MinPUSize); |
425 | |
|
426 | 0 | mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs, |
427 | 0 | sps->Log2MinPUSize); |
428 | | |
429 | | // cb info |
430 | |
|
431 | 0 | mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY, |
432 | 0 | sps->Log2MinCbSizeY); |
433 | | |
434 | | // pb info |
435 | |
|
436 | 0 | int puWidth = sps->PicWidthInMinCbsY << (sps->Log2MinCbSizeY -2); |
437 | 0 | int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2); |
438 | |
|
439 | 0 | mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2); |
440 | | |
441 | | |
442 | | // tu info |
443 | |
|
444 | 0 | mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY, |
445 | 0 | sps->Log2MinTrafoSize); |
446 | | |
447 | | // deblk info |
448 | |
|
449 | 0 | int deblk_w = (sps->pic_width_in_luma_samples +3)/4; |
450 | 0 | int deblk_h = (sps->pic_height_in_luma_samples+3)/4; |
451 | |
|
452 | 0 | mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2); |
453 | | |
454 | | // CTB info |
455 | |
|
456 | 0 | if (ctb_info.width_in_units != sps->PicWidthInCtbsY || |
457 | 0 | ctb_info.height_in_units != sps->PicHeightInCtbsY) |
458 | 0 | { |
459 | 0 | delete[] ctb_progress; |
460 | |
|
461 | 0 | mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY, |
462 | 0 | sps->Log2CtbSizeY); |
463 | |
|
464 | 0 | ctb_progress = new de265_progress_lock[ ctb_info.data_size ]; |
465 | 0 | } |
466 | | |
467 | | |
468 | | // check for memory shortage |
469 | |
|
470 | 0 | if (!mem_alloc_success) |
471 | 0 | { |
472 | 0 | return DE265_ERROR_OUT_OF_MEMORY; |
473 | 0 | } |
474 | 0 | } |
475 | | |
476 | 0 | return DE265_OK; |
477 | 0 | } |
478 | | |
479 | | |
480 | | de265_image::~de265_image() |
481 | 0 | { |
482 | 0 | release(); |
483 | | |
484 | | // free progress locks |
485 | |
|
486 | 0 | if (ctb_progress) { |
487 | 0 | delete[] ctb_progress; |
488 | 0 | } |
489 | 0 | } |
490 | | |
491 | | |
492 | | void de265_image::release() |
493 | 0 | { |
494 | | // free image memory |
495 | |
|
496 | 0 | if (pixels[0]) |
497 | 0 | { |
498 | | /* |
499 | | if (encoder_image_release_func != NULL) { |
500 | | encoder_image_release_func(encctx, this, |
501 | | encctx->param_image_allocation_userdata); |
502 | | } |
503 | 0 | else*/ { |
504 | 0 | image_allocation_functions.release_buffer(decctx, this, |
505 | 0 | decctx ? |
506 | 0 | decctx->param_image_allocation_userdata : |
507 | 0 | NULL); |
508 | 0 | } |
509 | |
|
510 | 0 | for (int i=0;i<3;i++) |
511 | 0 | { |
512 | 0 | pixels[i] = NULL; |
513 | 0 | pixels_confwin[i] = NULL; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | // free slices |
518 | |
|
519 | 0 | for (size_t i=0;i<slices.size();i++) { |
520 | 0 | delete slices[i]; |
521 | 0 | } |
522 | 0 | slices.clear(); |
523 | 0 | } |
524 | | |
525 | | |
526 | | void de265_image::fill_plane(int channel, int value) |
527 | 0 | { |
528 | 0 | int bytes_per_pixel = get_bytes_per_pixel(channel); |
529 | 0 | assert(value >= 0); // needed for the shift operation in the check below |
530 | | |
531 | 0 | if (bytes_per_pixel == 1) { |
532 | 0 | if (channel==0) { |
533 | 0 | memset(pixels[channel], value, stride * height); |
534 | 0 | } |
535 | 0 | else { |
536 | 0 | memset(pixels[channel], value, chroma_stride * chroma_height); |
537 | 0 | } |
538 | 0 | } |
539 | 0 | else if ((value >> 8) == (value & 0xFF)) { |
540 | 0 | assert(bytes_per_pixel == 2); |
541 | | |
542 | | // if we fill the same byte value to all bytes, we can still use memset() |
543 | 0 | if (channel==0) { |
544 | 0 | memset(pixels[channel], 0, stride * height * bytes_per_pixel); |
545 | 0 | } |
546 | 0 | else { |
547 | 0 | memset(pixels[channel], 0, chroma_stride * chroma_height * bytes_per_pixel); |
548 | 0 | } |
549 | 0 | } |
550 | 0 | else { |
551 | 0 | assert(bytes_per_pixel == 2); |
552 | 0 | uint16_t v = value; |
553 | |
|
554 | 0 | if (channel==0) { |
555 | | // copy value into first row |
556 | 0 | for (int x = 0; x < width; x++) { |
557 | 0 | *(uint16_t*) (&pixels[channel][2 * x]) = v; |
558 | 0 | } |
559 | | |
560 | | // copy first row into remaining rows |
561 | 0 | for (int y = 1; y < height; y++) { |
562 | 0 | memcpy(pixels[channel] + y * stride * 2, pixels[channel], chroma_width * 2); |
563 | 0 | } |
564 | 0 | } |
565 | 0 | else { |
566 | | // copy value into first row |
567 | 0 | for (int x = 0; x < chroma_width; x++) { |
568 | 0 | *(uint16_t*) (&pixels[channel][2 * x]) = v; |
569 | 0 | } |
570 | | |
571 | | // copy first row into remaining rows |
572 | 0 | for (int y = 1; y < chroma_height; y++) { |
573 | 0 | memcpy(pixels[channel] + y * chroma_stride * 2, pixels[channel], chroma_width * 2); |
574 | 0 | } |
575 | 0 | } |
576 | 0 | } |
577 | 0 | } |
578 | | |
579 | | |
580 | | void de265_image::fill_image(int y,int cb,int cr) |
581 | 0 | { |
582 | 0 | if (pixels[0]) { |
583 | 0 | fill_plane(0, y); |
584 | 0 | } |
585 | |
|
586 | 0 | if (pixels[1]) { |
587 | 0 | fill_plane(1, cb); |
588 | 0 | } |
589 | |
|
590 | 0 | if (pixels[2]) { |
591 | 0 | fill_plane(2, cr); |
592 | 0 | } |
593 | 0 | } |
594 | | |
595 | | |
596 | | de265_error de265_image::copy_image(const de265_image* src) |
597 | 0 | { |
598 | | /* TODO: actually, since we allocate the image only for internal purpose, we |
599 | | do not have to call the external allocation routines for this. However, then |
600 | | we have to track for each image how to release it again. |
601 | | Another option would be to safe the copied data not in an de265_image at all. |
602 | | */ |
603 | |
|
604 | 0 | de265_error err = alloc_image(src->width, src->height, src->chroma_format, src->sps, false, |
605 | 0 | src->decctx, /*src->encctx,*/ src->pts, src->user_data, false); |
606 | 0 | if (err != DE265_OK) { |
607 | 0 | return err; |
608 | 0 | } |
609 | | |
610 | 0 | copy_lines_from(src, 0, src->height); |
611 | |
|
612 | 0 | return err; |
613 | 0 | } |
614 | | |
615 | | |
616 | | // end = last line + 1 |
617 | | void de265_image::copy_lines_from(const de265_image* src, int first, int end) |
618 | 0 | { |
619 | 0 | if (end > src->height) end=src->height; |
620 | |
|
621 | 0 | assert(first % 2 == 0); |
622 | 0 | assert(end % 2 == 0); |
623 | | |
624 | 0 | int luma_bpp = (sps->BitDepth_Y+7)/8; |
625 | 0 | int chroma_bpp = (sps->BitDepth_C+7)/8; |
626 | |
|
627 | 0 | if (src->stride == stride) { |
628 | 0 | memcpy(pixels[0] + first*stride * luma_bpp, |
629 | 0 | src->pixels[0] + first*src->stride * luma_bpp, |
630 | 0 | (end-first)*stride * luma_bpp); |
631 | 0 | } |
632 | 0 | else { |
633 | 0 | for (int yp=first;yp<end;yp++) { |
634 | 0 | memcpy(pixels[0]+yp*stride * luma_bpp, |
635 | 0 | src->pixels[0]+yp*src->stride * luma_bpp, |
636 | 0 | src->width * luma_bpp); |
637 | 0 | } |
638 | 0 | } |
639 | |
|
640 | 0 | int first_chroma = first / src->SubHeightC; |
641 | 0 | int end_chroma = end / src->SubHeightC; |
642 | |
|
643 | 0 | if (src->chroma_format != de265_chroma_mono) { |
644 | 0 | if (src->chroma_stride == chroma_stride) { |
645 | 0 | memcpy(pixels[1] + first_chroma*chroma_stride * chroma_bpp, |
646 | 0 | src->pixels[1] + first_chroma*chroma_stride * chroma_bpp, |
647 | 0 | (end_chroma-first_chroma) * chroma_stride * chroma_bpp); |
648 | 0 | memcpy(pixels[2] + first_chroma*chroma_stride * chroma_bpp, |
649 | 0 | src->pixels[2] + first_chroma*chroma_stride * chroma_bpp, |
650 | 0 | (end_chroma-first_chroma) * chroma_stride * chroma_bpp); |
651 | 0 | } |
652 | 0 | else { |
653 | 0 | for (int y=first_chroma;y<end_chroma;y++) { |
654 | 0 | memcpy(pixels[1]+y*chroma_stride * chroma_bpp, |
655 | 0 | src->pixels[1]+y*src->chroma_stride * chroma_bpp, |
656 | 0 | src->chroma_width * chroma_bpp); |
657 | 0 | memcpy(pixels[2]+y*chroma_stride * chroma_bpp, |
658 | 0 | src->pixels[2]+y*src->chroma_stride * chroma_bpp, |
659 | 0 | src->chroma_width * chroma_bpp); |
660 | 0 | } |
661 | 0 | } |
662 | 0 | } |
663 | 0 | } |
664 | | |
665 | | |
666 | | void de265_image::exchange_pixel_data_with(de265_image& b) |
667 | 0 | { |
668 | 0 | for (int i=0;i<3;i++) { |
669 | 0 | std::swap(pixels[i], b.pixels[i]); |
670 | 0 | std::swap(pixels_confwin[i], b.pixels_confwin[i]); |
671 | 0 | std::swap(plane_user_data[i], b.plane_user_data[i]); |
672 | 0 | } |
673 | |
|
674 | 0 | std::swap(stride, b.stride); |
675 | 0 | std::swap(chroma_stride, b.chroma_stride); |
676 | 0 | std::swap(image_allocation_functions, b.image_allocation_functions); |
677 | 0 | } |
678 | | |
679 | | |
680 | | void de265_image::thread_start(int nThreads) |
681 | 0 | { |
682 | 0 | std::unique_lock<std::mutex> lock(mutex); |
683 | | |
684 | | //printf("nThreads before: %d %d\n",nThreadsQueued, nThreadsTotal); |
685 | |
|
686 | 0 | nThreadsQueued += nThreads; |
687 | 0 | nThreadsTotal += nThreads; |
688 | | |
689 | | //printf("nThreads after: %d %d\n",nThreadsQueued, nThreadsTotal); |
690 | 0 | } |
691 | | |
692 | | void de265_image::thread_run(const thread_task* task) |
693 | 0 | { |
694 | 0 | std::unique_lock<std::mutex> lock(mutex); |
695 | | |
696 | | //printf("run thread %s\n", task->name().c_str()); |
697 | |
|
698 | 0 | nThreadsQueued--; |
699 | 0 | nThreadsRunning++; |
700 | 0 | } |
701 | | |
702 | | void de265_image::thread_blocks() |
703 | 0 | { |
704 | 0 | std::unique_lock<std::mutex> lock(mutex); |
705 | |
|
706 | 0 | nThreadsRunning--; |
707 | 0 | nThreadsBlocked++; |
708 | 0 | } |
709 | | |
710 | | void de265_image::thread_unblocks() |
711 | 0 | { |
712 | 0 | std::unique_lock<std::mutex> lock(mutex); |
713 | |
|
714 | 0 | nThreadsBlocked--; |
715 | 0 | nThreadsRunning++; |
716 | 0 | } |
717 | | |
718 | | void de265_image::thread_finishes(const thread_task* task) |
719 | 0 | { |
720 | | //printf("finish thread %s\n", task->name().c_str()); |
721 | |
|
722 | 0 | std::unique_lock<std::mutex> lock(mutex); |
723 | |
|
724 | 0 | nThreadsRunning--; |
725 | 0 | nThreadsFinished++; |
726 | 0 | assert(nThreadsRunning >= 0); |
727 | | |
728 | 0 | if (nThreadsFinished==nThreadsTotal) { |
729 | 0 | finished_cond.notify_all(); |
730 | 0 | } |
731 | 0 | } |
732 | | |
733 | | void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress) |
734 | 0 | { |
735 | 0 | const int ctbW = sps->PicWidthInCtbsY; |
736 | |
|
737 | 0 | wait_for_progress(task, ctbx + ctbW*ctby, progress); |
738 | 0 | } |
739 | | |
740 | | void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress) |
741 | 0 | { |
742 | 0 | if (task==NULL) { return; } |
743 | | |
744 | 0 | de265_progress_lock* progresslock = &ctb_progress[ctbAddrRS]; |
745 | 0 | if (progresslock->get_progress() < progress) { |
746 | 0 | thread_blocks(); |
747 | |
|
748 | 0 | assert(task!=NULL); |
749 | 0 | task->state = thread_task::Blocked; |
750 | | |
751 | | /* TODO: check whether we are the first blocked task in the list. |
752 | | If we are, we have to conceal input errors. |
753 | | Simplest concealment: do not block. |
754 | | */ |
755 | |
|
756 | 0 | progresslock->wait_for_progress(progress); |
757 | 0 | task->state = thread_task::Running; |
758 | 0 | thread_unblocks(); |
759 | 0 | } |
760 | 0 | } |
761 | | |
762 | | |
763 | | void de265_image::wait_for_completion() |
764 | 0 | { |
765 | 0 | std::unique_lock<std::mutex> lock(mutex); |
766 | |
|
767 | 0 | while (nThreadsFinished!=nThreadsTotal) { |
768 | 0 | finished_cond.wait(lock); |
769 | 0 | } |
770 | 0 | } |
771 | | |
772 | | bool de265_image::debug_is_completed() const |
773 | 0 | { |
774 | 0 | return nThreadsFinished==nThreadsTotal; |
775 | 0 | } |
776 | | |
777 | | |
778 | | |
779 | | void de265_image::clear_metadata() |
780 | 0 | { |
781 | | // TODO: maybe we could avoid the memset by ensuring that all data is written to |
782 | | // during decoding (especially log2CbSize), but it is unlikely to be faster than the memset. |
783 | |
|
784 | 0 | cb_info.clear(); |
785 | 0 | intraPredMode.clear(); |
786 | | //tu_info.clear(); // done on the fly |
787 | 0 | ctb_info.clear(); |
788 | 0 | deblk_info.clear(); |
789 | | |
790 | | // --- reset CTB progresses --- |
791 | |
|
792 | 0 | for (int i=0;i<ctb_info.data_size;i++) { |
793 | 0 | ctb_progress[i].reset(CTB_PROGRESS_NONE); |
794 | 0 | } |
795 | 0 | } |
796 | | |
797 | | |
798 | | void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const PBMotion& mv) |
799 | 0 | { |
800 | 0 | int log2PuSize = 2; |
801 | |
|
802 | 0 | int xPu = x >> log2PuSize; |
803 | 0 | int yPu = y >> log2PuSize; |
804 | 0 | int wPu = nPbW >> log2PuSize; |
805 | 0 | int hPu = nPbH >> log2PuSize; |
806 | |
|
807 | 0 | int stride = pb_info.width_in_units; |
808 | |
|
809 | 0 | for (int pby=0;pby<hPu;pby++) |
810 | 0 | for (int pbx=0;pbx<wPu;pbx++) |
811 | 0 | { |
812 | 0 | pb_info[ xPu+pbx + (yPu+pby)*stride ] = mv; |
813 | 0 | } |
814 | 0 | } |
815 | | |
816 | | |
817 | | bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const |
818 | 0 | { |
819 | 0 | if (xN<0 || yN<0) return false; |
820 | 0 | if (xN>=sps->pic_width_in_luma_samples || |
821 | 0 | yN>=sps->pic_height_in_luma_samples) return false; |
822 | | |
823 | 0 | int minBlockAddrN = pps->MinTbAddrZS[ (xN>>sps->Log2MinTrafoSize) + |
824 | 0 | (yN>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ]; |
825 | 0 | int minBlockAddrCurr = pps->MinTbAddrZS[ (xCurr>>sps->Log2MinTrafoSize) + |
826 | 0 | (yCurr>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ]; |
827 | |
|
828 | 0 | if (minBlockAddrN > minBlockAddrCurr) return false; |
829 | | |
830 | 0 | int xCurrCtb = xCurr >> sps->Log2CtbSizeY; |
831 | 0 | int yCurrCtb = yCurr >> sps->Log2CtbSizeY; |
832 | 0 | int xNCtb = xN >> sps->Log2CtbSizeY; |
833 | 0 | int yNCtb = yN >> sps->Log2CtbSizeY; |
834 | |
|
835 | 0 | if (get_SliceAddrRS(xCurrCtb,yCurrCtb) != |
836 | 0 | get_SliceAddrRS(xNCtb, yNCtb)) { |
837 | 0 | return false; |
838 | 0 | } |
839 | | |
840 | 0 | if (pps->TileIdRS[xCurrCtb + yCurrCtb*sps->PicWidthInCtbsY] != |
841 | 0 | pps->TileIdRS[xNCtb + yNCtb *sps->PicWidthInCtbsY]) { |
842 | 0 | return false; |
843 | 0 | } |
844 | | |
845 | 0 | return true; |
846 | 0 | } |
847 | | |
848 | | |
849 | | bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP, |
850 | | int nPbW, int nPbH, int partIdx, int xN,int yN) const |
851 | 0 | { |
852 | 0 | logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH); |
853 | |
|
854 | 0 | int sameCb = (xC <= xN && xN < xC+nCbS && |
855 | 0 | yC <= yN && yN < yC+nCbS); |
856 | |
|
857 | 0 | bool availableN; |
858 | |
|
859 | 0 | if (!sameCb) { |
860 | 0 | availableN = available_zscan(xP,yP,xN,yN); |
861 | 0 | } |
862 | 0 | else { |
863 | 0 | availableN = !(nPbW<<1 == nCbS && nPbH<<1 == nCbS && // NxN |
864 | 0 | partIdx==1 && |
865 | 0 | yN >= yC+nPbH && xN < xC+nPbW); // xN/yN inside partIdx 2 |
866 | 0 | } |
867 | |
|
868 | 0 | if (availableN && get_pred_mode(xN,yN) == MODE_INTRA) { |
869 | 0 | availableN = false; |
870 | 0 | } |
871 | |
|
872 | 0 | return availableN; |
873 | 0 | } |