/src/libde265/libde265/image.cc
Line | Count | Source |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "image.h" |
22 | | #include "decctx.h" |
23 | | |
24 | | #include <atomic> |
25 | | |
26 | | #include <stdlib.h> |
27 | | #include <string.h> |
28 | | #include <assert.h> |
29 | | |
30 | | #include <limits> |
31 | | |
32 | | |
33 | | #ifdef HAVE_MALLOC_H |
34 | | #include <malloc.h> |
35 | | #endif |
36 | | |
37 | | #ifdef HAVE_SSE4_1 |
38 | | // SSE code processes 128bit per iteration and thus might read more data |
39 | | // than is later actually used. |
40 | | #define MEMORY_PADDING 16 |
41 | | #else |
42 | | #define MEMORY_PADDING 0 |
43 | | #endif |
44 | | |
45 | 13.8k | #define STANDARD_ALIGNMENT 16 |
46 | | |
47 | | #ifdef HAVE___MINGW_ALIGNED_MALLOC |
48 | | #define ALLOC_ALIGNED(alignment, size) __mingw_aligned_malloc((size), (alignment)) |
49 | | #define FREE_ALIGNED(mem) __mingw_aligned_free((mem)) |
50 | | #elif _WIN32 |
51 | | #define ALLOC_ALIGNED(alignment, size) _aligned_malloc((size), (alignment)) |
52 | | #define FREE_ALIGNED(mem) _aligned_free((mem)) |
53 | | #elif defined(HAVE_POSIX_MEMALIGN) |
// Allocates `size` bytes whose address is a multiple of `alignment` using
// posix_memalign(). Returns nullptr when posix_memalign() reports an error.
// The memory is released with FREE_ALIGNED().
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
  void *block = nullptr;
  const int status = posix_memalign(&block, alignment, size);
  return (status == 0) ? block : nullptr;
}
61 | 36.0k | #define FREE_ALIGNED(mem) free((mem)) |
62 | | #else |
63 | | #define ALLOC_ALIGNED(alignment, size) memalign((alignment), (size)) |
64 | | #define FREE_ALIGNED(mem) free((mem)) |
65 | | #endif |
66 | | |
67 | 36.0k | #define ALLOC_ALIGNED_16(size) ALLOC_ALIGNED(16, size) |
68 | | |
69 | | LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx, |
70 | | void* inputdata, int inputstride, void *userdata) |
71 | 0 | { |
72 | 0 | int alignment = STANDARD_ALIGNMENT; |
73 | 0 | int stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment; |
74 | 0 | int height = img->get_height(cIdx); |
75 | |
|
76 | 0 | uint8_t* p = static_cast<uint8_t*>(ALLOC_ALIGNED_16(stride * height + MEMORY_PADDING)); |
77 | |
|
78 | 0 | if (p==nullptr) { return nullptr; } |
79 | | |
80 | 0 | img->set_image_plane(cIdx, p, stride, userdata); |
81 | | |
82 | | // copy input data if provided |
83 | |
|
84 | 0 | if (inputdata != nullptr) { |
85 | 0 | if (inputstride == stride) { |
86 | 0 | memcpy(p, inputdata, stride*height); |
87 | 0 | } |
88 | 0 | else { |
89 | 0 | for (int y=0;y<height;y++) { |
90 | 0 | memcpy(p+y*stride, static_cast<char*>(inputdata) + inputstride*y, inputstride); |
91 | 0 | } |
92 | 0 | } |
93 | 0 | } |
94 | |
|
95 | 0 | return p; |
96 | 0 | } |
97 | | |
98 | | |
99 | | LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx) |
100 | 0 | { |
101 | 0 | uint8_t* p = img->get_image_plane(cIdx); |
102 | 0 | assert(p); |
103 | 0 | FREE_ALIGNED(p); |
104 | 0 | } |
105 | | |
106 | | |
107 | | static int de265_image_get_buffer(de265_decoder_context* ctx, |
108 | | de265_image_spec* spec, de265_image* img, void* userdata) |
109 | 13.8k | { |
110 | 13.8k | const int rawChromaWidth = spec->width / img->SubWidthC; |
111 | 13.8k | const int rawChromaHeight = spec->height / img->SubHeightC; |
112 | | |
113 | 13.8k | int luma_stride = (spec->width + spec->alignment-1) / spec->alignment * spec->alignment; |
114 | 13.8k | int chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment; |
115 | | |
116 | 13.8k | assert(img->BitDepth_Y >= 8 && img->BitDepth_Y <= 16); |
117 | 13.8k | assert(img->BitDepth_C >= 8 && img->BitDepth_C <= 16); |
118 | | |
119 | 13.8k | int luma_bpl = luma_stride * ((img->BitDepth_Y+7)/8); |
120 | 13.8k | int chroma_bpl = chroma_stride * ((img->BitDepth_C+7)/8); |
121 | | |
122 | 13.8k | int luma_height = spec->height; |
123 | 13.8k | int chroma_height = rawChromaHeight; |
124 | | |
125 | 13.8k | bool alloc_failed = false; |
126 | | |
127 | 13.8k | uint8_t* p[3] = { 0,0,0 }; |
128 | 13.8k | p[0] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(luma_height * luma_bpl + MEMORY_PADDING)); |
129 | 13.8k | if (p[0]==nullptr) { alloc_failed=true; } |
130 | | |
131 | 13.8k | if (img->get_chroma_format() != de265_chroma_mono) { |
132 | 11.0k | p[1] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING)); |
133 | 11.0k | p[2] = static_cast<uint8_t*>(ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING)); |
134 | | |
135 | 11.0k | if (p[1]==nullptr || p[2]==nullptr) { alloc_failed=true; } |
136 | 11.0k | } |
137 | 2.81k | else { |
138 | 2.81k | p[1] = nullptr; |
139 | 2.81k | p[2] = nullptr; |
140 | 2.81k | chroma_stride = 0; |
141 | 2.81k | } |
142 | | |
143 | 13.8k | if (alloc_failed) { |
144 | 0 | for (int i=0;i<3;i++) |
145 | 0 | if (p[i]) { |
146 | 0 | FREE_ALIGNED(p[i]); |
147 | 0 | } |
148 | |
|
149 | 0 | return 0; |
150 | 0 | } |
151 | | |
152 | 13.8k | img->set_image_plane(0, p[0], luma_stride, nullptr); |
153 | 13.8k | img->set_image_plane(1, p[1], chroma_stride, nullptr); |
154 | 13.8k | img->set_image_plane(2, p[2], chroma_stride, nullptr); |
155 | | |
156 | 13.8k | img->fill_image(0,0,0); |
157 | | |
158 | 13.8k | return 1; |
159 | 13.8k | } |
160 | | |
161 | | static void de265_image_release_buffer(de265_decoder_context* ctx, |
162 | | de265_image* img, void* userdata) |
163 | 13.8k | { |
164 | 55.5k | for (int i=0;i<3;i++) { |
165 | 41.6k | uint8_t* p = img->get_image_plane(i); |
166 | 41.6k | if (p) { |
167 | 36.0k | FREE_ALIGNED(p); |
168 | 36.0k | } |
169 | 41.6k | } |
170 | 13.8k | } |
171 | | |
172 | | |
// Built-in allocation callbacks. alloc_image() installs them when there is no
// decoder context or when custom allocation functions are not requested.
de265_image_allocation de265_image::default_image_allocation = {
  de265_image_get_buffer,
  de265_image_release_buffer
};
177 | | |
178 | | |
179 | | void de265_image::set_image_plane(int cIdx, uint8_t* mem, int stride, void *userdata) |
180 | 41.6k | { |
181 | 41.6k | pixels[cIdx] = mem; |
182 | 41.6k | plane_user_data[cIdx] = userdata; |
183 | | |
184 | 41.6k | if (cIdx==0) { this->stride = stride; } |
185 | 27.7k | else { this->chroma_stride = stride; } |
186 | 41.6k | } |
187 | | |
188 | | |
// Default constructor; buffers are allocated later by alloc_image().
de265_image::de265_image() = default;
190 | | |
191 | | |
192 | | de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c, |
193 | | std::shared_ptr<const seq_parameter_set> sps, bool allocMetadata, |
194 | | decoder_context* dctx, |
195 | | //encoder_context* ectx, |
196 | | de265_PTS pts, void* user_data, |
197 | | bool useCustomAllocFunc) |
198 | 13.8k | { |
199 | | //if (allocMetadata) { assert(sps); } |
200 | 13.8k | if (allocMetadata) { assert(sps); } |
201 | | |
202 | 13.8k | if (sps) { this->sps = sps; } |
203 | | |
204 | 13.8k | release(); /* TODO: review code for efficient allocation when arrays are already |
205 | | allocated to the requested size. Without the release, the old image-data |
206 | | will not be freed. */ |
207 | | |
208 | 13.8k | static std::atomic<uint32_t> s_next_image_ID(0); |
209 | 13.8k | ID = s_next_image_ID++; |
210 | 13.8k | removed_at_picture_id = std::numeric_limits<uint32_t>::max(); |
211 | | |
212 | 13.8k | decctx = dctx; |
213 | | //encctx = ectx; |
214 | | |
215 | | // --- allocate image buffer --- |
216 | | |
217 | 13.8k | chroma_format= c; |
218 | | |
219 | 13.8k | width = w; |
220 | 13.8k | height = h; |
221 | 13.8k | chroma_width = w; |
222 | 13.8k | chroma_height= h; |
223 | | |
224 | 13.8k | this->user_data = user_data; |
225 | 13.8k | this->pts = pts; |
226 | | |
227 | 13.8k | de265_image_spec spec; |
228 | | |
229 | 13.8k | uint8_t WinUnitX, WinUnitY; |
230 | | |
231 | 13.8k | switch (chroma_format) { |
232 | 2.81k | case de265_chroma_mono: WinUnitX=1; WinUnitY=1; break; |
233 | 336 | case de265_chroma_420: WinUnitX=2; WinUnitY=2; break; |
234 | 262 | case de265_chroma_422: WinUnitX=2; WinUnitY=1; break; |
235 | 10.4k | case de265_chroma_444: WinUnitX=1; WinUnitY=1; break; |
236 | 0 | default: |
237 | 0 | assert(0); |
238 | 0 | WinUnitX = WinUnitY = 0; |
239 | 13.8k | } |
240 | | |
241 | 13.8k | switch (chroma_format) { |
242 | 336 | case de265_chroma_420: |
243 | 336 | spec.format = de265_image_format_YUV420P8; |
244 | 336 | chroma_width = (chroma_width +1)/2; |
245 | 336 | chroma_height = (chroma_height+1)/2; |
246 | 336 | SubWidthC = 2; |
247 | 336 | SubHeightC = 2; |
248 | 336 | break; |
249 | | |
250 | 262 | case de265_chroma_422: |
251 | 262 | spec.format = de265_image_format_YUV422P8; |
252 | 262 | chroma_width = (chroma_width+1)/2; |
253 | 262 | SubWidthC = 2; |
254 | 262 | SubHeightC = 1; |
255 | 262 | break; |
256 | | |
257 | 10.4k | case de265_chroma_444: |
258 | 10.4k | spec.format = de265_image_format_YUV444P8; |
259 | 10.4k | SubWidthC = 1; |
260 | 10.4k | SubHeightC = 1; |
261 | 10.4k | break; |
262 | | |
263 | 2.81k | case de265_chroma_mono: |
264 | 2.81k | spec.format = de265_image_format_mono8; |
265 | 2.81k | chroma_width = 0; |
266 | 2.81k | chroma_height= 0; |
267 | 2.81k | SubWidthC = 1; |
268 | 2.81k | SubHeightC = 1; |
269 | 2.81k | break; |
270 | | |
271 | 0 | default: |
272 | 0 | assert(false); |
273 | 0 | break; |
274 | 13.8k | } |
275 | | |
276 | 13.8k | if (chroma_format != de265_chroma_mono && sps) { |
277 | 11.0k | assert(sps->SubWidthC == SubWidthC); |
278 | 11.0k | assert(sps->SubHeightC == SubHeightC); |
279 | 11.0k | } |
280 | | |
281 | 13.8k | spec.width = w; |
282 | 13.8k | spec.height = h; |
283 | 13.8k | spec.alignment = STANDARD_ALIGNMENT; |
284 | | |
285 | | |
286 | | // conformance window cropping |
287 | | |
288 | 13.8k | int left = sps ? sps->conf_win_left_offset : 0; |
289 | 13.8k | int right = sps ? sps->conf_win_right_offset : 0; |
290 | 13.8k | int top = sps ? sps->conf_win_top_offset : 0; |
291 | 13.8k | int bottom = sps ? sps->conf_win_bottom_offset : 0; |
292 | | |
293 | 13.8k | if ((left+right)*WinUnitX >= width) { |
294 | 0 | return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE; |
295 | 0 | } |
296 | | |
297 | 13.8k | if ((top+bottom)*WinUnitY >= height) { |
298 | 0 | return DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE; |
299 | 0 | } |
300 | | |
301 | 13.8k | width_confwin = width - (left+right)*WinUnitX; |
302 | 13.8k | height_confwin= height- (top+bottom)*WinUnitY; |
303 | 13.8k | chroma_width_confwin = chroma_width -left-right; |
304 | 13.8k | chroma_height_confwin= chroma_height-top-bottom; |
305 | | |
306 | 13.8k | spec.crop_left = left *WinUnitX; |
307 | 13.8k | spec.crop_right = right*WinUnitX; |
308 | 13.8k | spec.crop_top = top *WinUnitY; |
309 | 13.8k | spec.crop_bottom= bottom*WinUnitY; |
310 | | |
311 | 13.8k | spec.visible_width = width_confwin; |
312 | 13.8k | spec.visible_height= height_confwin; |
313 | | |
314 | | |
315 | 13.8k | BitDepth_Y = (sps==nullptr) ? 8 : sps->BitDepth_Y; |
316 | 13.8k | BitDepth_C = (sps==nullptr) ? 8 : sps->BitDepth_C; |
317 | | |
318 | 13.8k | bpp_shift[0] = (BitDepth_Y <= 8) ? 0 : 1; |
319 | 13.8k | bpp_shift[1] = (BitDepth_C <= 8) ? 0 : 1; |
320 | 13.8k | bpp_shift[2] = bpp_shift[1]; |
321 | | |
322 | | |
323 | | // allocate memory and set conformance window pointers |
324 | | |
325 | 13.8k | void* alloc_userdata = nullptr; |
326 | 13.8k | if (decctx) alloc_userdata = decctx->param_image_allocation_userdata; |
327 | | // if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // actually not needed |
328 | | |
329 | | /* |
330 | | if (encctx && useCustomAllocFunc) { |
331 | | encoder_image_release_func = encctx->release_func; |
332 | | |
333 | | // if we do not provide a release function, use our own |
334 | | |
335 | | if (encoder_image_release_func == nullptr) { |
336 | | image_allocation_functions = de265_image::default_image_allocation; |
337 | | } |
338 | | else { |
339 | | image_allocation_functions.get_buffer = nullptr; |
340 | | image_allocation_functions.release_buffer = nullptr; |
341 | | } |
342 | | } |
343 | 13.8k | else*/ if (decctx && useCustomAllocFunc) { |
344 | 4.44k | image_allocation_functions = decctx->param_image_allocation_functions; |
345 | 4.44k | } |
346 | 9.44k | else { |
347 | 9.44k | image_allocation_functions = de265_image::default_image_allocation; |
348 | 9.44k | } |
349 | | |
350 | 13.8k | bool mem_alloc_success = true; |
351 | | |
352 | 13.8k | if (image_allocation_functions.get_buffer != nullptr) { |
353 | 13.8k | mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this, |
354 | 13.8k | alloc_userdata); |
355 | | |
356 | 13.8k | pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride; |
357 | | |
358 | 13.8k | if (chroma_format != de265_chroma_mono) { |
359 | 11.0k | pixels_confwin[1] = pixels[1] + left + top*chroma_stride; |
360 | 11.0k | pixels_confwin[2] = pixels[2] + left + top*chroma_stride; |
361 | 11.0k | } |
362 | 2.81k | else { |
363 | 2.81k | pixels_confwin[1] = nullptr; |
364 | 2.81k | pixels_confwin[2] = nullptr; |
365 | 2.81k | } |
366 | | |
367 | | // check for memory shortage |
368 | | |
369 | 13.8k | if (!mem_alloc_success) |
370 | 0 | { |
371 | 0 | return DE265_ERROR_OUT_OF_MEMORY; |
372 | 0 | } |
373 | 13.8k | } |
374 | | |
375 | | //alloc_functions = *allocfunc; |
376 | | //alloc_userdata = userdata; |
377 | | |
378 | | // --- allocate decoding info arrays --- |
379 | | |
380 | 13.8k | if (allocMetadata) { |
381 | | // intra pred mode |
382 | | |
383 | 10.3k | mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs, |
384 | 10.3k | sps->Log2MinPUSize); |
385 | | |
386 | 10.3k | mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs, |
387 | 10.3k | sps->Log2MinPUSize); |
388 | | |
389 | | // cb info |
390 | | |
391 | 10.3k | mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY, |
392 | 10.3k | sps->Log2MinCbSizeY); |
393 | | |
394 | | // pb info |
395 | | |
396 | 10.3k | int puWidth = sps->PicWidthInMinCbsY << (sps->Log2MinCbSizeY -2); |
397 | 10.3k | int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2); |
398 | | |
399 | 10.3k | mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2); |
400 | | |
401 | | |
402 | | // tu info |
403 | | |
404 | 10.3k | mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY, |
405 | 10.3k | sps->Log2MinTrafoSize); |
406 | | |
407 | | // deblk info |
408 | | |
409 | 10.3k | int deblk_w = (sps->pic_width_in_luma_samples +3)/4; |
410 | 10.3k | int deblk_h = (sps->pic_height_in_luma_samples+3)/4; |
411 | | |
412 | 10.3k | mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2); |
413 | | |
414 | | // CTB info |
415 | | |
416 | 10.3k | if (ctb_info.width_in_units != sps->PicWidthInCtbsY || |
417 | 31 | ctb_info.height_in_units != sps->PicHeightInCtbsY || |
418 | 31 | ctb_info.log2unitSize != sps->Log2CtbSizeY) |
419 | 10.2k | { |
420 | 10.2k | delete[] ctb_progress; |
421 | | |
422 | 10.2k | mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY, |
423 | 10.2k | sps->Log2CtbSizeY); |
424 | | |
425 | 10.2k | ctb_progress = new de265_progress_lock[ ctb_info.data_size ]; |
426 | 10.2k | } |
427 | | |
428 | | |
429 | | // check for memory shortage |
430 | | |
431 | 10.3k | if (!mem_alloc_success) |
432 | 0 | { |
433 | 0 | return DE265_ERROR_OUT_OF_MEMORY; |
434 | 0 | } |
435 | 10.3k | } |
436 | | |
437 | 13.8k | return DE265_OK; |
438 | 13.8k | } |
439 | | |
440 | | |
441 | | de265_image::~de265_image() |
442 | 15.2k | { |
443 | 15.2k | release(); |
444 | | |
445 | | // free progress locks |
446 | | |
447 | 15.2k | if (ctb_progress) { |
448 | 10.2k | delete[] ctb_progress; |
449 | 10.2k | } |
450 | 15.2k | } |
451 | | |
452 | | |
453 | | void de265_image::release() |
454 | 29.1k | { |
455 | | // free image memory |
456 | | |
457 | 29.1k | if (pixels[0]) |
458 | 13.8k | { |
459 | | /* |
460 | | if (encoder_image_release_func != nullptr) { |
461 | | encoder_image_release_func(encctx, this, |
462 | | encctx->param_image_allocation_userdata); |
463 | | } |
464 | 13.8k | else*/ { |
465 | 13.8k | image_allocation_functions.release_buffer(decctx, this, |
466 | 13.8k | decctx ? |
467 | 13.8k | decctx->param_image_allocation_userdata : |
468 | 13.8k | nullptr); |
469 | 13.8k | } |
470 | | |
471 | 55.5k | for (int i=0;i<3;i++) |
472 | 41.6k | { |
473 | 41.6k | pixels[i] = nullptr; |
474 | 41.6k | pixels_confwin[i] = nullptr; |
475 | 41.6k | } |
476 | 13.8k | } |
477 | | |
478 | | // free slices |
479 | | |
480 | 34.9k | for (size_t i=0;i<slices.size();i++) { |
481 | 5.77k | delete slices[i]; |
482 | 5.77k | } |
483 | 29.1k | slices.clear(); |
484 | 29.1k | } |
485 | | |
486 | | |
487 | | void de265_image::fill_plane(int channel, int value) |
488 | 48.5k | { |
489 | 48.5k | int bytes_per_pixel = get_bytes_per_pixel(channel); |
490 | 48.5k | assert(value >= 0); // needed for the shift operation in the check below |
491 | | |
492 | 48.5k | if (bytes_per_pixel == 1) { |
493 | 33.6k | if (channel==0) { |
494 | 13.1k | memset(pixels[channel], value, stride * height); |
495 | 13.1k | } |
496 | 20.4k | else { |
497 | 20.4k | memset(pixels[channel], value, chroma_stride * chroma_height); |
498 | 20.4k | } |
499 | 33.6k | } |
500 | 14.9k | else if ((value >> 8) == (value & 0xFF)) { |
501 | 11.0k | assert(bytes_per_pixel == 2); |
502 | | |
503 | | // if we fill the same byte value to all bytes, we can still use memset() |
504 | 11.0k | if (channel==0) { |
505 | 4.43k | memset(pixels[channel], 0, stride * height * bytes_per_pixel); |
506 | 4.43k | } |
507 | 6.59k | else { |
508 | 6.59k | memset(pixels[channel], 0, chroma_stride * chroma_height * bytes_per_pixel); |
509 | 6.59k | } |
510 | 11.0k | } |
511 | 3.89k | else { |
512 | 3.89k | assert(bytes_per_pixel == 2); |
513 | 3.89k | uint16_t v = value; |
514 | | |
515 | 3.89k | if (channel==0) { |
516 | | // copy value into first row |
517 | 481k | for (int x = 0; x < width; x++) { |
518 | 479k | *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v; |
519 | 479k | } |
520 | | |
521 | | // copy first row into remaining rows |
522 | 203k | for (int y = 1; y < height; y++) { |
523 | 201k | memcpy(pixels[channel] + y * stride * 2, pixels[channel], chroma_width * 2); |
524 | 201k | } |
525 | 1.55k | } |
526 | 2.34k | else { |
527 | | // copy value into first row |
528 | 412k | for (int x = 0; x < chroma_width; x++) { |
529 | 410k | *reinterpret_cast<uint16_t*>(&pixels[channel][2 * x]) = v; |
530 | 410k | } |
531 | | |
532 | | // copy first row into remaining rows |
533 | 277k | for (int y = 1; y < chroma_height; y++) { |
534 | 274k | memcpy(pixels[channel] + y * chroma_stride * 2, pixels[channel], chroma_width * 2); |
535 | 274k | } |
536 | 2.34k | } |
537 | 3.89k | } |
538 | 48.5k | } |
539 | | |
540 | | |
541 | | void de265_image::fill_image(int y,int cb,int cr) |
542 | 19.1k | { |
543 | 19.1k | if (pixels[0]) { |
544 | 19.1k | fill_plane(0, y); |
545 | 19.1k | } |
546 | | |
547 | 19.1k | if (pixels[1]) { |
548 | 14.7k | fill_plane(1, cb); |
549 | 14.7k | } |
550 | | |
551 | 19.1k | if (pixels[2]) { |
552 | 14.7k | fill_plane(2, cr); |
553 | 14.7k | } |
554 | 19.1k | } |
555 | | |
556 | | |
557 | | de265_error de265_image::copy_image(const de265_image* src) |
558 | 0 | { |
559 | | /* TODO: actually, since we allocate the image only for internal purpose, we |
560 | | do not have to call the external allocation routines for this. However, then |
561 | | we have to track for each image how to release it again. |
562 | | Another option would be to safe the copied data not in an de265_image at all. |
563 | | */ |
564 | |
|
565 | 0 | de265_error err = alloc_image(src->width, src->height, src->chroma_format, src->sps, false, |
566 | 0 | src->decctx, /*src->encctx,*/ src->pts, src->user_data, false); |
567 | 0 | if (err != DE265_OK) { |
568 | 0 | return err; |
569 | 0 | } |
570 | | |
571 | 0 | copy_lines_from(src, 0, src->height); |
572 | |
|
573 | 0 | return err; |
574 | 0 | } |
575 | | |
576 | | |
577 | | // end = last line + 1 |
578 | | void de265_image::copy_lines_from(const de265_image* src, int first, int end) |
579 | 23.4k | { |
580 | 23.4k | if (end > src->height) end=src->height; |
581 | | |
582 | 23.4k | assert(first % 2 == 0); |
583 | 23.4k | assert(end % 2 == 0); |
584 | | |
585 | 23.4k | int luma_bpp = (sps->BitDepth_Y+7)/8; |
586 | 23.4k | int chroma_bpp = (sps->BitDepth_C+7)/8; |
587 | | |
588 | 23.4k | if (src->stride == stride) { |
589 | 23.4k | memcpy(pixels[0] + first*stride * luma_bpp, |
590 | 23.4k | src->pixels[0] + first*src->stride * luma_bpp, |
591 | 23.4k | (end-first)*stride * luma_bpp); |
592 | 23.4k | } |
593 | 0 | else { |
594 | 0 | for (int yp=first;yp<end;yp++) { |
595 | 0 | memcpy(pixels[0]+yp*stride * luma_bpp, |
596 | 0 | src->pixels[0]+yp*src->stride * luma_bpp, |
597 | 0 | src->width * luma_bpp); |
598 | 0 | } |
599 | 0 | } |
600 | | |
601 | 23.4k | int first_chroma = first / src->SubHeightC; |
602 | 23.4k | int end_chroma = end / src->SubHeightC; |
603 | | |
604 | 23.4k | if (src->chroma_format != de265_chroma_mono) { |
605 | 11.1k | if (src->chroma_stride == chroma_stride) { |
606 | 11.1k | memcpy(pixels[1] + first_chroma*chroma_stride * chroma_bpp, |
607 | 11.1k | src->pixels[1] + first_chroma*chroma_stride * chroma_bpp, |
608 | 11.1k | (end_chroma-first_chroma) * chroma_stride * chroma_bpp); |
609 | 11.1k | memcpy(pixels[2] + first_chroma*chroma_stride * chroma_bpp, |
610 | 11.1k | src->pixels[2] + first_chroma*chroma_stride * chroma_bpp, |
611 | 11.1k | (end_chroma-first_chroma) * chroma_stride * chroma_bpp); |
612 | 11.1k | } |
613 | 0 | else { |
614 | 0 | for (int y=first_chroma;y<end_chroma;y++) { |
615 | 0 | memcpy(pixels[1]+y*chroma_stride * chroma_bpp, |
616 | 0 | src->pixels[1]+y*src->chroma_stride * chroma_bpp, |
617 | 0 | src->chroma_width * chroma_bpp); |
618 | 0 | memcpy(pixels[2]+y*chroma_stride * chroma_bpp, |
619 | 0 | src->pixels[2]+y*src->chroma_stride * chroma_bpp, |
620 | 0 | src->chroma_width * chroma_bpp); |
621 | 0 | } |
622 | 0 | } |
623 | 11.1k | } |
624 | 23.4k | } |
625 | | |
626 | | |
627 | | void de265_image::exchange_pixel_data_with(de265_image& b) |
628 | 3.57k | { |
629 | 14.3k | for (int i=0;i<3;i++) { |
630 | 10.7k | std::swap(pixels[i], b.pixels[i]); |
631 | 10.7k | std::swap(pixels_confwin[i], b.pixels_confwin[i]); |
632 | 10.7k | std::swap(plane_user_data[i], b.plane_user_data[i]); |
633 | 10.7k | } |
634 | | |
635 | 3.57k | std::swap(stride, b.stride); |
636 | 3.57k | std::swap(chroma_stride, b.chroma_stride); |
637 | 3.57k | std::swap(image_allocation_functions, b.image_allocation_functions); |
638 | 3.57k | } |
639 | | |
640 | | |
641 | | void de265_image::thread_start(int nThreads) |
642 | 13.4k | { |
643 | 13.4k | std::unique_lock<std::mutex> lock(mutex); |
644 | | |
645 | | //printf("nThreads before: %d %d\n",nThreadsQueued, nThreadsTotal); |
646 | | |
647 | 13.4k | nThreadsQueued += nThreads; |
648 | 13.4k | nThreadsTotal += nThreads; |
649 | | |
650 | | //printf("nThreads after: %d %d\n",nThreadsQueued, nThreadsTotal); |
651 | 13.4k | } |
652 | | |
653 | | void de265_image::thread_run(const thread_task* task) |
654 | 82.0k | { |
655 | 82.0k | std::unique_lock<std::mutex> lock(mutex); |
656 | | |
657 | | //printf("run thread %s\n", task->name().c_str()); |
658 | | |
659 | 82.0k | nThreadsQueued--; |
660 | 82.0k | nThreadsRunning++; |
661 | 82.0k | } |
662 | | |
663 | | void de265_image::thread_blocks() |
664 | 0 | { |
665 | 0 | std::unique_lock<std::mutex> lock(mutex); |
666 | |
|
667 | 0 | nThreadsRunning--; |
668 | 0 | nThreadsBlocked++; |
669 | 0 | } |
670 | | |
671 | | void de265_image::thread_unblocks() |
672 | 0 | { |
673 | 0 | std::unique_lock<std::mutex> lock(mutex); |
674 | |
|
675 | 0 | nThreadsBlocked--; |
676 | 0 | nThreadsRunning++; |
677 | 0 | } |
678 | | |
679 | | void de265_image::thread_finishes(const thread_task* task) |
680 | 82.0k | { |
681 | | //printf("finish thread %s\n", task->name().c_str()); |
682 | | |
683 | 82.0k | std::unique_lock<std::mutex> lock(mutex); |
684 | | |
685 | 82.0k | nThreadsRunning--; |
686 | 82.0k | nThreadsFinished++; |
687 | 82.0k | assert(nThreadsRunning >= 0); |
688 | | |
689 | 82.0k | if (nThreadsFinished==nThreadsTotal) { |
690 | 5.70k | finished_cond.notify_all(); |
691 | 5.70k | } |
692 | 82.0k | } |
693 | | |
694 | | void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress) |
695 | 187k | { |
696 | 187k | const int ctbW = sps->PicWidthInCtbsY; |
697 | | |
698 | 187k | wait_for_progress(task, ctbx + ctbW*ctby, progress); |
699 | 187k | } |
700 | | |
701 | | void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress) |
702 | 187k | { |
703 | 187k | if (task==nullptr) { return; } |
704 | | |
705 | 187k | de265_progress_lock* progresslock = &ctb_progress[ctbAddrRS]; |
706 | 187k | if (progresslock->get_progress() < progress) { |
707 | 0 | thread_blocks(); |
708 | |
|
709 | 0 | assert(task!=nullptr); |
710 | 0 | task->state = thread_task::Blocked; |
711 | | |
712 | | /* TODO: check whether we are the first blocked task in the list. |
713 | | If we are, we have to conceal input errors. |
714 | | Simplest concealment: do not block. |
715 | | */ |
716 | |
|
717 | 0 | progresslock->wait_for_progress(progress); |
718 | 0 | task->state = thread_task::Running; |
719 | 0 | thread_unblocks(); |
720 | 0 | } |
721 | 187k | } |
722 | | |
723 | | |
724 | | void de265_image::wait_for_completion() |
725 | 9.31k | { |
726 | 9.31k | std::unique_lock<std::mutex> lock(mutex); |
727 | | |
728 | 14.9k | while (nThreadsFinished!=nThreadsTotal) { |
729 | 5.67k | finished_cond.wait(lock); |
730 | 5.67k | } |
731 | 9.31k | } |
732 | | |
733 | | bool de265_image::debug_is_completed() const |
734 | 0 | { |
735 | 0 | return nThreadsFinished==nThreadsTotal; |
736 | 0 | } |
737 | | |
738 | | |
739 | | |
740 | | void de265_image::clear_metadata() |
741 | 5.06k | { |
742 | | // TODO: maybe we could avoid the memset by ensuring that all data is written to |
743 | | // during decoding (especially log2CbSize), but it is unlikely to be faster than the memset. |
744 | | |
745 | 5.06k | cb_info.clear(); |
746 | 5.06k | intraPredMode.clear(); |
747 | | //tu_info.clear(); // done on the fly |
748 | 5.06k | ctb_info.clear(); |
749 | 5.06k | deblk_info.clear(); |
750 | | |
751 | | // --- reset CTB progresses --- |
752 | | |
753 | 315k | for (int i=0;i<ctb_info.data_size;i++) { |
754 | 310k | ctb_progress[i].reset(CTB_PROGRESS_NONE); |
755 | 310k | } |
756 | 5.06k | } |
757 | | |
758 | | |
759 | | void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const PBMotion& mv) |
760 | 427k | { |
761 | 427k | int log2PuSize = 2; |
762 | | |
763 | 427k | int xPu = x >> log2PuSize; |
764 | 427k | int yPu = y >> log2PuSize; |
765 | 427k | int wPu = nPbW >> log2PuSize; |
766 | 427k | int hPu = nPbH >> log2PuSize; |
767 | | |
768 | 427k | int stride = pb_info.width_in_units; |
769 | | |
770 | 1.36M | for (int pby=0;pby<hPu;pby++) |
771 | 3.48M | for (int pbx=0;pbx<wPu;pbx++) |
772 | 2.55M | { |
773 | 2.55M | pb_info[ xPu+pbx + (yPu+pby)*stride ] = mv; |
774 | 2.55M | } |
775 | 427k | } |
776 | | |
777 | | |
778 | | bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const |
779 | 3.53M | { |
780 | 3.53M | if (xN<0 || yN<0) return false; |
781 | 2.85M | if (xN>=sps->pic_width_in_luma_samples || |
782 | 2.83M | yN>=sps->pic_height_in_luma_samples) return false; |
783 | | |
784 | 2.80M | int minBlockAddrN = pps->MinTbAddrZS[ (xN>>sps->Log2MinTrafoSize) + |
785 | 2.80M | (yN>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ]; |
786 | 2.80M | int minBlockAddrCurr = pps->MinTbAddrZS[ (xCurr>>sps->Log2MinTrafoSize) + |
787 | 2.80M | (yCurr>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY ]; |
788 | | |
789 | 2.80M | if (minBlockAddrN > minBlockAddrCurr) return false; |
790 | | |
791 | 2.61M | int xCurrCtb = xCurr >> sps->Log2CtbSizeY; |
792 | 2.61M | int yCurrCtb = yCurr >> sps->Log2CtbSizeY; |
793 | 2.61M | int xNCtb = xN >> sps->Log2CtbSizeY; |
794 | 2.61M | int yNCtb = yN >> sps->Log2CtbSizeY; |
795 | | |
796 | 2.61M | if (get_SliceAddrRS(xCurrCtb,yCurrCtb) != |
797 | 2.61M | get_SliceAddrRS(xNCtb, yNCtb)) { |
798 | 24.0k | return false; |
799 | 24.0k | } |
800 | | |
801 | 2.59M | if (pps->TileIdRS[xCurrCtb + yCurrCtb*sps->PicWidthInCtbsY] != |
802 | 2.59M | pps->TileIdRS[xNCtb + yNCtb *sps->PicWidthInCtbsY]) { |
803 | 2.00k | return false; |
804 | 2.00k | } |
805 | | |
806 | 2.59M | return true; |
807 | 2.59M | } |
808 | | |
809 | | |
810 | | bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP, |
811 | | int nPbW, int nPbH, int partIdx, int xN,int yN) const |
812 | 1.41M | { |
813 | 1.41M | logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH); |
814 | | |
815 | 1.41M | int sameCb = (xC <= xN && xN < xC+nCbS && |
816 | 460k | yC <= yN && yN < yC+nCbS); |
817 | | |
818 | 1.41M | bool availableN; |
819 | | |
820 | 1.41M | if (!sameCb) { |
821 | 1.34M | availableN = available_zscan(xP,yP,xN,yN); |
822 | 1.34M | } |
823 | 68.3k | else { |
824 | 68.3k | availableN = !(nPbW<<1 == nCbS && nPbH<<1 == nCbS && // NxN |
825 | 3.22k | partIdx==1 && |
826 | 956 | yN >= yC+nPbH && xN < xC+nPbW); // xN/yN inside partIdx 2 |
827 | 68.3k | } |
828 | | |
829 | 1.41M | if (availableN && get_pred_mode(xN,yN) == MODE_INTRA) { |
830 | 31.9k | availableN = false; |
831 | 31.9k | } |
832 | | |
833 | 1.41M | return availableN; |
834 | 1.41M | } |