/src/ffmpeg/libswscale/slice.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com> |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/mem.h" |
22 | | #include "swscale_internal.h" |
23 | | |
24 | | static void free_lines(SwsSlice *s) |
25 | 0 | { |
26 | 0 | int i; |
27 | 0 | for (i = 0; i < 2; ++i) { |
28 | 0 | int n = s->plane[i].available_lines; |
29 | 0 | int j; |
30 | 0 | for (j = 0; j < n; ++j) { |
31 | 0 | av_freep(&s->plane[i].line[j]); |
32 | 0 | if (s->is_ring) |
33 | 0 | s->plane[i].line[j+n] = NULL; |
34 | 0 | } |
35 | 0 | } |
36 | |
|
37 | 0 | for (i = 0; i < 4; ++i) |
38 | 0 | memset(s->plane[i].line, 0, sizeof(uint8_t*) * s->plane[i].available_lines * (s->is_ring ? 3 : 1)); |
39 | 0 | s->should_free_lines = 0; |
40 | 0 | } |
41 | | |
42 | | /* |
43 | | slice lines contains extra bytes for vectorial code thus @size |
44 | | is the allocated memory size and @width is the number of pixels |
45 | | */ |
46 | | static int alloc_lines(SwsSlice *s, int size, int width) |
47 | 0 | { |
48 | 0 | int i; |
49 | 0 | int idx[2] = {3, 2}; |
50 | |
|
51 | 0 | s->should_free_lines = 1; |
52 | 0 | s->width = width; |
53 | |
|
54 | 0 | for (i = 0; i < 2; ++i) { |
55 | 0 | int n = s->plane[i].available_lines; |
56 | 0 | int j; |
57 | 0 | int ii = idx[i]; |
58 | |
|
59 | 0 | av_assert0(n == s->plane[ii].available_lines); |
60 | 0 | for (j = 0; j < n; ++j) { |
61 | | // chroma plane line U and V are expected to be contiguous in memory |
62 | | // by mmx vertical scaler code |
63 | 0 | s->plane[i].line[j] = av_mallocz(size * 2 + 32); |
64 | 0 | if (!s->plane[i].line[j]) { |
65 | 0 | free_lines(s); |
66 | 0 | return AVERROR(ENOMEM); |
67 | 0 | } |
68 | 0 | s->plane[ii].line[j] = s->plane[i].line[j] + size + 16; |
69 | 0 | if (s->is_ring) { |
70 | 0 | s->plane[i].line[j+n] = s->plane[i].line[j]; |
71 | 0 | s->plane[ii].line[j+n] = s->plane[ii].line[j]; |
72 | 0 | } |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | 0 | return 0; |
77 | 0 | } |
78 | | |
79 | | static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring) |
80 | 0 | { |
81 | 0 | int i; |
82 | 0 | int size[4] = { lumLines, |
83 | 0 | chrLines, |
84 | 0 | chrLines, |
85 | 0 | lumLines }; |
86 | |
|
87 | 0 | s->h_chr_sub_sample = h_sub_sample; |
88 | 0 | s->v_chr_sub_sample = v_sub_sample; |
89 | 0 | s->fmt = fmt; |
90 | 0 | s->is_ring = ring; |
91 | 0 | s->should_free_lines = 0; |
92 | |
|
93 | 0 | for (i = 0; i < 4; ++i) { |
94 | 0 | int n = size[i] * ( ring == 0 ? 1 : 3); |
95 | 0 | s->plane[i].line = av_calloc(n, sizeof(*s->plane[i].line)); |
96 | 0 | if (!s->plane[i].line) |
97 | 0 | return AVERROR(ENOMEM); |
98 | | |
99 | 0 | s->plane[i].tmp = ring ? s->plane[i].line + size[i] * 2 : NULL; |
100 | 0 | s->plane[i].available_lines = size[i]; |
101 | 0 | s->plane[i].sliceY = 0; |
102 | 0 | s->plane[i].sliceH = 0; |
103 | 0 | } |
104 | 0 | return 0; |
105 | 0 | } |
106 | | |
107 | | static void free_slice(SwsSlice *s) |
108 | 0 | { |
109 | 0 | int i; |
110 | 0 | if (s) { |
111 | 0 | if (s->should_free_lines) |
112 | 0 | free_lines(s); |
113 | 0 | for (i = 0; i < 4; ++i) { |
114 | 0 | av_freep(&s->plane[i].line); |
115 | 0 | s->plane[i].tmp = NULL; |
116 | 0 | } |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | | int ff_rotate_slice(SwsSlice *s, int lum, int chr) |
121 | 0 | { |
122 | 0 | int i; |
123 | 0 | if (lum) { |
124 | 0 | for (i = 0; i < 4; i+=3) { |
125 | 0 | int n = s->plane[i].available_lines; |
126 | 0 | int l = lum - s->plane[i].sliceY; |
127 | |
|
128 | 0 | if (l >= n * 2) { |
129 | 0 | s->plane[i].sliceY += n; |
130 | 0 | s->plane[i].sliceH -= n; |
131 | 0 | } |
132 | 0 | } |
133 | 0 | } |
134 | 0 | if (chr) { |
135 | 0 | for (i = 1; i < 3; ++i) { |
136 | 0 | int n = s->plane[i].available_lines; |
137 | 0 | int l = chr - s->plane[i].sliceY; |
138 | |
|
139 | 0 | if (l >= n * 2) { |
140 | 0 | s->plane[i].sliceY += n; |
141 | 0 | s->plane[i].sliceH -= n; |
142 | 0 | } |
143 | 0 | } |
144 | 0 | } |
145 | 0 | return 0; |
146 | 0 | } |
147 | | |
148 | | int ff_init_slice_from_src(SwsSlice * s, uint8_t *const src[4], const int stride[4], |
149 | | int srcW, int lumY, int lumH, int chrY, int chrH, int relative) |
150 | 0 | { |
151 | 0 | int i = 0; |
152 | |
|
153 | 0 | const int start[4] = {lumY, |
154 | 0 | chrY, |
155 | 0 | chrY, |
156 | 0 | lumY}; |
157 | |
|
158 | 0 | const int end[4] = {lumY +lumH, |
159 | 0 | chrY + chrH, |
160 | 0 | chrY + chrH, |
161 | 0 | lumY + lumH}; |
162 | |
|
163 | 0 | s->width = srcW; |
164 | |
|
165 | 0 | for (i = 0; i < 4 && src[i] != NULL; ++i) { |
166 | 0 | uint8_t *const src_i = src[i] + (relative ? 0 : start[i]) * stride[i]; |
167 | 0 | int j; |
168 | 0 | int first = s->plane[i].sliceY; |
169 | 0 | int n = s->plane[i].available_lines; |
170 | 0 | int lines = end[i] - start[i]; |
171 | 0 | int tot_lines = end[i] - first; |
172 | |
|
173 | 0 | if (start[i] >= first && n >= tot_lines) { |
174 | 0 | s->plane[i].sliceH = FFMAX(tot_lines, s->plane[i].sliceH); |
175 | 0 | for (j = 0; j < lines; j+= 1) |
176 | 0 | s->plane[i].line[start[i] - first + j] = src_i + j * stride[i]; |
177 | 0 | } else { |
178 | 0 | s->plane[i].sliceY = start[i]; |
179 | 0 | lines = lines > n ? n : lines; |
180 | 0 | s->plane[i].sliceH = lines; |
181 | 0 | for (j = 0; j < lines; j+= 1) |
182 | 0 | s->plane[i].line[j] = src_i + j * stride[i]; |
183 | 0 | } |
184 | |
|
185 | 0 | } |
186 | |
|
187 | 0 | return 0; |
188 | 0 | } |
189 | | |
190 | | static void fill_ones(SwsSlice *s, int n, int bpc) |
191 | 0 | { |
192 | 0 | int i, j, k, size, end; |
193 | |
|
194 | 0 | for (i = 0; i < 4; ++i) { |
195 | 0 | size = s->plane[i].available_lines; |
196 | 0 | for (j = 0; j < size; ++j) { |
197 | 0 | if (bpc >= 16) { |
198 | 0 | end = (n>>1) + 1; |
199 | 0 | for (k = 0; k < end; ++k) |
200 | 0 | ((int32_t*)(s->plane[i].line[j]))[k] = 1<<18; |
201 | 0 | } else { |
202 | 0 | end = n + 1; |
203 | 0 | for (k = 0; k < end; ++k) |
204 | 0 | ((int16_t*)(s->plane[i].line[j]))[k] = 1<<14; |
205 | 0 | } |
206 | 0 | } |
207 | 0 | } |
208 | 0 | } |
209 | | |
210 | | /* |
211 | | Calculates the minimum ring buffer size, it should be able to store vFilterSize |
212 | | more n lines where n is the max difference between each adjacent slice which |
213 | | outputs a line. |
214 | | The n lines are needed only when there is not enough src lines to output a single |
215 | | dst line, then we should buffer these lines to process them on the next call to scale. |
216 | | */ |
217 | | static void get_min_buffer_size(SwsInternal *c, int *out_lum_size, int *out_chr_size) |
218 | 0 | { |
219 | 0 | int lumY; |
220 | 0 | int dstH = c->opts.dst_h; |
221 | 0 | int chrDstH = c->chrDstH; |
222 | 0 | int *lumFilterPos = c->vLumFilterPos; |
223 | 0 | int *chrFilterPos = c->vChrFilterPos; |
224 | 0 | int lumFilterSize = c->vLumFilterSize; |
225 | 0 | int chrFilterSize = c->vChrFilterSize; |
226 | 0 | int chrSubSample = c->chrSrcVSubSample; |
227 | |
|
228 | 0 | *out_lum_size = lumFilterSize; |
229 | 0 | *out_chr_size = chrFilterSize; |
230 | |
|
231 | 0 | for (lumY = 0; lumY < dstH; lumY++) { |
232 | 0 | int chrY = (int64_t)lumY * chrDstH / dstH; |
233 | 0 | int nextSlice = FFMAX(lumFilterPos[lumY] + lumFilterSize - 1, |
234 | 0 | ((chrFilterPos[chrY] + chrFilterSize - 1) |
235 | 0 | << chrSubSample)); |
236 | |
|
237 | 0 | nextSlice >>= chrSubSample; |
238 | 0 | nextSlice <<= chrSubSample; |
239 | 0 | (*out_lum_size) = FFMAX((*out_lum_size), nextSlice - lumFilterPos[lumY]); |
240 | 0 | (*out_chr_size) = FFMAX((*out_chr_size), (nextSlice >> chrSubSample) - chrFilterPos[chrY]); |
241 | 0 | } |
242 | 0 | } |
243 | | |
244 | | |
245 | | |
246 | | int ff_init_filters(SwsInternal * c) |
247 | 0 | { |
248 | 0 | int i; |
249 | 0 | int index; |
250 | 0 | int num_ydesc; |
251 | 0 | int num_cdesc; |
252 | 0 | int num_vdesc = isPlanarYUV(c->opts.dst_format) && !isGray(c->opts.dst_format) ? 2 : 1; |
253 | 0 | int need_lum_conv = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar; |
254 | 0 | int need_chr_conv = c->chrToYV12 || c->readChrPlanar; |
255 | 0 | int need_gamma = c->is_internal_gamma; |
256 | 0 | int srcIdx, dstIdx; |
257 | 0 | int dst_stride = FFALIGN(c->opts.dst_w * sizeof(int16_t) + 66, 16); |
258 | |
|
259 | 0 | uint32_t * pal = usePal(c->opts.src_format) ? c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table; |
260 | 0 | int res = 0; |
261 | |
|
262 | 0 | int lumBufSize; |
263 | 0 | int chrBufSize; |
264 | |
|
265 | 0 | get_min_buffer_size(c, &lumBufSize, &chrBufSize); |
266 | 0 | lumBufSize = FFMAX(lumBufSize, c->vLumFilterSize + MAX_LINES_AHEAD); |
267 | 0 | chrBufSize = FFMAX(chrBufSize, c->vChrFilterSize + MAX_LINES_AHEAD); |
268 | |
|
269 | 0 | if (c->dstBpc == 16) |
270 | 0 | dst_stride <<= 1; |
271 | |
|
272 | 0 | if (c->dstBpc == 32) |
273 | 0 | dst_stride <<= 2; |
274 | |
|
275 | 0 | num_ydesc = need_lum_conv ? 2 : 1; |
276 | 0 | num_cdesc = need_chr_conv ? 2 : 1; |
277 | |
|
278 | 0 | c->numSlice = FFMAX(num_ydesc, num_cdesc) + 2; |
279 | 0 | c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0); |
280 | 0 | c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0); |
281 | 0 | c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0); |
282 | |
|
283 | 0 | if (isFloat16(c->opts.src_format)) { |
284 | 0 | c->h2f_tables = av_malloc(sizeof(*c->h2f_tables)); |
285 | 0 | if (!c->h2f_tables) |
286 | 0 | return AVERROR(ENOMEM); |
287 | 0 | ff_init_half2float_tables(c->h2f_tables); |
288 | 0 | c->input_opaque = c->h2f_tables; |
289 | 0 | } |
290 | | |
291 | 0 | c->desc = av_calloc(c->numDesc, sizeof(*c->desc)); |
292 | 0 | if (!c->desc) |
293 | 0 | return AVERROR(ENOMEM); |
294 | 0 | c->slice = av_calloc(c->numSlice, sizeof(*c->slice)); |
295 | 0 | if (!c->slice) { |
296 | 0 | res = AVERROR(ENOMEM); |
297 | 0 | goto cleanup; |
298 | 0 | } |
299 | | |
300 | 0 | res = alloc_slice(&c->slice[0], c->opts.src_format, c->opts.src_h, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); |
301 | 0 | if (res < 0) goto cleanup; |
302 | 0 | for (i = 1; i < c->numSlice-2; ++i) { |
303 | 0 | res = alloc_slice(&c->slice[i], c->opts.src_format, lumBufSize, chrBufSize, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); |
304 | 0 | if (res < 0) goto cleanup; |
305 | 0 | res = alloc_lines(&c->slice[i], FFALIGN(c->opts.src_w*2+78, 16), c->opts.src_w); |
306 | 0 | if (res < 0) goto cleanup; |
307 | 0 | } |
308 | | // horizontal scaler output |
309 | 0 | res = alloc_slice(&c->slice[i], c->opts.src_format, lumBufSize, chrBufSize, c->chrDstHSubSample, c->chrDstVSubSample, 1); |
310 | 0 | if (res < 0) goto cleanup; |
311 | 0 | res = alloc_lines(&c->slice[i], dst_stride, c->opts.dst_w); |
312 | 0 | if (res < 0) goto cleanup; |
313 | | |
314 | 0 | fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc); |
315 | | |
316 | | // vertical scaler output |
317 | 0 | ++i; |
318 | 0 | res = alloc_slice(&c->slice[i], c->opts.dst_format, c->opts.dst_h, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0); |
319 | 0 | if (res < 0) goto cleanup; |
320 | | |
321 | 0 | index = 0; |
322 | 0 | srcIdx = 0; |
323 | 0 | dstIdx = 1; |
324 | |
|
325 | 0 | if (need_gamma) { |
326 | 0 | res = ff_init_gamma_convert(c->desc + index, c->slice + srcIdx, c->inv_gamma); |
327 | 0 | if (res < 0) goto cleanup; |
328 | 0 | ++index; |
329 | 0 | } |
330 | | |
331 | 0 | if (need_lum_conv) { |
332 | 0 | res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal); |
333 | 0 | if (res < 0) goto cleanup; |
334 | 0 | c->desc[index].alpha = c->needAlpha; |
335 | 0 | ++index; |
336 | 0 | srcIdx = dstIdx; |
337 | 0 | } |
338 | | |
339 | | |
340 | 0 | dstIdx = FFMAX(num_ydesc, num_cdesc); |
341 | 0 | res = ff_init_desc_hscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc); |
342 | 0 | if (res < 0) goto cleanup; |
343 | 0 | c->desc[index].alpha = c->needAlpha; |
344 | | |
345 | |
|
346 | 0 | ++index; |
347 | 0 | { |
348 | 0 | srcIdx = 0; |
349 | 0 | dstIdx = 1; |
350 | 0 | if (need_chr_conv) { |
351 | 0 | res = ff_init_desc_cfmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal); |
352 | 0 | if (res < 0) goto cleanup; |
353 | 0 | ++index; |
354 | 0 | srcIdx = dstIdx; |
355 | 0 | } |
356 | | |
357 | 0 | dstIdx = FFMAX(num_ydesc, num_cdesc); |
358 | 0 | if (c->needs_hcscale) |
359 | 0 | res = ff_init_desc_chscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hChrFilter, c->hChrFilterPos, c->hChrFilterSize, c->chrXInc); |
360 | 0 | else |
361 | 0 | res = ff_init_desc_no_chr(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx]); |
362 | 0 | if (res < 0) goto cleanup; |
363 | 0 | } |
364 | | |
365 | 0 | ++index; |
366 | 0 | { |
367 | 0 | srcIdx = c->numSlice - 2; |
368 | 0 | dstIdx = c->numSlice - 1; |
369 | 0 | res = ff_init_vscale(c, c->desc + index, c->slice + srcIdx, c->slice + dstIdx); |
370 | 0 | if (res < 0) goto cleanup; |
371 | 0 | } |
372 | | |
373 | 0 | ++index; |
374 | 0 | if (need_gamma) { |
375 | 0 | res = ff_init_gamma_convert(c->desc + index, c->slice + dstIdx, c->gamma); |
376 | 0 | if (res < 0) goto cleanup; |
377 | 0 | } |
378 | | |
379 | 0 | return 0; |
380 | | |
381 | 0 | cleanup: |
382 | 0 | ff_free_filters(c); |
383 | 0 | return res; |
384 | 0 | } |
385 | | |
386 | | int ff_free_filters(SwsInternal *c) |
387 | 0 | { |
388 | 0 | int i; |
389 | 0 | if (c->desc) { |
390 | 0 | for (i = 0; i < c->numDesc; ++i) |
391 | 0 | av_freep(&c->desc[i].instance); |
392 | 0 | av_freep(&c->desc); |
393 | 0 | } |
394 | |
|
395 | 0 | if (c->slice) { |
396 | 0 | for (i = 0; i < c->numSlice; ++i) |
397 | 0 | free_slice(&c->slice[i]); |
398 | 0 | av_freep(&c->slice); |
399 | 0 | } |
400 | 0 | av_freep(&c->h2f_tables); |
401 | 0 | return 0; |
402 | 0 | } |