/src/ffmpeg/libavcodec/speedhqdec.c
Line | Count | Source |
1 | | /* |
2 | | * NewTek SpeedHQ codec |
3 | | * Copyright 2017 Steinar H. Gunderson |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * NewTek SpeedHQ decoder. |
25 | | */ |
26 | | |
27 | | #define BITSTREAM_READER_LE |
28 | | |
29 | | #include "libavutil/attributes.h" |
30 | | #include "libavutil/mem_internal.h" |
31 | | |
32 | | #include "avcodec.h" |
33 | | #include "blockdsp.h" |
34 | | #include "codec_internal.h" |
35 | | #include "decode.h" |
36 | | #include "get_bits.h" |
37 | | #include "idctdsp.h" |
38 | | #include "libavutil/thread.h" |
39 | | #include "mathops.h" |
40 | | #include "mpeg12data.h" |
41 | | #include "mpeg12vlc.h" |
42 | | #include "speedhq.h" |
43 | | #include "thread.h" |
44 | | |
/* Highest valid coefficient position inside a 64-entry (8x8) DCT block. */
45 | 1.07M | #define MAX_INDEX (64 - 1) |
46 | | |
47 | | /* |
48 | | * 5 bits makes for very small tables, with no more than two lookups needed |
49 | | * for the longest (10-bit) codes. |
50 | | */ |
51 | | #define ALPHA_VLC_BITS 5 |
52 | | |
/* Per-decoder private state, reached through AVCodecContext.priv_data. */
53 | | typedef struct SHQContext { |
/* Supplies clear_block(), used to zero the coefficient block before decoding. */
54 | | BlockDSPContext bdsp; |
/* Supplies idct_put() and the IDCT coefficient permutation. */
55 | | IDCTDSPContext idsp; |
/* ff_zigzag_direct permuted with idsp.idct_permutation (built at init). */
56 | | uint8_t permutated_intra_scantable[64]; |
/* Recomputed for every frame from the quality byte (scale = 100 - quality). */
57 | | int quant_matrix[64]; |
/* Chroma layout and alpha coding; both are derived from the FOURCC at init. */
58 | | enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 } |
59 | | subsampling; |
60 | | enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type; |
/* Packet currently being decoded; read by the slice callbacks. */
61 | | AVPacket *avpkt; |
/* 24-bit little-endian value read from byte 1 of the packet header. */
62 | | uint32_t second_field_offset; |
63 | | } SHQContext; |
64 | | |
/*
 * Base quantisation weights. compute_quant_matrix() looks them up through
 * ff_zigzag_direct and multiplies each one by the per-frame scale.
 */
65 | | /* NOTE: The first element is always 16, unscaled. */ |
66 | | static const uint8_t unscaled_quant_matrix[64] = { |
67 | | 16, 16, 19, 22, 26, 27, 29, 34, |
68 | | 16, 16, 22, 24, 27, 29, 34, 37, |
69 | | 19, 22, 26, 27, 29, 34, 34, 38, |
70 | | 22, 22, 26, 27, 29, 34, 37, 40, |
71 | | 22, 26, 27, 29, 32, 35, 40, 48, |
72 | | 26, 27, 29, 32, 35, 40, 48, 58, |
73 | | 26, 27, 29, 34, 38, 46, 56, 69, |
74 | | 27, 29, 35, 38, 46, 56, 69, 83 |
75 | | }; |
76 | | |
/*
 * Static VLC lookup tables. They are filled once by speedhq_static_init()
 * (and compute_alpha_vlcs() for the two alpha tables) and only read afterwards.
 */
/* DC size codes for components 0 and 3 (see decode_dc_le()). */
77 | | static VLCElem dc_lum_vlc_le[512]; |
/* DC size codes for every other component. */
78 | | static VLCElem dc_chroma_vlc_le[514]; |
/* Run and level codes of the RLE alpha plane coding. */
79 | | static VLCElem dc_alpha_run_vlc_le[160]; |
80 | | static VLCElem dc_alpha_level_vlc_le[288]; |
81 | | |
/* Combined run/level table for the AC coefficients of DCT blocks. */
82 | | static RL_VLC_ELEM speedhq_rl_vlc[674]; |
83 | | |
84 | | static inline int decode_dc_le(GetBitContext *gb, int component) |
85 | 247k | { |
86 | 247k | int code, diff; |
87 | | |
88 | 247k | if (component == 0 || component == 3) { |
89 | 142k | code = get_vlc2(gb, dc_lum_vlc_le, DC_VLC_BITS, 2); |
90 | 142k | } else { |
91 | 104k | code = get_vlc2(gb, dc_chroma_vlc_le, DC_VLC_BITS, 2); |
92 | 104k | } |
93 | 247k | if (!code) { |
94 | 8.59k | diff = 0; |
95 | 238k | } else { |
96 | 238k | diff = get_xbits_le(gb, code); |
97 | 238k | } |
98 | 247k | return diff; |
99 | 247k | } |
100 | | |
101 | | static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize) |
102 | 13.3k | { |
103 | 13.3k | uint8_t block[128]; |
104 | 13.3k | int i = 0, x, y; |
105 | | |
106 | 13.3k | memset(block, 0, sizeof(block)); |
107 | | |
108 | 13.3k | { |
109 | 13.3k | OPEN_READER(re, gb); |
110 | | |
111 | 74.4k | for ( ;; ) { |
112 | 74.4k | int run, level; |
113 | | |
114 | 74.4k | UPDATE_CACHE_LE(re, gb); |
115 | 74.4k | GET_VLC(run, re, gb, dc_alpha_run_vlc_le, ALPHA_VLC_BITS, 2); |
116 | | |
117 | 74.4k | if (run < 0) break; |
118 | 63.3k | i += run; |
119 | 63.3k | if (i >= 128) |
120 | 2.22k | return AVERROR_INVALIDDATA; |
121 | | |
122 | 61.0k | UPDATE_CACHE_LE(re, gb); |
123 | 61.0k | GET_VLC(level, re, gb, dc_alpha_level_vlc_le, ALPHA_VLC_BITS, 2); |
124 | 61.0k | block[i++] = level; |
125 | 61.0k | } |
126 | | |
127 | 11.1k | CLOSE_READER(re, gb); |
128 | 11.1k | } |
129 | | |
130 | 100k | for (y = 0; y < 8; y++) { |
131 | 1.51M | for (x = 0; x < 16; x++) { |
132 | 1.42M | last_alpha[x] -= block[y * 16 + x]; |
133 | 1.42M | } |
134 | 88.9k | memcpy(dest, last_alpha, 16); |
135 | 88.9k | dest += linesize; |
136 | 88.9k | } |
137 | | |
138 | 11.1k | return 0; |
139 | 13.3k | } |
140 | | |
141 | | static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize) |
142 | 247k | { |
143 | 247k | const int *quant_matrix = s->quant_matrix; |
144 | 247k | const uint8_t *scantable = s->permutated_intra_scantable; |
145 | 247k | LOCAL_ALIGNED_32(int16_t, block, [64]); |
146 | 247k | int dc_offset; |
147 | | |
148 | 247k | s->bdsp.clear_block(block); |
149 | | |
150 | 247k | dc_offset = decode_dc_le(gb, component); |
151 | 247k | last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */ |
152 | 247k | block[scantable[0]] = last_dc[component]; /* quant_matrix[0] is always 16. */ |
153 | | |
154 | | /* Read AC coefficients. */ |
155 | 247k | { |
156 | 247k | int i = 0; |
157 | 247k | OPEN_READER(re, gb); |
158 | 1.29M | for ( ;; ) { |
159 | 1.29M | int level, run; |
160 | 1.29M | UPDATE_CACHE_LE(re, gb); |
161 | 1.29M | GET_RL_VLC(level, run, re, gb, speedhq_rl_vlc, |
162 | 1.29M | TEX_VLC_BITS, 2, 0); |
163 | 1.29M | if (level == 127) { |
164 | 224k | break; |
165 | 1.07M | } else if (level) { |
166 | 1.05M | i += run; |
167 | 1.05M | if (i > MAX_INDEX) |
168 | 21.8k | return AVERROR_INVALIDDATA; |
169 | | /* If next bit is 1, level = -level */ |
170 | 1.02M | level = (level ^ SHOW_SBITS(re, gb, 1)) - |
171 | 1.02M | SHOW_SBITS(re, gb, 1); |
172 | 1.02M | LAST_SKIP_BITS(re, gb, 1); |
173 | 1.02M | } else { |
174 | | /* Escape. */ |
175 | | #if MIN_CACHE_BITS < 6 + 6 + 12 |
176 | | #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE |
177 | | #endif |
178 | 20.0k | run = SHOW_UBITS(re, gb, 6) + 1; |
179 | 20.0k | SKIP_BITS(re, gb, 6); |
180 | 20.0k | level = SHOW_UBITS(re, gb, 12) - 2048; |
181 | 20.0k | LAST_SKIP_BITS(re, gb, 12); |
182 | | |
183 | 20.0k | i += run; |
184 | 20.0k | if (i > MAX_INDEX) |
185 | 380 | return AVERROR_INVALIDDATA; |
186 | 20.0k | } |
187 | | |
188 | 1.04M | block[scantable[i]] = (level * quant_matrix[i]) >> 4; |
189 | 1.04M | } |
190 | 224k | CLOSE_READER(re, gb); |
191 | 224k | } |
192 | | |
193 | 0 | s->idsp.idct_put(dest, linesize, block); |
194 | | |
195 | 224k | return 0; |
196 | 247k | } |
197 | | |
198 | | static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride) |
199 | 5.47k | { |
200 | 5.47k | int linesize_y = frame->linesize[0] * line_stride; |
201 | 5.47k | int linesize_cb = frame->linesize[1] * line_stride; |
202 | 5.47k | int linesize_cr = frame->linesize[2] * line_stride; |
203 | 5.47k | int linesize_a; |
204 | 5.47k | int ret; |
205 | | |
206 | 5.47k | if (s->alpha_type != SHQ_NO_ALPHA) |
207 | 4.21k | linesize_a = frame->linesize[3] * line_stride; |
208 | | |
209 | 10.5k | for (int y = 0; y < frame->height; y += 16 * line_stride) { |
210 | 9.85k | int last_dc[4] = { 1024, 1024, 1024, 1024 }; |
211 | 9.85k | uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a; |
212 | 9.85k | uint8_t last_alpha[16]; |
213 | 9.85k | int x = frame->width - 8; |
214 | | |
215 | 9.85k | dest_y = frame->data[0] + frame->linesize[0] * (y + field_number) + x; |
216 | 9.85k | if (s->subsampling == SHQ_SUBSAMPLING_420) { |
217 | 3.23k | dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number) + x / 2; |
218 | 3.23k | dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number) + x / 2; |
219 | 6.61k | } else { |
220 | 6.61k | av_assert2(s->subsampling == SHQ_SUBSAMPLING_422); |
221 | 6.61k | dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number) + x / 2; |
222 | 6.61k | dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number) + x / 2; |
223 | 6.61k | } |
224 | 9.85k | if (s->alpha_type != SHQ_NO_ALPHA) { |
225 | 6.41k | memset(last_alpha, 255, sizeof(last_alpha)); |
226 | 6.41k | dest_a = frame->data[3] + frame->linesize[3] * (y + field_number) + x; |
227 | 6.41k | } |
228 | | |
229 | 9.85k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y, linesize_y)) < 0) |
230 | 534 | return ret; |
231 | 9.32k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8, linesize_y)) < 0) |
232 | 477 | return ret; |
233 | 8.84k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0) |
234 | 382 | return ret; |
235 | 8.46k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0) |
236 | 424 | return ret; |
237 | 8.03k | if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, linesize_cb)) < 0) |
238 | 374 | return ret; |
239 | 7.66k | if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr, linesize_cr)) < 0) |
240 | 396 | return ret; |
241 | | |
242 | 7.26k | if (s->subsampling != SHQ_SUBSAMPLING_420) { |
243 | 4.76k | if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0) |
244 | 408 | return ret; |
245 | 4.36k | if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0) |
246 | 365 | return ret; |
247 | 4.36k | } |
248 | | |
249 | 6.49k | if (s->alpha_type == SHQ_RLE_ALPHA) { |
250 | | /* Alpha coded using 16x8 RLE blocks. */ |
251 | 2.14k | if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a, linesize_a)) < 0) |
252 | 261 | return ret; |
253 | 1.88k | if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0) |
254 | 312 | return ret; |
255 | 4.34k | } else if (s->alpha_type == SHQ_DCT_ALPHA) { |
256 | | /* Alpha encoded exactly like luma. */ |
257 | 2.07k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a, linesize_a)) < 0) |
258 | 210 | return ret; |
259 | 1.86k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8, linesize_a)) < 0) |
260 | 202 | return ret; |
261 | 1.66k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0) |
262 | 197 | return ret; |
263 | 1.47k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0) |
264 | 213 | return ret; |
265 | 1.47k | } |
266 | 6.49k | } |
267 | | |
268 | 724 | return 0; |
269 | 5.47k | } |
270 | | |
271 | | static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride, int slice_number) |
272 | 135k | { |
273 | 135k | int ret, x, y, slice_offsets[5]; |
274 | 135k | uint32_t slice_begin, slice_end; |
275 | 135k | int linesize_y = frame->linesize[0] * line_stride; |
276 | 135k | int linesize_cb = frame->linesize[1] * line_stride; |
277 | 135k | int linesize_cr = frame->linesize[2] * line_stride; |
278 | 135k | int linesize_a; |
279 | 135k | GetBitContext gb; |
280 | | |
281 | 135k | if (s->alpha_type != SHQ_NO_ALPHA) |
282 | 85.1k | linesize_a = frame->linesize[3] * line_stride; |
283 | | |
284 | 135k | if (end < start || end - start < 3 || end > buf_size) |
285 | 63.8k | return AVERROR_INVALIDDATA; |
286 | | |
287 | 71.8k | slice_offsets[0] = start; |
288 | 71.8k | slice_offsets[4] = end; |
289 | 192k | for (x = 1; x < 4; x++) { |
290 | 154k | uint32_t last_offset, slice_len; |
291 | | |
292 | 154k | last_offset = slice_offsets[x - 1]; |
293 | 154k | slice_len = AV_RL24(buf + last_offset); |
294 | 154k | slice_offsets[x] = last_offset + slice_len; |
295 | | |
296 | 154k | if (slice_len < 3 || slice_offsets[x] > end - 3) |
297 | 33.7k | return AVERROR_INVALIDDATA; |
298 | 154k | } |
299 | | |
300 | 38.0k | slice_begin = slice_offsets[slice_number]; |
301 | 38.0k | slice_end = slice_offsets[slice_number + 1]; |
302 | | |
303 | 38.0k | if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0) |
304 | 0 | return ret; |
305 | | |
306 | 158k | for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) { |
307 | 140k | uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a; |
308 | 140k | int last_dc[4] = { 1024, 1024, 1024, 1024 }; |
309 | 140k | uint8_t last_alpha[16]; |
310 | | |
311 | 140k | memset(last_alpha, 255, sizeof(last_alpha)); |
312 | | |
313 | 140k | dest_y = frame->data[0] + frame->linesize[0] * (y + field_number); |
314 | 140k | if (s->subsampling == SHQ_SUBSAMPLING_420) { |
315 | 22.0k | dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number); |
316 | 22.0k | dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number); |
317 | 118k | } else { |
318 | 118k | dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number); |
319 | 118k | dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number); |
320 | 118k | } |
321 | 140k | if (s->alpha_type != SHQ_NO_ALPHA) { |
322 | 66.7k | dest_a = frame->data[3] + frame->linesize[3] * (y + field_number); |
323 | 66.7k | } |
324 | | |
325 | 151k | for (x = 0; x < frame->width - 8 * (s->subsampling != SHQ_SUBSAMPLING_444); x += 16) { |
326 | | /* Decode the four luma blocks. */ |
327 | 31.1k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0) |
328 | 10.3k | return ret; |
329 | 20.7k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0) |
330 | 710 | return ret; |
331 | 20.0k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0) |
332 | 502 | return ret; |
333 | 19.5k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0) |
334 | 564 | return ret; |
335 | | |
336 | | /* |
337 | | * Decode the first chroma block. For 4:2:0, this is the only one; |
338 | | * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block. |
339 | | */ |
340 | 19.0k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0) |
341 | 1.27k | return ret; |
342 | 17.7k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0) |
343 | 482 | return ret; |
344 | | |
345 | 17.2k | if (s->subsampling != SHQ_SUBSAMPLING_420) { |
346 | | /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */ |
347 | 12.2k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0) |
348 | 327 | return ret; |
349 | 11.9k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0) |
350 | 442 | return ret; |
351 | | |
352 | 11.5k | if (s->subsampling == SHQ_SUBSAMPLING_444) { |
353 | | /* Top-right and bottom-right blocks. */ |
354 | 5.31k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, linesize_cb)) < 0) |
355 | 488 | return ret; |
356 | 4.82k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, linesize_cr)) < 0) |
357 | 599 | return ret; |
358 | 4.22k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0) |
359 | 333 | return ret; |
360 | 3.89k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0) |
361 | 250 | return ret; |
362 | | |
363 | 3.64k | dest_cb += 8; |
364 | 3.64k | dest_cr += 8; |
365 | 3.64k | } |
366 | 11.5k | } |
367 | 14.8k | dest_y += 16; |
368 | 14.8k | dest_cb += 8; |
369 | 14.8k | dest_cr += 8; |
370 | | |
371 | 14.8k | if (s->alpha_type == SHQ_RLE_ALPHA) { |
372 | | /* Alpha coded using 16x8 RLE blocks. */ |
373 | 5.28k | if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0) |
374 | 1.26k | return ret; |
375 | 4.02k | if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0) |
376 | 387 | return ret; |
377 | 3.63k | dest_a += 16; |
378 | 9.51k | } else if (s->alpha_type == SHQ_DCT_ALPHA) { |
379 | | /* Alpha encoded exactly like luma. */ |
380 | 2.76k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0) |
381 | 457 | return ret; |
382 | 2.30k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8, linesize_a)) < 0) |
383 | 811 | return ret; |
384 | 1.49k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0) |
385 | 218 | return ret; |
386 | 1.27k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0) |
387 | 202 | return ret; |
388 | 1.07k | dest_a += 16; |
389 | 1.07k | } |
390 | 14.8k | } |
391 | 140k | } |
392 | | |
393 | 18.4k | if (s->subsampling != SHQ_SUBSAMPLING_444 && (frame->width & 15) && slice_number == 3) |
394 | 5.47k | return decode_speedhq_border(s, &gb, frame, field_number, line_stride); |
395 | | |
396 | 12.9k | return 0; |
397 | 18.4k | } |
398 | | |
399 | | static int decode_slice_progressive(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
400 | 8.80k | { |
401 | 8.80k | SHQContext *s = avctx->priv_data; |
402 | 8.80k | (void)threadnr; |
403 | | |
404 | 8.80k | return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 0, 4, s->avpkt->size, 1, jobnr); |
405 | 8.80k | } |
406 | | |
407 | | static int decode_slice_interlaced(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
408 | 126k | { |
409 | 126k | SHQContext *s = avctx->priv_data; |
410 | 126k | int field_number = jobnr / 4; |
411 | 126k | int slice_number = jobnr % 4; |
412 | 126k | (void)threadnr; |
413 | | |
414 | 126k | if (field_number == 0) |
415 | 63.4k | return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 0, 4, s->second_field_offset, 2, slice_number); |
416 | 63.4k | else |
417 | 63.4k | return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 1, s->second_field_offset, s->avpkt->size, 2, slice_number); |
418 | 126k | } |
419 | | |
420 | | static void compute_quant_matrix(int *output, int qscale) |
421 | 19.6k | { |
422 | 19.6k | int i; |
423 | 1.28M | for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale; |
424 | 19.6k | } |
425 | | |
426 | | static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
427 | | int *got_frame, AVPacket *avpkt) |
428 | 222k | { |
429 | 222k | SHQContext * const s = avctx->priv_data; |
430 | 222k | const uint8_t *buf = avpkt->data; |
431 | 222k | int buf_size = avpkt->size; |
432 | 222k | uint8_t quality; |
433 | 222k | int ret; |
434 | | |
435 | 222k | if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0) |
436 | 98.1k | return AVERROR_INVALIDDATA; |
437 | 124k | if (buf_size < avctx->width*avctx->height / 64 / 4) |
438 | 19.3k | return AVERROR_INVALIDDATA; |
439 | | |
440 | 105k | quality = buf[0]; |
441 | 105k | if (quality >= 100) { |
442 | 4.84k | return AVERROR_INVALIDDATA; |
443 | 4.84k | } |
444 | | |
445 | 100k | if (avctx->skip_frame >= AVDISCARD_ALL) |
446 | 80.5k | return avpkt->size; |
447 | | |
448 | 19.6k | compute_quant_matrix(s->quant_matrix, 100 - quality); |
449 | | |
450 | 19.6k | s->second_field_offset = AV_RL24(buf + 1); |
451 | 19.6k | if (s->second_field_offset >= buf_size - 3) { |
452 | 1.57k | return AVERROR_INVALIDDATA; |
453 | 1.57k | } |
454 | | |
455 | 18.1k | avctx->coded_width = FFALIGN(avctx->width, 16); |
456 | 18.1k | avctx->coded_height = FFALIGN(avctx->height, 16); |
457 | | |
458 | 18.1k | if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0) { |
459 | 66 | return ret; |
460 | 66 | } |
461 | | |
462 | 18.0k | s->avpkt = avpkt; |
463 | | |
464 | 18.0k | if (s->second_field_offset == 4 || s->second_field_offset == (buf_size-4)) { |
465 | | /* |
466 | | * Overlapping first and second fields is used to signal |
467 | | * encoding only a single field. In this case, "height" |
468 | | * is ambiguous; it could mean either the height of the |
469 | | * frame as a whole, or of the field. The former would make |
470 | | * more sense for compatibility with legacy decoders, |
471 | | * but this matches the convention used in NDI, which is |
472 | | * the primary user of this trick. |
473 | | */ |
474 | 2.20k | if ((ret = avctx->execute2(avctx, decode_slice_progressive, frame, NULL, 4)) < 0) |
475 | 0 | return ret; |
476 | 15.8k | } else { |
477 | 15.8k | if ((ret = avctx->execute2(avctx, decode_slice_interlaced, frame, NULL, 8)) < 0) |
478 | 0 | return ret; |
479 | 15.8k | } |
480 | | |
481 | 18.0k | *got_frame = 1; |
482 | 18.0k | return buf_size; |
483 | 18.0k | } |
484 | | |
485 | | /* |
486 | | * Alpha VLC. Run and level are independently coded, and would be |
487 | | * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't |
488 | | * bother with combining them into one table. |
489 | | */ |
490 | | static av_cold void compute_alpha_vlcs(void) |
491 | 1 | { |
492 | 1 | uint16_t run_code[134], level_code[266]; |
493 | 1 | uint8_t run_bits[134], level_bits[266]; |
494 | 1 | int16_t run_symbols[134], level_symbols[266]; |
495 | 1 | int entry, i, sign; |
496 | | |
497 | | /* Initialize VLC for alpha run. */ |
498 | 1 | entry = 0; |
499 | | |
500 | | /* 0 -> 0. */ |
501 | 1 | run_code[entry] = 0; |
502 | 1 | run_bits[entry] = 1; |
503 | 1 | run_symbols[entry] = 0; |
504 | 1 | ++entry; |
505 | | |
506 | | /* 10xx -> xx plus 1. */ |
507 | 5 | for (i = 0; i < 4; ++i) { |
508 | 4 | run_code[entry] = (i << 2) | 1; |
509 | 4 | run_bits[entry] = 4; |
510 | 4 | run_symbols[entry] = i + 1; |
511 | 4 | ++entry; |
512 | 4 | } |
513 | | |
514 | | /* 111xxxxxxx -> xxxxxxx. */ |
515 | 129 | for (i = 0; i < 128; ++i) { |
516 | 128 | run_code[entry] = (i << 3) | 7; |
517 | 128 | run_bits[entry] = 10; |
518 | 128 | run_symbols[entry] = i; |
519 | 128 | ++entry; |
520 | 128 | } |
521 | | |
522 | | /* 110 -> EOB. */ |
523 | 1 | run_code[entry] = 3; |
524 | 1 | run_bits[entry] = 3; |
525 | 1 | run_symbols[entry] = -1; |
526 | 1 | ++entry; |
527 | | |
528 | 1 | av_assert0(entry == FF_ARRAY_ELEMS(run_code)); |
529 | | |
530 | 1 | VLC_INIT_STATIC_SPARSE_TABLE(dc_alpha_run_vlc_le, ALPHA_VLC_BITS, |
531 | 1 | FF_ARRAY_ELEMS(run_code), |
532 | 1 | run_bits, 1, 1, |
533 | 1 | run_code, 2, 2, |
534 | 1 | run_symbols, 2, 2, VLC_INIT_LE); |
535 | | |
536 | | /* Initialize VLC for alpha level. */ |
537 | 1 | entry = 0; |
538 | | |
539 | 3 | for (sign = 0; sign <= 1; ++sign) { |
540 | | /* 1s -> -1 or +1 (depending on sign bit). */ |
541 | 2 | level_code[entry] = (sign << 1) | 1; |
542 | 2 | level_bits[entry] = 2; |
543 | 2 | level_symbols[entry] = sign ? -1 : 1; |
544 | 2 | ++entry; |
545 | | |
546 | | /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */ |
547 | 10 | for (i = 0; i < 4; ++i) { |
548 | 8 | level_code[entry] = (i << 3) | (sign << 2) | 2; |
549 | 8 | level_bits[entry] = 5; |
550 | 8 | level_symbols[entry] = sign ? -(i + 2) : (i + 2); |
551 | 8 | ++entry; |
552 | 8 | } |
553 | 2 | } |
554 | | |
555 | | /* |
556 | | * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes |
557 | | * here that would better be encoded in other ways (e.g. 0 would be |
558 | | * encoded by increasing run, and +/- 1 would be encoded with a |
559 | | * shorter code), but it doesn't hurt to allow everything. |
560 | | */ |
561 | 257 | for (i = 0; i < 256; ++i) { |
562 | 256 | level_code[entry] = i << 2; |
563 | 256 | level_bits[entry] = 10; |
564 | 256 | level_symbols[entry] = i; |
565 | 256 | ++entry; |
566 | 256 | } |
567 | | |
568 | 1 | av_assert0(entry == FF_ARRAY_ELEMS(level_code)); |
569 | | |
570 | 1 | VLC_INIT_STATIC_SPARSE_TABLE(dc_alpha_level_vlc_le, ALPHA_VLC_BITS, |
571 | 1 | FF_ARRAY_ELEMS(level_code), |
572 | 1 | level_bits, 1, 1, |
573 | 1 | level_code, 2, 2, |
574 | 1 | level_symbols, 2, 2, VLC_INIT_LE); |
575 | 1 | } |
576 | | |
/*
 * Fill every static VLC table of this decoder: the two DC tables, the
 * run/level table for AC coefficients and the two alpha tables.
 * speedhq_decode_init() calls this through ff_thread_once().
 */
577 | | static av_cold void speedhq_static_init(void) |
578 | 1 | { |
579 | | /* Exactly the same as MPEG-2, except for a little-endian reader. */ |
580 | 1 | VLC_INIT_STATIC_TABLE(dc_lum_vlc_le, DC_VLC_BITS, 12, |
581 | 1 | ff_mpeg12_vlc_dc_lum_bits, 1, 1, |
582 | 1 | ff_mpeg12_vlc_dc_lum_code, 2, 2, |
583 | 1 | VLC_INIT_OUTPUT_LE); |
584 | 1 | VLC_INIT_STATIC_TABLE(dc_chroma_vlc_le, DC_VLC_BITS, 12, |
585 | 1 | ff_mpeg12_vlc_dc_chroma_bits, 1, 1, |
586 | 1 | ff_mpeg12_vlc_dc_chroma_code, 2, 2, |
587 | 1 | VLC_INIT_OUTPUT_LE); |
588 | | |
/* AC run/level table, built from the ff_speedhq_* tables. */
589 | 1 | ff_init_2d_vlc_rl(ff_speedhq_vlc_table, speedhq_rl_vlc, ff_speedhq_run, |
590 | 1 | ff_speedhq_level, SPEEDHQ_RL_NB_ELEMS, |
591 | 1 | FF_ARRAY_ELEMS(speedhq_rl_vlc), VLC_INIT_LE); |
592 | | |
/* Run and level tables for the RLE alpha coding. */
593 | 1 | compute_alpha_vlcs(); |
594 | 1 | } |
595 | | |
596 | | static av_cold int speedhq_decode_init(AVCodecContext *avctx) |
597 | 1.87k | { |
598 | 1.87k | int ret; |
599 | 1.87k | static AVOnce init_once = AV_ONCE_INIT; |
600 | 1.87k | SHQContext * const s = avctx->priv_data; |
601 | | |
602 | 1.87k | ret = ff_thread_once(&init_once, speedhq_static_init); |
603 | 1.87k | if (ret) |
604 | 0 | return AVERROR_UNKNOWN; |
605 | | |
606 | 1.87k | ff_blockdsp_init(&s->bdsp); |
607 | 1.87k | ff_idctdsp_init(&s->idsp, avctx); |
608 | 1.87k | ff_permute_scantable(s->permutated_intra_scantable, ff_zigzag_direct, |
609 | 1.87k | s->idsp.idct_permutation); |
610 | | |
611 | 1.87k | switch (avctx->codec_tag) { |
612 | 240 | case MKTAG('S', 'H', 'Q', '0'): |
613 | 240 | s->subsampling = SHQ_SUBSAMPLING_420; |
614 | 240 | s->alpha_type = SHQ_NO_ALPHA; |
615 | 240 | avctx->pix_fmt = AV_PIX_FMT_YUV420P; |
616 | 240 | break; |
617 | 225 | case MKTAG('S', 'H', 'Q', '1'): |
618 | 225 | s->subsampling = SHQ_SUBSAMPLING_420; |
619 | 225 | s->alpha_type = SHQ_RLE_ALPHA; |
620 | 225 | avctx->pix_fmt = AV_PIX_FMT_YUVA420P; |
621 | 225 | break; |
622 | 231 | case MKTAG('S', 'H', 'Q', '2'): |
623 | 231 | s->subsampling = SHQ_SUBSAMPLING_422; |
624 | 231 | s->alpha_type = SHQ_NO_ALPHA; |
625 | 231 | avctx->pix_fmt = AV_PIX_FMT_YUV422P; |
626 | 231 | break; |
627 | 439 | case MKTAG('S', 'H', 'Q', '3'): |
628 | 439 | s->subsampling = SHQ_SUBSAMPLING_422; |
629 | 439 | s->alpha_type = SHQ_RLE_ALPHA; |
630 | 439 | avctx->pix_fmt = AV_PIX_FMT_YUVA422P; |
631 | 439 | break; |
632 | 264 | case MKTAG('S', 'H', 'Q', '4'): |
633 | 264 | s->subsampling = SHQ_SUBSAMPLING_444; |
634 | 264 | s->alpha_type = SHQ_NO_ALPHA; |
635 | 264 | avctx->pix_fmt = AV_PIX_FMT_YUV444P; |
636 | 264 | break; |
637 | 57 | case MKTAG('S', 'H', 'Q', '5'): |
638 | 57 | s->subsampling = SHQ_SUBSAMPLING_444; |
639 | 57 | s->alpha_type = SHQ_RLE_ALPHA; |
640 | 57 | avctx->pix_fmt = AV_PIX_FMT_YUVA444P; |
641 | 57 | break; |
642 | 180 | case MKTAG('S', 'H', 'Q', '7'): |
643 | 180 | s->subsampling = SHQ_SUBSAMPLING_422; |
644 | 180 | s->alpha_type = SHQ_DCT_ALPHA; |
645 | 180 | avctx->pix_fmt = AV_PIX_FMT_YUVA422P; |
646 | 180 | break; |
647 | 35 | case MKTAG('S', 'H', 'Q', '9'): |
648 | 35 | s->subsampling = SHQ_SUBSAMPLING_444; |
649 | 35 | s->alpha_type = SHQ_DCT_ALPHA; |
650 | 35 | avctx->pix_fmt = AV_PIX_FMT_YUVA444P; |
651 | 35 | break; |
652 | 206 | default: |
653 | 206 | av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n", |
654 | 206 | avctx->codec_tag); |
655 | 206 | return AVERROR_INVALIDDATA; |
656 | 1.87k | } |
657 | | |
658 | | /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */ |
659 | 1.67k | avctx->colorspace = AVCOL_SPC_BT470BG; |
660 | 1.67k | avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; |
661 | | |
662 | 1.67k | return 0; |
663 | 1.87k | } |
664 | | |
/*
 * Codec registration entry for the SpeedHQ decoder. The capability flags
 * advertise direct rendering plus frame- and slice-level threading.
 */
665 | | const FFCodec ff_speedhq_decoder = { |
666 | | .p.name = "speedhq", |
667 | | CODEC_LONG_NAME("NewTek SpeedHQ"), |
668 | | .p.type = AVMEDIA_TYPE_VIDEO, |
669 | | .p.id = AV_CODEC_ID_SPEEDHQ, |
670 | | .priv_data_size = sizeof(SHQContext), |
671 | | .init = speedhq_decode_init, |
672 | | FF_CODEC_DECODE_CB(speedhq_decode_frame), |
673 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, |
674 | | }; |