/src/ffmpeg/libavcodec/speedhqdec.c
Line | Count | Source |
1 | | /* |
2 | | * NewTek SpeedHQ codec |
3 | | * Copyright 2017 Steinar H. Gunderson |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * NewTek SpeedHQ decoder. |
25 | | */ |
26 | | |
27 | | #define BITSTREAM_READER_LE |
28 | | |
29 | | #include "libavutil/attributes.h" |
30 | | #include "libavutil/mem_internal.h" |
31 | | |
32 | | #include "avcodec.h" |
33 | | #include "blockdsp.h" |
34 | | #include "codec_internal.h" |
35 | | #include "decode.h" |
36 | | #include "get_bits.h" |
37 | | #include "idctdsp.h" |
38 | | #include "libavutil/thread.h" |
39 | | #include "mathops.h" |
40 | | #include "mpeg12data.h" |
41 | | #include "mpeg12vlc.h" |
42 | | #include "speedhq.h" |
43 | | #include "thread.h" |
44 | | |
/* Highest valid coefficient index inside a 64-entry (8x8) DCT block. */
#define MAX_INDEX (64 - 1)

/*
 * 5 bits makes for very small tables, with no more than two lookups needed
 * for the longest (10-bit) codes.
 */
#define ALPHA_VLC_BITS 5
52 | | |
/* Private decoder state; reached through AVCodecContext.priv_data. */
typedef struct SHQContext {
    BlockDSPContext bdsp;   /* provides clear_block() for coefficient blocks */
    IDCTDSPContext idsp;    /* provides idct_put() and the IDCT permutation */
    uint8_t permutated_intra_scantable[64]; /* zigzag scan passed through the IDCT permutation */
    int quant_matrix[64];   /* unscaled matrix times qscale, indexed in zigzag order; rebuilt for every frame */
    enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
        subsampling;        /* chroma layout, selected from the FOURCC at init */
    enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type; /* alpha coding, selected from the FOURCC at init */
    AVPacket *avpkt;        /* packet being decoded; read by the slice jobs */
    uint32_t second_field_offset; /* 24-bit little-endian value read from packet bytes 1..3 */
} SHQContext;
64 | | |
/*
 * Base quantisation matrix. compute_quant_matrix() reads it through
 * ff_zigzag_direct[] and multiplies every entry by the frame's qscale.
 *
 * NOTE: The first element is always 16, unscaled.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    16, 16, 19, 22, 26, 27, 29, 34,
    16, 16, 22, 24, 27, 29, 34, 37,
    19, 22, 26, 27, 29, 34, 34, 38,
    22, 22, 26, 27, 29, 34, 37, 40,
    22, 26, 27, 29, 32, 35, 40, 48,
    26, 27, 29, 32, 35, 40, 48, 58,
    26, 27, 29, 34, 38, 46, 56, 69,
    27, 29, 35, 38, 46, 56, 69, 83
};
76 | | |
/* Little-endian VLC lookup tables; filled once by speedhq_static_init(). */
static VLCElem dc_lum_vlc_le[512];          /* DC size codes for components 0 and 3 */
static VLCElem dc_chroma_vlc_le[514];       /* DC size codes for the other components */
static VLCElem dc_alpha_run_vlc_le[160];    /* RLE alpha: run lengths and end-of-block */
static VLCElem dc_alpha_level_vlc_le[288];  /* RLE alpha: level values */

/* Combined run/level table used for the AC coefficients. */
static RL_VLC_ELEM speedhq_rl_vlc[674];
83 | | |
84 | | static inline int decode_dc_le(GetBitContext *gb, int component) |
85 | 227k | { |
86 | 227k | int code, diff; |
87 | | |
88 | 227k | if (component == 0 || component == 3) { |
89 | 133k | code = get_vlc2(gb, dc_lum_vlc_le, DC_VLC_BITS, 2); |
90 | 133k | } else { |
91 | 93.8k | code = get_vlc2(gb, dc_chroma_vlc_le, DC_VLC_BITS, 2); |
92 | 93.8k | } |
93 | 227k | if (!code) { |
94 | 7.26k | diff = 0; |
95 | 219k | } else { |
96 | 219k | diff = get_xbits_le(gb, code); |
97 | 219k | } |
98 | 227k | return diff; |
99 | 227k | } |
100 | | |
101 | | static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize) |
102 | 13.9k | { |
103 | 13.9k | uint8_t block[128]; |
104 | 13.9k | int i = 0, x, y; |
105 | | |
106 | 13.9k | memset(block, 0, sizeof(block)); |
107 | | |
108 | 13.9k | { |
109 | 13.9k | OPEN_READER(re, gb); |
110 | | |
111 | 84.5k | for ( ;; ) { |
112 | 84.5k | int run, level; |
113 | | |
114 | 84.5k | UPDATE_CACHE_LE(re, gb); |
115 | 84.5k | GET_VLC(run, re, gb, dc_alpha_run_vlc_le, ALPHA_VLC_BITS, 2); |
116 | | |
117 | 84.5k | if (run < 0) break; |
118 | 72.2k | i += run; |
119 | 72.2k | if (i >= 128) |
120 | 1.65k | return AVERROR_INVALIDDATA; |
121 | | |
122 | 70.6k | UPDATE_CACHE_LE(re, gb); |
123 | 70.6k | GET_VLC(level, re, gb, dc_alpha_level_vlc_le, ALPHA_VLC_BITS, 2); |
124 | 70.6k | block[i++] = level; |
125 | 70.6k | } |
126 | | |
127 | 12.2k | CLOSE_READER(re, gb); |
128 | 12.2k | } |
129 | | |
130 | 110k | for (y = 0; y < 8; y++) { |
131 | 1.67M | for (x = 0; x < 16; x++) { |
132 | 1.57M | last_alpha[x] -= block[y * 16 + x]; |
133 | 1.57M | } |
134 | 98.3k | memcpy(dest, last_alpha, 16); |
135 | 98.3k | dest += linesize; |
136 | 98.3k | } |
137 | | |
138 | 12.2k | return 0; |
139 | 13.9k | } |
140 | | |
141 | | static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize) |
142 | 227k | { |
143 | 227k | const int *quant_matrix = s->quant_matrix; |
144 | 227k | const uint8_t *scantable = s->permutated_intra_scantable; |
145 | 227k | LOCAL_ALIGNED_32(int16_t, block, [64]); |
146 | 227k | int dc_offset; |
147 | | |
148 | 227k | s->bdsp.clear_block(block); |
149 | | |
150 | 227k | dc_offset = decode_dc_le(gb, component); |
151 | 227k | last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */ |
152 | 227k | block[scantable[0]] = last_dc[component]; /* quant_matrix[0] is always 16. */ |
153 | | |
154 | | /* Read AC coefficients. */ |
155 | 227k | { |
156 | 227k | int i = 0; |
157 | 227k | OPEN_READER(re, gb); |
158 | 1.17M | for ( ;; ) { |
159 | 1.17M | int level, run; |
160 | 1.17M | UPDATE_CACHE_LE(re, gb); |
161 | 1.17M | GET_RL_VLC(level, run, re, gb, speedhq_rl_vlc, |
162 | 1.17M | TEX_VLC_BITS, 2, 0); |
163 | 1.17M | if (level == 127) { |
164 | 206k | break; |
165 | 969k | } else if (level) { |
166 | 950k | i += run; |
167 | 950k | if (i > MAX_INDEX) |
168 | 20.0k | return AVERROR_INVALIDDATA; |
169 | | /* If next bit is 1, level = -level */ |
170 | 930k | level = (level ^ SHOW_SBITS(re, gb, 1)) - |
171 | 930k | SHOW_SBITS(re, gb, 1); |
172 | 930k | LAST_SKIP_BITS(re, gb, 1); |
173 | 930k | } else { |
174 | | /* Escape. */ |
175 | | #if MIN_CACHE_BITS < 6 + 6 + 12 |
176 | | #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE |
177 | | #endif |
178 | 18.3k | run = SHOW_UBITS(re, gb, 6) + 1; |
179 | 18.3k | SKIP_BITS(re, gb, 6); |
180 | 18.3k | level = SHOW_UBITS(re, gb, 12) - 2048; |
181 | 18.3k | LAST_SKIP_BITS(re, gb, 12); |
182 | | |
183 | 18.3k | i += run; |
184 | 18.3k | if (i > MAX_INDEX) |
185 | 533 | return AVERROR_INVALIDDATA; |
186 | 18.3k | } |
187 | | |
188 | 948k | block[scantable[i]] = (level * quant_matrix[i]) >> 4; |
189 | 948k | } |
190 | 206k | CLOSE_READER(re, gb); |
191 | 206k | } |
192 | | |
193 | 0 | s->idsp.idct_put(dest, linesize, block); |
194 | | |
195 | 206k | return 0; |
196 | 227k | } |
197 | | |
198 | | static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride) |
199 | 5.28k | { |
200 | 5.28k | int linesize_y = frame->linesize[0] * line_stride; |
201 | 5.28k | int linesize_cb = frame->linesize[1] * line_stride; |
202 | 5.28k | int linesize_cr = frame->linesize[2] * line_stride; |
203 | 5.28k | int linesize_a; |
204 | 5.28k | int ret; |
205 | | |
206 | 5.28k | if (s->alpha_type != SHQ_NO_ALPHA) |
207 | 3.41k | linesize_a = frame->linesize[3] * line_stride; |
208 | | |
209 | 10.4k | for (int y = 0; y < frame->height; y += 16 * line_stride) { |
210 | 9.94k | int last_dc[4] = { 1024, 1024, 1024, 1024 }; |
211 | 9.94k | uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a; |
212 | 9.94k | uint8_t last_alpha[16]; |
213 | 9.94k | int x = frame->width - 8; |
214 | | |
215 | 9.94k | dest_y = frame->data[0] + frame->linesize[0] * (y + field_number) + x; |
216 | 9.94k | if (s->subsampling == SHQ_SUBSAMPLING_420) { |
217 | 3.04k | dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number) + x / 2; |
218 | 3.04k | dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number) + x / 2; |
219 | 6.90k | } else { |
220 | 6.90k | av_assert2(s->subsampling == SHQ_SUBSAMPLING_422); |
221 | 6.90k | dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number) + x / 2; |
222 | 6.90k | dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number) + x / 2; |
223 | 6.90k | } |
224 | 9.94k | if (s->alpha_type != SHQ_NO_ALPHA) { |
225 | 6.07k | memset(last_alpha, 255, sizeof(last_alpha)); |
226 | 6.07k | dest_a = frame->data[3] + frame->linesize[3] * (y + field_number) + x; |
227 | 6.07k | } |
228 | | |
229 | 9.94k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y, linesize_y)) < 0) |
230 | 568 | return ret; |
231 | 9.37k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8, linesize_y)) < 0) |
232 | 549 | return ret; |
233 | 8.82k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0) |
234 | 632 | return ret; |
235 | 8.19k | if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0) |
236 | 385 | return ret; |
237 | 7.80k | if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, linesize_cb)) < 0) |
238 | 368 | return ret; |
239 | 7.44k | if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr, linesize_cr)) < 0) |
240 | 336 | return ret; |
241 | | |
242 | 7.10k | if (s->subsampling != SHQ_SUBSAMPLING_420) { |
243 | 4.85k | if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0) |
244 | 311 | return ret; |
245 | 4.53k | if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0) |
246 | 280 | return ret; |
247 | 4.53k | } |
248 | | |
249 | 6.51k | if (s->alpha_type == SHQ_RLE_ALPHA) { |
250 | | /* Alpha coded using 16x8 RLE blocks. */ |
251 | 2.42k | if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a, linesize_a)) < 0) |
252 | 251 | return ret; |
253 | 2.17k | if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0) |
254 | 229 | return ret; |
255 | 4.08k | } else if (s->alpha_type == SHQ_DCT_ALPHA) { |
256 | | /* Alpha encoded exactly like luma. */ |
257 | 1.94k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a, linesize_a)) < 0) |
258 | 230 | return ret; |
259 | 1.71k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8, linesize_a)) < 0) |
260 | 220 | return ret; |
261 | 1.49k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0) |
262 | 199 | return ret; |
263 | 1.29k | if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0) |
264 | 204 | return ret; |
265 | 1.29k | } |
266 | 6.51k | } |
267 | | |
268 | 526 | return 0; |
269 | 5.28k | } |
270 | | |
271 | | static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride, int slice_number) |
272 | 339k | { |
273 | 339k | int ret, x, y, slice_offsets[5]; |
274 | 339k | uint32_t slice_begin, slice_end; |
275 | 339k | int linesize_y = frame->linesize[0] * line_stride; |
276 | 339k | int linesize_cb = frame->linesize[1] * line_stride; |
277 | 339k | int linesize_cr = frame->linesize[2] * line_stride; |
278 | 339k | int linesize_a; |
279 | 339k | GetBitContext gb; |
280 | | |
281 | 339k | if (s->alpha_type != SHQ_NO_ALPHA) |
282 | 289k | linesize_a = frame->linesize[3] * line_stride; |
283 | | |
284 | 339k | if (end < start || end - start < 3 || end > buf_size) |
285 | 197k | return AVERROR_INVALIDDATA; |
286 | | |
287 | 142k | slice_offsets[0] = start; |
288 | 142k | slice_offsets[4] = end; |
289 | 252k | for (x = 1; x < 4; x++) { |
290 | 217k | uint32_t last_offset, slice_len; |
291 | | |
292 | 217k | last_offset = slice_offsets[x - 1]; |
293 | 217k | slice_len = AV_RL24(buf + last_offset); |
294 | 217k | slice_offsets[x] = last_offset + slice_len; |
295 | | |
296 | 217k | if (slice_len < 3 || slice_offsets[x] > end - 3) |
297 | 107k | return AVERROR_INVALIDDATA; |
298 | 217k | } |
299 | | |
300 | 34.8k | slice_begin = slice_offsets[slice_number]; |
301 | 34.8k | slice_end = slice_offsets[slice_number + 1]; |
302 | | |
303 | 34.8k | if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0) |
304 | 0 | return ret; |
305 | | |
306 | 241k | for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) { |
307 | 223k | uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a; |
308 | 223k | int last_dc[4] = { 1024, 1024, 1024, 1024 }; |
309 | 223k | uint8_t last_alpha[16]; |
310 | | |
311 | 223k | memset(last_alpha, 255, sizeof(last_alpha)); |
312 | | |
313 | 223k | dest_y = frame->data[0] + frame->linesize[0] * (y + field_number); |
314 | 223k | if (s->subsampling == SHQ_SUBSAMPLING_420) { |
315 | 40.3k | dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number); |
316 | 40.3k | dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number); |
317 | 183k | } else { |
318 | 183k | dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number); |
319 | 183k | dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number); |
320 | 183k | } |
321 | 223k | if (s->alpha_type != SHQ_NO_ALPHA) { |
322 | 137k | dest_a = frame->data[3] + frame->linesize[3] * (y + field_number); |
323 | 137k | } |
324 | | |
325 | 235k | for (x = 0; x < frame->width - 8 * (s->subsampling != SHQ_SUBSAMPLING_444); x += 16) { |
326 | | /* Decode the four luma blocks. */ |
327 | 28.6k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0) |
328 | 9.28k | return ret; |
329 | 19.3k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0) |
330 | 512 | return ret; |
331 | 18.8k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0) |
332 | 634 | return ret; |
333 | 18.1k | if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0) |
334 | 702 | return ret; |
335 | | |
336 | | /* |
337 | | * Decode the first chroma block. For 4:2:0, this is the only one; |
338 | | * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block. |
339 | | */ |
340 | 17.4k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0) |
341 | 879 | return ret; |
342 | 16.6k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0) |
343 | 629 | return ret; |
344 | | |
345 | 15.9k | if (s->subsampling != SHQ_SUBSAMPLING_420) { |
346 | | /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */ |
347 | 11.8k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0) |
348 | 401 | return ret; |
349 | 11.4k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0) |
350 | 522 | return ret; |
351 | | |
352 | 10.9k | if (s->subsampling == SHQ_SUBSAMPLING_444) { |
353 | | /* Top-right and bottom-right blocks. */ |
354 | 3.43k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, linesize_cb)) < 0) |
355 | 369 | return ret; |
356 | 3.06k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, linesize_cr)) < 0) |
357 | 331 | return ret; |
358 | 2.73k | if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0) |
359 | 239 | return ret; |
360 | 2.49k | if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0) |
361 | 331 | return ret; |
362 | | |
363 | 2.16k | dest_cb += 8; |
364 | 2.16k | dest_cr += 8; |
365 | 2.16k | } |
366 | 10.9k | } |
367 | 13.7k | dest_y += 16; |
368 | 13.7k | dest_cb += 8; |
369 | 13.7k | dest_cr += 8; |
370 | | |
371 | 13.7k | if (s->alpha_type == SHQ_RLE_ALPHA) { |
372 | | /* Alpha coded using 16x8 RLE blocks. */ |
373 | 5.07k | if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0) |
374 | 796 | return ret; |
375 | 4.27k | if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0) |
376 | 374 | return ret; |
377 | 3.90k | dest_a += 16; |
378 | 8.72k | } else if (s->alpha_type == SHQ_DCT_ALPHA) { |
379 | | /* Alpha encoded exactly like luma. */ |
380 | 2.04k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0) |
381 | 473 | return ret; |
382 | 1.56k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8, linesize_a)) < 0) |
383 | 462 | return ret; |
384 | 1.10k | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0) |
385 | 229 | return ret; |
386 | 877 | if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0) |
387 | 259 | return ret; |
388 | 618 | dest_a += 16; |
389 | 618 | } |
390 | 13.7k | } |
391 | 223k | } |
392 | | |
393 | 17.4k | if (s->subsampling != SHQ_SUBSAMPLING_444 && (frame->width & 15) && slice_number == 3) |
394 | 5.28k | return decode_speedhq_border(s, &gb, frame, field_number, line_stride); |
395 | | |
396 | 12.1k | return 0; |
397 | 17.4k | } |
398 | | |
399 | | static int decode_slice_progressive(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
400 | 65.9k | { |
401 | 65.9k | SHQContext *s = avctx->priv_data; |
402 | 65.9k | (void)threadnr; |
403 | | |
404 | 65.9k | return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 0, 4, s->avpkt->size, 1, jobnr); |
405 | 65.9k | } |
406 | | |
407 | | static int decode_slice_interlaced(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
408 | 274k | { |
409 | 274k | SHQContext *s = avctx->priv_data; |
410 | 274k | int field_number = jobnr / 4; |
411 | 274k | int slice_number = jobnr % 4; |
412 | 274k | (void)threadnr; |
413 | | |
414 | 274k | if (field_number == 0) |
415 | 137k | return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 0, 4, s->second_field_offset, 2, slice_number); |
416 | 137k | else |
417 | 137k | return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 1, s->second_field_offset, s->avpkt->size, 2, slice_number); |
418 | 274k | } |
419 | | |
420 | | static void compute_quant_matrix(int *output, int qscale) |
421 | 52.3k | { |
422 | 52.3k | int i; |
423 | 3.40M | for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale; |
424 | 52.3k | } |
425 | | |
426 | | static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame, |
427 | | int *got_frame, AVPacket *avpkt) |
428 | 259k | { |
429 | 259k | SHQContext * const s = avctx->priv_data; |
430 | 259k | const uint8_t *buf = avpkt->data; |
431 | 259k | int buf_size = avpkt->size; |
432 | 259k | uint8_t quality; |
433 | 259k | int ret; |
434 | | |
435 | 259k | if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0) |
436 | 171k | return AVERROR_INVALIDDATA; |
437 | 87.4k | if (buf_size < avctx->width*avctx->height / 64 / 4) |
438 | 14.0k | return AVERROR_INVALIDDATA; |
439 | | |
440 | 73.4k | quality = buf[0]; |
441 | 73.4k | if (quality >= 100) { |
442 | 817 | return AVERROR_INVALIDDATA; |
443 | 817 | } |
444 | | |
445 | 72.6k | if (avctx->skip_frame >= AVDISCARD_ALL) |
446 | 20.3k | return avpkt->size; |
447 | | |
448 | 52.3k | compute_quant_matrix(s->quant_matrix, 100 - quality); |
449 | | |
450 | 52.3k | s->second_field_offset = AV_RL24(buf + 1); |
451 | 52.3k | if (s->second_field_offset >= buf_size - 3) { |
452 | 1.60k | return AVERROR_INVALIDDATA; |
453 | 1.60k | } |
454 | | |
455 | 50.7k | avctx->coded_width = FFALIGN(avctx->width, 16); |
456 | 50.7k | avctx->coded_height = FFALIGN(avctx->height, 16); |
457 | | |
458 | 50.7k | if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0) { |
459 | 0 | return ret; |
460 | 0 | } |
461 | | |
462 | 50.7k | s->avpkt = avpkt; |
463 | | |
464 | 50.7k | if (s->second_field_offset == 4 || s->second_field_offset == (buf_size-4)) { |
465 | | /* |
466 | | * Overlapping first and second fields is used to signal |
467 | | * encoding only a single field. In this case, "height" |
468 | | * is ambiguous; it could mean either the height of the |
469 | | * frame as a whole, or of the field. The former would make |
470 | | * more sense for compatibility with legacy decoders, |
471 | | * but this matches the convention used in NDI, which is |
472 | | * the primary user of this trick. |
473 | | */ |
474 | 16.4k | if ((ret = avctx->execute2(avctx, decode_slice_progressive, frame, NULL, 4)) < 0) |
475 | 0 | return ret; |
476 | 34.2k | } else { |
477 | 34.2k | if ((ret = avctx->execute2(avctx, decode_slice_interlaced, frame, NULL, 8)) < 0) |
478 | 0 | return ret; |
479 | 34.2k | } |
480 | | |
481 | 50.7k | *got_frame = 1; |
482 | 50.7k | return buf_size; |
483 | 50.7k | } |
484 | | |
485 | | /* |
486 | | * Alpha VLC. Run and level are independently coded, and would be |
487 | | * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't |
488 | | * bother with combining them into one table. |
489 | | */ |
490 | | static av_cold void compute_alpha_vlcs(void) |
491 | 1 | { |
492 | 1 | uint16_t run_code[134], level_code[266]; |
493 | 1 | uint8_t run_bits[134], level_bits[266]; |
494 | 1 | int16_t run_symbols[134], level_symbols[266]; |
495 | 1 | int entry, i, sign; |
496 | | |
497 | | /* Initialize VLC for alpha run. */ |
498 | 1 | entry = 0; |
499 | | |
500 | | /* 0 -> 0. */ |
501 | 1 | run_code[entry] = 0; |
502 | 1 | run_bits[entry] = 1; |
503 | 1 | run_symbols[entry] = 0; |
504 | 1 | ++entry; |
505 | | |
506 | | /* 10xx -> xx plus 1. */ |
507 | 5 | for (i = 0; i < 4; ++i) { |
508 | 4 | run_code[entry] = (i << 2) | 1; |
509 | 4 | run_bits[entry] = 4; |
510 | 4 | run_symbols[entry] = i + 1; |
511 | 4 | ++entry; |
512 | 4 | } |
513 | | |
514 | | /* 111xxxxxxx -> xxxxxxx. */ |
515 | 129 | for (i = 0; i < 128; ++i) { |
516 | 128 | run_code[entry] = (i << 3) | 7; |
517 | 128 | run_bits[entry] = 10; |
518 | 128 | run_symbols[entry] = i; |
519 | 128 | ++entry; |
520 | 128 | } |
521 | | |
522 | | /* 110 -> EOB. */ |
523 | 1 | run_code[entry] = 3; |
524 | 1 | run_bits[entry] = 3; |
525 | 1 | run_symbols[entry] = -1; |
526 | 1 | ++entry; |
527 | | |
528 | 1 | av_assert0(entry == FF_ARRAY_ELEMS(run_code)); |
529 | | |
530 | 1 | VLC_INIT_STATIC_SPARSE_TABLE(dc_alpha_run_vlc_le, ALPHA_VLC_BITS, |
531 | 1 | FF_ARRAY_ELEMS(run_code), |
532 | 1 | run_bits, 1, 1, |
533 | 1 | run_code, 2, 2, |
534 | 1 | run_symbols, 2, 2, VLC_INIT_LE); |
535 | | |
536 | | /* Initialize VLC for alpha level. */ |
537 | 1 | entry = 0; |
538 | | |
539 | 3 | for (sign = 0; sign <= 1; ++sign) { |
540 | | /* 1s -> -1 or +1 (depending on sign bit). */ |
541 | 2 | level_code[entry] = (sign << 1) | 1; |
542 | 2 | level_bits[entry] = 2; |
543 | 2 | level_symbols[entry] = sign ? -1 : 1; |
544 | 2 | ++entry; |
545 | | |
546 | | /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */ |
547 | 10 | for (i = 0; i < 4; ++i) { |
548 | 8 | level_code[entry] = (i << 3) | (sign << 2) | 2; |
549 | 8 | level_bits[entry] = 5; |
550 | 8 | level_symbols[entry] = sign ? -(i + 2) : (i + 2); |
551 | 8 | ++entry; |
552 | 8 | } |
553 | 2 | } |
554 | | |
555 | | /* |
556 | | * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes |
557 | | * here that would better be encoded in other ways (e.g. 0 would be |
558 | | * encoded by increasing run, and +/- 1 would be encoded with a |
559 | | * shorter code), but it doesn't hurt to allow everything. |
560 | | */ |
561 | 257 | for (i = 0; i < 256; ++i) { |
562 | 256 | level_code[entry] = i << 2; |
563 | 256 | level_bits[entry] = 10; |
564 | 256 | level_symbols[entry] = i; |
565 | 256 | ++entry; |
566 | 256 | } |
567 | | |
568 | 1 | av_assert0(entry == FF_ARRAY_ELEMS(level_code)); |
569 | | |
570 | 1 | VLC_INIT_STATIC_SPARSE_TABLE(dc_alpha_level_vlc_le, ALPHA_VLC_BITS, |
571 | 1 | FF_ARRAY_ELEMS(level_code), |
572 | 1 | level_bits, 1, 1, |
573 | 1 | level_code, 2, 2, |
574 | 1 | level_symbols, 2, 2, VLC_INIT_LE); |
575 | 1 | } |
576 | | |
577 | | static av_cold void speedhq_static_init(void) |
578 | 1 | { |
579 | | /* Exactly the same as MPEG-2, except for a little-endian reader. */ |
580 | 1 | VLC_INIT_STATIC_TABLE(dc_lum_vlc_le, DC_VLC_BITS, 12, |
581 | 1 | ff_mpeg12_vlc_dc_lum_bits, 1, 1, |
582 | 1 | ff_mpeg12_vlc_dc_lum_code, 2, 2, |
583 | 1 | VLC_INIT_OUTPUT_LE); |
584 | 1 | VLC_INIT_STATIC_TABLE(dc_chroma_vlc_le, DC_VLC_BITS, 12, |
585 | 1 | ff_mpeg12_vlc_dc_chroma_bits, 1, 1, |
586 | 1 | ff_mpeg12_vlc_dc_chroma_code, 2, 2, |
587 | 1 | VLC_INIT_OUTPUT_LE); |
588 | | |
589 | 1 | ff_init_2d_vlc_rl(ff_speedhq_vlc_table, speedhq_rl_vlc, ff_speedhq_run, |
590 | 1 | ff_speedhq_level, SPEEDHQ_RL_NB_ELEMS, |
591 | 1 | FF_ARRAY_ELEMS(speedhq_rl_vlc), VLC_INIT_LE); |
592 | | |
593 | 1 | compute_alpha_vlcs(); |
594 | 1 | } |
595 | | |
596 | | static av_cold int speedhq_decode_init(AVCodecContext *avctx) |
597 | 1.72k | { |
598 | 1.72k | int ret; |
599 | 1.72k | static AVOnce init_once = AV_ONCE_INIT; |
600 | 1.72k | SHQContext * const s = avctx->priv_data; |
601 | | |
602 | 1.72k | ret = ff_thread_once(&init_once, speedhq_static_init); |
603 | 1.72k | if (ret) |
604 | 0 | return AVERROR_UNKNOWN; |
605 | | |
606 | 1.72k | ff_blockdsp_init(&s->bdsp); |
607 | 1.72k | ff_idctdsp_init(&s->idsp, avctx); |
608 | 1.72k | ff_permute_scantable(s->permutated_intra_scantable, ff_zigzag_direct, |
609 | 1.72k | s->idsp.idct_permutation); |
610 | | |
611 | 1.72k | switch (avctx->codec_tag) { |
612 | 257 | case MKTAG('S', 'H', 'Q', '0'): |
613 | 257 | s->subsampling = SHQ_SUBSAMPLING_420; |
614 | 257 | s->alpha_type = SHQ_NO_ALPHA; |
615 | 257 | avctx->pix_fmt = AV_PIX_FMT_YUV420P; |
616 | 257 | break; |
617 | 221 | case MKTAG('S', 'H', 'Q', '1'): |
618 | 221 | s->subsampling = SHQ_SUBSAMPLING_420; |
619 | 221 | s->alpha_type = SHQ_RLE_ALPHA; |
620 | 221 | avctx->pix_fmt = AV_PIX_FMT_YUVA420P; |
621 | 221 | break; |
622 | 250 | case MKTAG('S', 'H', 'Q', '2'): |
623 | 250 | s->subsampling = SHQ_SUBSAMPLING_422; |
624 | 250 | s->alpha_type = SHQ_NO_ALPHA; |
625 | 250 | avctx->pix_fmt = AV_PIX_FMT_YUV422P; |
626 | 250 | break; |
627 | 384 | case MKTAG('S', 'H', 'Q', '3'): |
628 | 384 | s->subsampling = SHQ_SUBSAMPLING_422; |
629 | 384 | s->alpha_type = SHQ_RLE_ALPHA; |
630 | 384 | avctx->pix_fmt = AV_PIX_FMT_YUVA422P; |
631 | 384 | break; |
632 | 168 | case MKTAG('S', 'H', 'Q', '4'): |
633 | 168 | s->subsampling = SHQ_SUBSAMPLING_444; |
634 | 168 | s->alpha_type = SHQ_NO_ALPHA; |
635 | 168 | avctx->pix_fmt = AV_PIX_FMT_YUV444P; |
636 | 168 | break; |
637 | 77 | case MKTAG('S', 'H', 'Q', '5'): |
638 | 77 | s->subsampling = SHQ_SUBSAMPLING_444; |
639 | 77 | s->alpha_type = SHQ_RLE_ALPHA; |
640 | 77 | avctx->pix_fmt = AV_PIX_FMT_YUVA444P; |
641 | 77 | break; |
642 | 140 | case MKTAG('S', 'H', 'Q', '7'): |
643 | 140 | s->subsampling = SHQ_SUBSAMPLING_422; |
644 | 140 | s->alpha_type = SHQ_DCT_ALPHA; |
645 | 140 | avctx->pix_fmt = AV_PIX_FMT_YUVA422P; |
646 | 140 | break; |
647 | 35 | case MKTAG('S', 'H', 'Q', '9'): |
648 | 35 | s->subsampling = SHQ_SUBSAMPLING_444; |
649 | 35 | s->alpha_type = SHQ_DCT_ALPHA; |
650 | 35 | avctx->pix_fmt = AV_PIX_FMT_YUVA444P; |
651 | 35 | break; |
652 | 188 | default: |
653 | 188 | av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n", |
654 | 188 | avctx->codec_tag); |
655 | 188 | return AVERROR_INVALIDDATA; |
656 | 1.72k | } |
657 | | |
658 | | /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */ |
659 | 1.53k | avctx->colorspace = AVCOL_SPC_BT470BG; |
660 | 1.53k | avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; |
661 | | |
662 | 1.53k | return 0; |
663 | 1.72k | } |
664 | | |
665 | | const FFCodec ff_speedhq_decoder = { |
666 | | .p.name = "speedhq", |
667 | | CODEC_LONG_NAME("NewTek SpeedHQ"), |
668 | | .p.type = AVMEDIA_TYPE_VIDEO, |
669 | | .p.id = AV_CODEC_ID_SPEEDHQ, |
670 | | .priv_data_size = sizeof(SHQContext), |
671 | | .init = speedhq_decode_init, |
672 | | FF_CODEC_DECODE_CB(speedhq_decode_frame), |
673 | | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, |
674 | | }; |