/src/ffmpeg/libavcodec/dxvenc.c
Line | Count | Source |
1 | | /* |
2 | | * Resolume DXV encoder |
3 | | * Copyright (C) 2024 Emma Worley <emma@emma.gg> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include <stdint.h> |
23 | | |
24 | | #include "libavcodec/hashtable.h" |
25 | | #include "libavutil/imgutils.h" |
26 | | #include "libavutil/mem.h" |
27 | | #include "libavutil/opt.h" |
28 | | |
29 | | #include "bytestream.h" |
30 | | #include "codec_internal.h" |
31 | | #include "dxv.h" |
32 | | #include "encode.h" |
33 | | #include "texturedsp.h" |
34 | | |
/* Number of bytes written ahead of the compressed texture in every packet. */
#define DXV_HEADER_LENGTH 12

/*
 * Frame dimensions are rounded up to a multiple of 16 before encoding:
 * Resolume will refuse to display frames that are not padded to 16x16 pixels.
 */
#define DXV_ALIGN(x) FFALIGN((x), 16)

/*
 * DXV uses LZ-like back-references to avoid copying words that have already
 * appeared in the decompressed stream. Using a simple hash table (HT)
 * significantly speeds up the lookback process while encoding.
 *
 * LOOKBACK_WORDS is the size of the lookback window in 32-bit words. It
 * equals 2 * (0xFFFF + 0x102), i.e. the largest distance PUSH_OP(2) can
 * store in its 16-bit operand form. Each hash table is sized to hold one
 * entry per word of that window.
 */
#define LOOKBACK_WORDS    0x20202
#define LOOKBACK_HT_ELEMS LOOKBACK_WORDS
49 | | |
50 | | typedef struct DXVEncContext { |
51 | | AVClass *class; |
52 | | |
53 | | PutByteContext pbc; |
54 | | |
55 | | uint8_t *tex_data; // Compressed texture |
56 | | int64_t tex_size; // Texture size |
57 | | |
58 | | /* Optimal number of slices for parallel decoding */ |
59 | | int slice_count; |
60 | | |
61 | | TextureDSPThreadContext enc; |
62 | | |
63 | | DXVTextureFormat tex_fmt; |
64 | | int (*compress_tex)(AVCodecContext *avctx); |
65 | | |
66 | | FFHashtableContext *color_ht; |
67 | | FFHashtableContext *lut_ht; |
68 | | FFHashtableContext *combo_ht; |
69 | | } DXVEncContext; |
70 | | |
/*
 * Converts an index offset value to a 2-bit opcode and pushes it to a stream.
 * Inverse of CHECKPOINT in dxv.c.
 *
 * x is the unit the offset is counted in; it is parenthesized on every use so
 * that an expression argument expands correctly.
 *
 * The macro is not self-contained. It reads and writes these locals of the
 * function it is expanded in:
 *   pbc   - PutByteContext * of the output stream
 *   value - pointer to the 32-bit word currently collecting opcodes
 *   state - number of opcodes already stored in *value (16 == word is full)
 *   idx   - offset to encode
 *   op    - receives the chosen opcode
 *
 * Sixteen opcodes share one little-endian 32-bit word. Once the current word
 * is full, a fresh zeroed word is reserved in the stream; if fewer than four
 * bytes are left for it, the macro makes the ENCLOSING FUNCTION return
 * AVERROR_INVALIDDATA.
 *
 * Opcode selection:
 *   3: idx >= 0x102 * x, followed by (idx / x) - 0x102 as a 16-bit LE value
 *   2: idx >= 2 * x,     followed by (idx / x) - 2 as a single byte
 *   1: idx == x,         no operand
 *   0: any other value,  no operand
 */
#define PUSH_OP(x)                                                            \
    do {                                                                      \
        if (state == 16) {                                                    \
            if (bytestream2_get_bytes_left_p(pbc) < 4) {                      \
                return AVERROR_INVALIDDATA;                                   \
            }                                                                 \
            value = pbc->buffer;                                              \
            bytestream2_put_le32(pbc, 0);                                     \
            state = 0;                                                        \
        }                                                                     \
        if (idx >= 0x102 * (x)) {                                             \
            op = 3;                                                           \
            bytestream2_put_le16(pbc, (idx / (x)) - 0x102);                   \
        } else if (idx >= 2 * (x)) {                                          \
            op = 2;                                                           \
            bytestream2_put_byte(pbc, (idx / (x)) - 2);                       \
        } else if (idx == (x)) {                                              \
            op = 1;                                                           \
        } else {                                                              \
            op = 0;                                                           \
        }                                                                     \
        AV_WL32(value, AV_RL32(value) | (op << (state * 2)));                 \
        state++;                                                              \
    } while (0)
97 | | |
98 | | static int dxv_compress_dxt1(AVCodecContext *avctx) |
99 | 8.57k | { |
100 | 8.57k | DXVEncContext *ctx = avctx->priv_data; |
101 | 8.57k | PutByteContext *pbc = &ctx->pbc; |
102 | 8.57k | void *value; |
103 | 8.57k | uint32_t idx, combo_idx, prev_pos, old_pos, state = 16, pos = 0, op = 0; |
104 | | |
105 | 8.57k | ff_hashtable_clear(ctx->color_ht); |
106 | 8.57k | ff_hashtable_clear(ctx->lut_ht); |
107 | 8.57k | ff_hashtable_clear(ctx->combo_ht); |
108 | | |
109 | 8.57k | ff_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos); |
110 | | |
111 | 8.57k | bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data)); |
112 | 8.57k | ff_hashtable_set(ctx->color_ht, ctx->tex_data, &pos); |
113 | 8.57k | pos++; |
114 | 8.57k | bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4)); |
115 | 8.57k | ff_hashtable_set(ctx->lut_ht, ctx->tex_data + 4, &pos); |
116 | 8.57k | pos++; |
117 | | |
118 | 13.3M | while (pos + 2 <= ctx->tex_size / 4) { |
119 | 13.3M | combo_idx = ff_hashtable_get(ctx->combo_ht, ctx->tex_data + pos * 4, &prev_pos) ? pos - prev_pos : 0; |
120 | 13.3M | idx = combo_idx; |
121 | 13.3M | PUSH_OP(2); |
122 | 13.3M | if (pos >= LOOKBACK_WORDS) { |
123 | 0 | old_pos = pos - LOOKBACK_WORDS; |
124 | 0 | if (ff_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) |
125 | 0 | ff_hashtable_delete(ctx->combo_ht, ctx->tex_data + old_pos * 4); |
126 | 0 | } |
127 | 13.3M | ff_hashtable_set(ctx->combo_ht, ctx->tex_data + pos * 4, &pos); |
128 | | |
129 | 13.3M | if (!combo_idx) { |
130 | 367k | idx = ff_hashtable_get(ctx->color_ht, ctx->tex_data + pos * 4, &prev_pos) ? pos - prev_pos : 0; |
131 | 367k | PUSH_OP(2); |
132 | 367k | if (!idx) |
133 | 328k | bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + pos * 4)); |
134 | 367k | } |
135 | 13.3M | if (pos >= LOOKBACK_WORDS) { |
136 | 0 | old_pos = pos - LOOKBACK_WORDS; |
137 | 0 | if (ff_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) |
138 | 0 | ff_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4); |
139 | 0 | } |
140 | 13.3M | ff_hashtable_set(ctx->color_ht, ctx->tex_data + pos * 4, &pos); |
141 | 13.3M | pos++; |
142 | | |
143 | 13.3M | if (!combo_idx) { |
144 | 367k | idx = ff_hashtable_get(ctx->lut_ht, ctx->tex_data + pos * 4, &prev_pos) ? pos - prev_pos : 0; |
145 | 367k | PUSH_OP(2); |
146 | 367k | if (!idx) |
147 | 125k | bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + pos * 4)); |
148 | 367k | } |
149 | 13.3M | if (pos >= LOOKBACK_WORDS) { |
150 | 0 | old_pos = pos - LOOKBACK_WORDS; |
151 | 0 | if (ff_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) |
152 | 0 | ff_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4); |
153 | 0 | } |
154 | 13.3M | ff_hashtable_set(ctx->lut_ht, ctx->tex_data + pos * 4, &pos); |
155 | 13.3M | pos++; |
156 | 13.3M | } |
157 | | |
158 | 8.57k | return 0; |
159 | 8.57k | } |
160 | | |
161 | | static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt, |
162 | | const AVFrame *frame, int *got_packet) |
163 | 8.57k | { |
164 | 8.57k | DXVEncContext *ctx = avctx->priv_data; |
165 | 8.57k | PutByteContext *pbc = &ctx->pbc; |
166 | 8.57k | int ret; |
167 | | |
168 | | /* unimplemented: needs to depend on compression ratio of tex format */ |
169 | | /* under DXT1, we need 3 words to encode load ops for 32 words. |
170 | | * the first 2 words of the texture do not need load ops. */ |
171 | 8.57k | ret = ff_alloc_packet(avctx, pkt, DXV_HEADER_LENGTH + ctx->tex_size + AV_CEIL_RSHIFT(ctx->tex_size - 8, 7) * 12); |
172 | 8.57k | if (ret < 0) |
173 | 0 | return ret; |
174 | | |
175 | 8.57k | if (ctx->enc.tex_funct) { |
176 | 8.57k | uint8_t *safe_data[4] = {frame->data[0], 0, 0, 0}; |
177 | 8.57k | int safe_linesize[4] = {frame->linesize[0], 0, 0, 0}; |
178 | | |
179 | 8.57k | if (avctx->width != DXV_ALIGN(avctx->width) || avctx->height != DXV_ALIGN(avctx->height)) { |
180 | 7.70k | ret = av_image_alloc( |
181 | 7.70k | safe_data, |
182 | 7.70k | safe_linesize, |
183 | 7.70k | DXV_ALIGN(avctx->width), |
184 | 7.70k | DXV_ALIGN(avctx->height), |
185 | 7.70k | avctx->pix_fmt, |
186 | 7.70k | 1); |
187 | 7.70k | if (ret < 0) |
188 | 0 | return ret; |
189 | | |
190 | 7.70k | av_image_copy2( |
191 | 7.70k | safe_data, |
192 | 7.70k | safe_linesize, |
193 | 7.70k | frame->data, |
194 | 7.70k | frame->linesize, |
195 | 7.70k | avctx->pix_fmt, |
196 | 7.70k | avctx->width, |
197 | 7.70k | avctx->height); |
198 | | |
199 | 7.70k | if (avctx->width != DXV_ALIGN(avctx->width)) { |
200 | 7.39k | av_assert0(frame->format == AV_PIX_FMT_RGBA); |
201 | 1.58M | for (int y = 0; y < avctx->height; y++) { |
202 | 1.58M | memset(safe_data[0] + y * safe_linesize[0] + 4*avctx->width, 0, safe_linesize[0] - 4*avctx->width); |
203 | 1.58M | } |
204 | 7.39k | } |
205 | 7.70k | if (avctx->height != DXV_ALIGN(avctx->height)) { |
206 | 85.5k | for (int y = avctx->height; y < DXV_ALIGN(avctx->height); y++) { |
207 | 78.8k | memset(safe_data[0] + y * safe_linesize[0], 0, safe_linesize[0]); |
208 | 78.8k | } |
209 | 6.70k | } |
210 | 7.70k | } |
211 | | |
212 | 8.57k | ctx->enc.tex_data.out = ctx->tex_data; |
213 | 8.57k | ctx->enc.frame_data.in = safe_data[0]; |
214 | 8.57k | ctx->enc.stride = safe_linesize[0]; |
215 | 8.57k | ctx->enc.width = DXV_ALIGN(avctx->width); |
216 | 8.57k | ctx->enc.height = DXV_ALIGN(avctx->height); |
217 | 8.57k | ff_texturedsp_exec_compress_threads(avctx, &ctx->enc); |
218 | | |
219 | 8.57k | if (safe_data[0] != frame->data[0]) |
220 | 7.70k | av_freep(&safe_data[0]); |
221 | 8.57k | } else { |
222 | | /* unimplemented: YCoCg formats */ |
223 | 0 | return AVERROR_INVALIDDATA; |
224 | 0 | } |
225 | | |
226 | 8.57k | bytestream2_init_writer(pbc, pkt->data, pkt->size); |
227 | | |
228 | 8.57k | bytestream2_put_le32(pbc, ctx->tex_fmt); |
229 | 8.57k | bytestream2_put_byte(pbc, 4); |
230 | 8.57k | bytestream2_put_byte(pbc, 0); |
231 | 8.57k | bytestream2_put_byte(pbc, 0); |
232 | 8.57k | bytestream2_put_byte(pbc, 0); |
233 | | /* Fill in compressed size later */ |
234 | 8.57k | bytestream2_skip_p(pbc, 4); |
235 | | |
236 | 8.57k | ret = ctx->compress_tex(avctx); |
237 | 8.57k | if (ret < 0) |
238 | 0 | return ret; |
239 | | |
240 | 8.57k | AV_WL32(pkt->data + 8, bytestream2_tell_p(pbc) - DXV_HEADER_LENGTH); |
241 | 8.57k | av_shrink_packet(pkt, bytestream2_tell_p(pbc)); |
242 | | |
243 | 8.57k | *got_packet = 1; |
244 | 8.57k | return 0; |
245 | 8.57k | } |
246 | | |
247 | | static av_cold int dxv_init(AVCodecContext *avctx) |
248 | 662 | { |
249 | 662 | DXVEncContext *ctx = avctx->priv_data; |
250 | 662 | TextureDSPEncContext texdsp; |
251 | 662 | int ret = av_image_check_size(avctx->width, avctx->height, 0, avctx); |
252 | | |
253 | 662 | if (ret < 0) { |
254 | 0 | av_log(avctx, AV_LOG_ERROR, "Invalid image size %dx%d.\n", |
255 | 0 | avctx->width, avctx->height); |
256 | 0 | return ret; |
257 | 0 | } |
258 | | |
259 | 662 | ff_texturedspenc_init(&texdsp); |
260 | | |
261 | 662 | switch (ctx->tex_fmt) { |
262 | 662 | case DXV_FMT_DXT1: |
263 | 662 | ctx->compress_tex = dxv_compress_dxt1; |
264 | 662 | ctx->enc.tex_funct = texdsp.dxt1_block; |
265 | 662 | ctx->enc.tex_ratio = 8; |
266 | 662 | break; |
267 | 0 | default: |
268 | 0 | av_log(avctx, AV_LOG_ERROR, "Invalid format %08X\n", ctx->tex_fmt); |
269 | 0 | return AVERROR_INVALIDDATA; |
270 | 662 | } |
271 | 662 | ctx->enc.raw_ratio = 16; |
272 | 662 | ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W * |
273 | 662 | DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H * |
274 | 662 | ctx->enc.tex_ratio; |
275 | 662 | ctx->enc.slice_count = av_clip(avctx->thread_count, 1, DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H); |
276 | | |
277 | 662 | ctx->tex_data = av_malloc(ctx->tex_size); |
278 | 662 | if (!ctx->tex_data) { |
279 | 0 | return AVERROR(ENOMEM); |
280 | 0 | } |
281 | | |
282 | 662 | ret = ff_hashtable_alloc(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); |
283 | 662 | if (ret < 0) |
284 | 0 | return ret; |
285 | 662 | ret = ff_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); |
286 | 662 | if (ret < 0) |
287 | 0 | return ret; |
288 | 662 | ret = ff_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); |
289 | 662 | if (ret < 0) |
290 | 0 | return ret; |
291 | | |
292 | 662 | return 0; |
293 | 662 | } |
294 | | |
295 | | static av_cold int dxv_close(AVCodecContext *avctx) |
296 | 662 | { |
297 | 662 | DXVEncContext *ctx = avctx->priv_data; |
298 | | |
299 | 662 | av_freep(&ctx->tex_data); |
300 | | |
301 | 662 | ff_hashtable_freep(&ctx->color_ht); |
302 | 662 | ff_hashtable_freep(&ctx->lut_ht); |
303 | 662 | ff_hashtable_freep(&ctx->combo_ht); |
304 | | |
305 | 662 | return 0; |
306 | 662 | } |
307 | | |
308 | | #define OFFSET(x) offsetof(DXVEncContext, x) |
309 | | #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM |
310 | | static const AVOption options[] = { |
311 | | { "format", NULL, OFFSET(tex_fmt), AV_OPT_TYPE_INT, { .i64 = DXV_FMT_DXT1 }, DXV_FMT_DXT1, DXV_FMT_DXT1, FLAGS, .unit = "format" }, |
312 | | { "dxt1", "DXT1 (Normal Quality, No Alpha)", 0, AV_OPT_TYPE_CONST, { .i64 = DXV_FMT_DXT1 }, 0, 0, FLAGS, .unit = "format" }, |
313 | | { NULL }, |
314 | | }; |
315 | | |
316 | | static const AVClass dxvenc_class = { |
317 | | .class_name = "DXV encoder", |
318 | | .option = options, |
319 | | .version = LIBAVUTIL_VERSION_INT, |
320 | | }; |
321 | | |
322 | | const FFCodec ff_dxv_encoder = { |
323 | | .p.name = "dxv", |
324 | | CODEC_LONG_NAME("Resolume DXV"), |
325 | | .p.type = AVMEDIA_TYPE_VIDEO, |
326 | | .p.id = AV_CODEC_ID_DXV, |
327 | | .init = dxv_init, |
328 | | FF_CODEC_ENCODE_CB(dxv_encode), |
329 | | .close = dxv_close, |
330 | | .priv_data_size = sizeof(DXVEncContext), |
331 | | .p.capabilities = AV_CODEC_CAP_DR1 | |
332 | | AV_CODEC_CAP_SLICE_THREADS | |
333 | | AV_CODEC_CAP_FRAME_THREADS, |
334 | | .p.priv_class = &dxvenc_class, |
335 | | CODEC_PIXFMTS(AV_PIX_FMT_RGBA), |
336 | | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, |
337 | | }; |