Coverage Report

Created: 2024-09-06 07:53

/src/ffmpeg/libavcodec/proresenc_kostya.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Apple ProRes encoder
3
 *
4
 * Copyright (c) 2011 Anatoliy Wasserman
5
 * Copyright (c) 2012 Konstantin Shishkov
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23
24
#include "libavutil/mem.h"
25
#include "libavutil/mem_internal.h"
26
#include "libavutil/opt.h"
27
#include "libavutil/pixdesc.h"
28
#include "avcodec.h"
29
#include "codec_internal.h"
30
#include "encode.h"
31
#include "fdctdsp.h"
32
#include "put_bits.h"
33
#include "profiles.h"
34
#include "bytestream.h"
35
#include "proresdata.h"
36
37
8.85k
#define CFACTOR_Y422 2
38
923k
#define CFACTOR_Y444 3
39
40
#define MAX_MBS_PER_SLICE 8
41
42
#define MAX_PLANES 4
43
44
/*
 * Encoder profile selector.  The non-negative values are listed in the same
 * order as the entries of prores_profile_info[]; PRORES_PROFILE_AUTO is a
 * negative sentinel that is not a table position.
 */
enum {
    PRORES_PROFILE_AUTO     = -1,
    PRORES_PROFILE_PROXY    =  0,
    PRORES_PROFILE_LT       =  1,
    PRORES_PROFILE_STANDARD =  2,
    PRORES_PROFILE_HQ       =  3,
    PRORES_PROFILE_4444     =  4,
    PRORES_PROFILE_4444XQ   =  5,
};
53
54
/*
 * Row selectors for prores_quant_matrices[]; the values follow the order in
 * which the matrices are laid out in that table.
 */
enum {
    QUANT_MAT_PROXY        = 0,
    QUANT_MAT_PROXY_CHROMA = 1,
    QUANT_MAT_LT           = 2,
    QUANT_MAT_STANDARD     = 3,
    QUANT_MAT_HQ           = 4,
    QUANT_MAT_XQ_LUMA      = 5,
    QUANT_MAT_DEFAULT      = 6,
};
63
64
/*
 * Base 8x8 quantisation matrices, one 64-entry row per QUANT_MAT_* selector
 * (rows appear in the same order as the enumerators).
 */
static const uint8_t prores_quant_matrices[][64] = {
    { /* QUANT_MAT_PROXY */
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { /* QUANT_MAT_PROXY_CHROMA */
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { /* QUANT_MAT_LT */
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { /* QUANT_MAT_STANDARD */
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { /* QUANT_MAT_HQ */
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { /* QUANT_MAT_XQ_LUMA */
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  3,
         2,  2,  2,  2,  2,  2,  3,  3,
         2,  2,  2,  2,  2,  3,  3,  3,
         2,  2,  2,  2,  3,  3,  3,  4,
         2,  2,  2,  2,  3,  3,  4,  4,
    },
    { /* QUANT_MAT_DEFAULT (flat codec default) */
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
136
137
1.21k
#define NUM_MB_LIMITS 4

/*
 * Macroblock-count thresholds in ascending order.  Each struct prores_profile
 * carries a br_tab[] with NUM_MB_LIMITS entries, i.e. one per threshold.
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, /* up to  720x576  */
    2700, /* up to  960x720  */
    6075, /* up to 1440x1080 */
    9216, /* up to 2048x1152 */
};
144
145
static const struct prores_profile {
146
    const char *full_name;
147
    uint32_t    tag;
148
    int         min_quant;
149
    int         max_quant;
150
    int         br_tab[NUM_MB_LIMITS];
151
    int         quant;
152
    int         quant_chroma;
153
} prores_profile_info[6] = {
154
    {
155
        .full_name = "proxy",
156
        .tag       = MKTAG('a', 'p', 'c', 'o'),
157
        .min_quant = 4,
158
        .max_quant = 8,
159
        .br_tab    = { 300, 242, 220, 194 },
160
        .quant     = QUANT_MAT_PROXY,
161
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
162
    },
163
    {
164
        .full_name = "LT",
165
        .tag       = MKTAG('a', 'p', 'c', 's'),
166
        .min_quant = 1,
167
        .max_quant = 9,
168
        .br_tab    = { 720, 560, 490, 440 },
169
        .quant     = QUANT_MAT_LT,
170
        .quant_chroma = QUANT_MAT_LT,
171
    },
172
    {
173
        .full_name = "standard",
174
        .tag       = MKTAG('a', 'p', 'c', 'n'),
175
        .min_quant = 1,
176
        .max_quant = 6,
177
        .br_tab    = { 1050, 808, 710, 632 },
178
        .quant     = QUANT_MAT_STANDARD,
179
        .quant_chroma = QUANT_MAT_STANDARD,
180
    },
181
    {
182
        .full_name = "high quality",
183
        .tag       = MKTAG('a', 'p', 'c', 'h'),
184
        .min_quant = 1,
185
        .max_quant = 6,
186
        .br_tab    = { 1566, 1216, 1070, 950 },
187
        .quant     = QUANT_MAT_HQ,
188
        .quant_chroma = QUANT_MAT_HQ,
189
    },
190
    {
191
        .full_name = "4444",
192
        .tag       = MKTAG('a', 'p', '4', 'h'),
193
        .min_quant = 1,
194
        .max_quant = 6,
195
        .br_tab    = { 2350, 1828, 1600, 1425 },
196
        .quant     = QUANT_MAT_HQ,
197
        .quant_chroma = QUANT_MAT_HQ,
198
    },
199
    {
200
        .full_name = "4444XQ",
201
        .tag       = MKTAG('a', 'p', '4', 'x'),
202
        .min_quant = 1,
203
        .max_quant = 6,
204
        .br_tab    = { 3525, 2742, 2400, 2137 },
205
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
206
        .quant_chroma = QUANT_MAT_HQ,
207
    }
208
};
209
210
1.84M
#define TRELLIS_WIDTH 16
211
34.4M
/* Upper bound for trellis scores; parenthesized so the macro expands as a
 * single operand regardless of the surrounding operators. */
#define SCORE_LIMIT   (INT_MAX / 2)
212
213
/*
 * One node of the quantiser-selection trellis.
 * NOTE(review): the code that fills these fields is outside this view; the
 * per-field notes below follow the field names and should be confirmed.
 */
struct TrellisNode {
    int prev_node;  /* presumably the index of the predecessor node */
    int quant;      /* quantiser associated with this node          */
    int bits;       /* bit count accumulated up to this node        */
    int score;      /* score accumulated up to this node            */
};
219
220
725k
#define MAX_STORED_Q 16
221
222
typedef struct ProresThreadData {
223
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
224
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
225
    int16_t custom_q[64];
226
    int16_t custom_chroma_q[64];
227
    struct TrellisNode *nodes;
228
} ProresThreadData;
229
230
typedef struct ProresContext {
231
    AVClass *class;
232
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
233
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
234
    int16_t quants[MAX_STORED_Q][64];
235
    int16_t quants_chroma[MAX_STORED_Q][64];
236
    int16_t custom_q[64];
237
    int16_t custom_chroma_q[64];
238
    const uint8_t *quant_mat;
239
    const uint8_t *quant_chroma_mat;
240
    const uint8_t *scantable;
241
242
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
243
                 ptrdiff_t linesize, int16_t *block);
244
    FDCTDSPContext fdsp;
245
246
    const AVFrame *pic;
247
    int mb_width, mb_height;
248
    int mbs_per_slice;
249
    int num_chroma_blocks, chroma_factor;
250
    int slices_width;
251
    int slices_per_picture;
252
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
253
    int cur_picture_idx;
254
    int num_planes;
255
    int bits_per_mb;
256
    int force_quant;
257
    int alpha_bits;
258
    int warn;
259
260
    char *vendor;
261
    int quant_sel;
262
263
    int frame_size_upper_bound;
264
265
    int profile;
266
    const struct prores_profile *profile_info;
267
268
    int *slice_q;
269
270
    ProresThreadData *tdata;
271
} ProresContext;
272
273
/**
 * Forward-transform one slice of a single luma/chroma plane.
 *
 * Macroblocks are processed left to right.  A macroblock that is only partly
 * inside the plane is first copied into emu_buf and padded by replicating its
 * last valid column and row; a macroblock that starts at or beyond the right
 * edge ends the slice and the rest of the coefficient buffer is zeroed.
 *
 * @param ctx            encoder context providing the fdct callback
 * @param src            top-left sample of the slice in the plane
 * @param linesize       source stride in bytes
 * @param x, y           position of src inside the plane, in samples
 * @param w, h           plane dimensions, in samples
 * @param blocks         output, 64 coefficients per 8x8 block
 * @param emu_buf        16x16 scratch buffer for edge padding
 * @param mbs_per_slice  number of macroblocks in the slice
 * @param blocks_per_mb  8x8 blocks per macroblock (the macroblock is
 *                       4 * blocks_per_mb samples wide)
 * @param is_chroma      selects the block ordering used for chroma planes
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    const int two_cols = blocks_per_mb > 2;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++, src += mb_width, x += mb_width) {
        const uint16_t *in     = src;
        ptrdiff_t       stride = linesize;
        ptrdiff_t       offs[4];
        ptrdiff_t       lower;
        int             count = 0, b;

        if (x >= w) {
            /* Nothing left inside the plane: clear the remaining blocks. */
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            /* Partial macroblock: build a padded copy in emu_buf. */
            const int avail_w = FFMIN(w - x, mb_width);
            const int avail_h = FFMIN(h - y, 16);
            int row, col, edge;

            for (row = 0; row < avail_h; row++) {
                uint16_t *dst = emu_buf + row * 16;

                memcpy(dst, (const uint8_t*)src + row * linesize,
                       avail_w * sizeof(*src));
                edge = dst[avail_w - 1];
                for (col = avail_w; col < mb_width; col++)
                    dst[col] = edge;
            }
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16,
                       emu_buf + (avail_h - 1) * 16,
                       mb_width * sizeof(*emu_buf));

            in     = emu_buf;
            stride = 16 * sizeof(*emu_buf);
        }

        /* The stride is in bytes and samples are 16 bit, so stride * 4
         * elements is eight rows down. */
        lower = stride * 4;
        if (is_chroma) {
            /* chroma: left column (top, bottom), then right column */
            offs[count++] = 0;
            offs[count++] = lower;
            if (two_cols) {
                offs[count++] = 8;
                offs[count++] = lower + 8;
            }
        } else {
            /* luma: top row (left, right), then bottom row */
            offs[count++] = 0;
            if (two_cols)
                offs[count++] = 8;
            offs[count++] = lower;
            if (two_cols)
                offs[count++] = lower + 8;
        }

        for (b = 0; b < count; b++, blocks += 64)
            ctx->fdct(&ctx->fdsp, in + offs[b], stride, blocks);
    }
}
343
344
/**
 * Copy one slice of the alpha plane into a 16-row buffer and rescale it.
 *
 * Samples are shifted right by 2 when abits is 8; otherwise each sample v is
 * replaced by (v << 6) | (v >> 4).  Columns and rows that fall outside the
 * plane are filled by repeating the last valid column / row.
 *
 * @param ctx            encoder context (not used here)
 * @param src            top-left alpha sample of the slice
 * @param linesize       source stride in bytes
 * @param x, y           position of src inside the plane, in samples
 * @param w, h           plane dimensions, in samples
 * @param blocks         output, 16 rows of 16 * mbs_per_slice samples
 * @param mbs_per_slice  number of macroblocks in the slice
 * @param abits          alpha depth selector (8 or anything else)
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           uint16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w      = FFMIN(w - x, slice_width);
    const int copy_h      = FFMIN(h - y, 16);
    int row, col;

    for (row = 0; row < copy_h; row++) {
        memcpy(blocks, src, copy_w * sizeof(*src));

        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }
        /* pad to the right with the last valid sample of this row */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];

        blocks += slice_width;
        src    += linesize >> 1; /* byte stride -> 16-bit samples */
    }

    /* pad downwards by repeating the previous output row */
    for (; row < 16; row++, blocks += slice_width)
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
}
371
372
/**
373
 * Write an unsigned rice/exp golomb codeword.
374
 */
375
static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
376
35.4M
{
377
35.4M
    unsigned int rice_order, exp_order, switch_bits, switch_val;
378
35.4M
    int exponent;
379
380
    /* number of prefix bits to switch between Rice and expGolomb */
381
35.4M
    switch_bits = (codebook & 3) + 1;
382
35.4M
    rice_order  =  codebook >> 5;       /* rice code order */
383
35.4M
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
384
385
35.4M
    switch_val  = switch_bits << rice_order;
386
387
35.4M
    if (val >= switch_val) {
388
15.4M
        val -= switch_val - (1 << exp_order);
389
15.4M
        exponent = av_log2(val);
390
391
15.4M
        put_bits(pb, exponent - exp_order + switch_bits, 0);
392
15.4M
        put_bits(pb, exponent + 1, val);
393
20.0M
    } else {
394
20.0M
        exponent = val >> rice_order;
395
396
20.0M
        if (exponent)
397
3.89M
            put_bits(pb, exponent, 0);
398
20.0M
        put_bits(pb, 1, 1);
399
20.0M
        if (rice_order)
400
2.40M
            put_sbits(pb, rice_order, val);
401
20.0M
    }
402
35.4M
}
403
404
142M
#define GET_SIGN(x)  ((x) >> 31)
405
67.0M
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
406
407
/**
 * Write the DC coefficients of all blocks in a slice.
 *
 * Each DC is taken from the first coefficient of its block, offset by 0x4000
 * and divided by scale.  The first one is coded directly with FIRST_DC_CB;
 * the others are coded as differences from the previous DC, sign-adjusted by
 * the sign of the previous difference, with a codebook chosen from the
 * previous code.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_slice  number of blocks
 * @param scale             DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int prev_dc   = (blocks[0] - 0x4000) / scale;
    int prev_sign = 0;
    int codebook  = 5;
    int n;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));

    for (n = 1; n < blocks_per_slice; n++) {
        const int dc       = (blocks[n * 64] - 0x4000) / scale;
        const int raw      = dc - prev_dc;
        const int adjusted = (raw ^ prev_sign) - prev_sign;
        const int code     = MAKE_CODE(adjusted);

        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);

        codebook  = FFMIN(code, 6);
        prev_sign = GET_SIGN(raw);
        prev_dc   = dc;
    }
}
430
431
/**
 * Write the AC coefficients of a slice as (run, level, sign) triples.
 *
 * Coefficients are visited scan position by scan position, and for each
 * position across all blocks of the slice, so zero runs span blocks.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_slice  number of blocks
 * @param scan              scan order (entry 0, the DC, is skipped)
 * @param qmat              quantisation matrix indexed by coefficient position
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int prev_run   = 4;
    int prev_level = 2;
    int run        = 0;
    int i, idx;

    for (i = 1; i < 64; i++) {
        const int pos   = scan[i];
        const int quant = qmat[pos];

        for (idx = pos; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / quant;
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_run_to_cb[prev_run], run);
            encode_vlc_codeword(pb, ff_prores_level_to_cb[prev_level], abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            /* the next codebooks depend on the values just written */
            prev_run   = FFMIN(run, 15);
            prev_level = FFMIN(abs_level, 9);
            run        = 0;
        }
    }
}
461
462
/**
 * Entropy-code one already transformed plane of a slice: DCs first, then ACs.
 *
 * src and linesize are accepted but not used by this function.
 *
 * @param ctx            encoder context (supplies the scan table)
 * @param pb             bit writer
 * @param mbs_per_slice  number of macroblocks in the slice
 * @param blocks         coefficients, 64 per block
 * @param blocks_per_mb  blocks per macroblock for this plane
 * @param qmat           quantisation matrix; qmat[0] scales the DCs
 */
static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, ptrdiff_t linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb,
                              const int16_t *qmat)
{
    const int total_blocks = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, total_blocks, qmat[0]);
    encode_acs(pb, blocks, total_blocks, ctx->scantable, qmat);
}
473
474
/**
 * Write the difference between two consecutive alpha values.
 *
 * The difference is reduced modulo 2^abits and mapped to a signed value when
 * it lies within dsize of the wrap-around point.  Small non-zero differences
 * use a short form (flag 0, magnitude - 1, sign); everything else, including
 * a zero difference, is written as flag 1 followed by abits raw bits.
 *
 * @param pb     bit writer
 * @param cur    current alpha value
 * @param prev   previous alpha value
 * @param abits  alpha depth in bits
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_zero_extend(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize) {
        /* short form */
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    } else {
        /* raw form */
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    }
}
492
493
/**
 * Write an alpha run length.
 *
 * A zero run is a single 1 bit.  A non-zero run is a 0 bit followed by the
 * run in 4 bits when it is below 0x10 and in 15 bits otherwise.
 *
 * @param pb   bit writer
 * @param run  number of repeated samples
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
505
506
// todo alpha quantisation for high quants
507
static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
508
                              int mbs_per_slice, uint16_t *blocks,
509
                              int quant)
510
132k
{
511
132k
    const int abits = ctx->alpha_bits;
512
132k
    const int mask  = (1 << abits) - 1;
513
132k
    const int num_coeffs = mbs_per_slice * 256;
514
132k
    int prev = mask, cur;
515
132k
    int idx = 0;
516
132k
    int run = 0;
517
518
132k
    cur = blocks[idx++];
519
132k
    put_alpha_diff(pb, cur, prev, abits);
520
132k
    prev = cur;
521
67.1M
    do {
522
67.1M
        cur = blocks[idx++];
523
67.1M
        if (cur != prev) {
524
448k
            put_alpha_run (pb, run);
525
448k
            put_alpha_diff(pb, cur, prev, abits);
526
448k
            prev = cur;
527
448k
            run  = 0;
528
66.6M
        } else {
529
66.6M
            run++;
530
66.6M
        }
531
67.1M
    } while (idx < num_coeffs);
532
132k
    put_alpha_run(pb, run);
533
132k
}
534
535
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
536
                        PutBitContext *pb,
537
                        int sizes[4], int x, int y, int quant,
538
                        int mbs_per_slice)
539
230k
{
540
230k
    ProresContext *ctx = avctx->priv_data;
541
230k
    int i, xp, yp;
542
230k
    int total_size = 0;
543
230k
    const uint16_t *src;
544
230k
    int num_cblocks, pwidth, line_add;
545
230k
    ptrdiff_t linesize;
546
230k
    int is_chroma;
547
230k
    uint16_t *qmat;
548
230k
    uint16_t *qmat_chroma;
549
550
230k
    if (ctx->pictures_per_frame == 1)
551
230k
        line_add = 0;
552
0
    else
553
0
        line_add = ctx->cur_picture_idx ^ !(pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST);
554
555
230k
    if (ctx->force_quant) {
556
0
        qmat = ctx->quants[0];
557
0
        qmat_chroma = ctx->quants_chroma[0];
558
230k
    } else if (quant < MAX_STORED_Q) {
559
223k
        qmat = ctx->quants[quant];
560
223k
        qmat_chroma = ctx->quants_chroma[quant];
561
223k
    } else {
562
6.91k
        qmat = ctx->custom_q;
563
6.91k
        qmat_chroma = ctx->custom_chroma_q;
564
449k
        for (i = 0; i < 64; i++) {
565
442k
            qmat[i] = ctx->quant_mat[i] * quant;
566
442k
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
567
442k
        }
568
6.91k
    }
569
570
1.05M
    for (i = 0; i < ctx->num_planes; i++) {
571
824k
        is_chroma    = (i == 1 || i == 2);
572
824k
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
573
642k
            xp          = x << 4;
574
642k
            yp          = y << 4;
575
642k
            num_cblocks = 4;
576
642k
            pwidth      = avctx->width;
577
642k
        } else {
578
182k
            xp          = x << 3;
579
182k
            yp          = y << 4;
580
182k
            num_cblocks = 2;
581
182k
            pwidth      = avctx->width >> 1;
582
182k
        }
583
584
824k
        linesize = pic->linesize[i] * ctx->pictures_per_frame;
585
824k
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
586
824k
                                line_add * pic->linesize[i]) + xp;
587
588
824k
        if (i < 3) {
589
691k
            get_slice_data(ctx, src, linesize, xp, yp,
590
691k
                           pwidth, avctx->height / ctx->pictures_per_frame,
591
691k
                           ctx->blocks[0], ctx->emu_buf,
592
691k
                           mbs_per_slice, num_cblocks, is_chroma);
593
691k
            if (!is_chroma) {/* luma quant */
594
230k
                encode_slice_plane(ctx, pb, src, linesize,
595
230k
                                   mbs_per_slice, ctx->blocks[0],
596
230k
                                   num_cblocks, qmat);
597
461k
            } else { /* chroma plane */
598
461k
                encode_slice_plane(ctx, pb, src, linesize,
599
461k
                                   mbs_per_slice, ctx->blocks[0],
600
461k
                                   num_cblocks, qmat_chroma);
601
461k
            }
602
691k
        } else {
603
132k
            get_alpha_data(ctx, src, linesize, xp, yp,
604
132k
                           pwidth, avctx->height / ctx->pictures_per_frame,
605
132k
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
606
132k
            encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
607
132k
        }
608
824k
        flush_put_bits(pb);
609
824k
        sizes[i]   = put_bytes_output(pb) - total_size;
610
824k
        total_size = put_bytes_output(pb);
611
824k
    }
612
230k
    return total_size;
613
230k
}
614
615
/**
 * Return the number of bits encode_vlc_codeword() writes for a value.
 *
 * @param codebook  packed codebook descriptor (bits 0-1: switch prefix length
 *                  minus one, bits 2-4: exp-Golomb order, bits 5+: Rice order)
 * @param val       non-negative value
 * @return codeword length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice: unary quotient + stop bit + rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb: zero prefix + (exponent + 1) value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
636
637
static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
638
                        int scale)
639
5.58M
{
640
5.58M
    int i;
641
5.58M
    int codebook = 5, code, dc, prev_dc, delta, sign, new_sign;
642
5.58M
    int bits;
643
644
5.58M
    prev_dc  = (blocks[0] - 0x4000) / scale;
645
5.58M
    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
646
5.58M
    sign     = 0;
647
5.58M
    blocks  += 64;
648
5.58M
    *error  += FFABS(blocks[0] - 0x4000) % scale;
649
650
60.7M
    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
651
55.1M
        dc       = (blocks[0] - 0x4000) / scale;
652
55.1M
        *error  += FFABS(blocks[0] - 0x4000) % scale;
653
55.1M
        delta    = dc - prev_dc;
654
55.1M
        new_sign = GET_SIGN(delta);
655
55.1M
        delta    = (delta ^ sign) - sign;
656
55.1M
        code     = MAKE_CODE(delta);
657
55.1M
        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
658
55.1M
        codebook = FFMIN(code, 6);
659
55.1M
        sign     = new_sign;
660
55.1M
        prev_dc  = dc;
661
55.1M
    }
662
663
5.58M
    return bits;
664
5.58M
}
665
666
static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
667
                        const uint8_t *scan, const int16_t *qmat)
668
5.58M
{
669
5.58M
    int idx, i;
670
5.58M
    int prev_run = 4;
671
5.58M
    int prev_level = 2;
672
5.58M
    int run, level;
673
5.58M
    int max_coeffs, abs_level;
674
5.58M
    int bits = 0;
675
676
5.58M
    max_coeffs = blocks_per_slice << 6;
677
5.58M
    run        = 0;
678
679
357M
    for (i = 1; i < 64; i++) {
680
4.17G
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
681
3.82G
            level   = blocks[idx] / qmat[scan[i]];
682
3.82G
            *error += FFABS(blocks[idx]) % qmat[scan[i]];
683
3.82G
            if (level) {
684
782M
                abs_level = FFABS(level);
685
782M
                bits += estimate_vlc(ff_prores_run_to_cb[prev_run], run);
686
782M
                bits += estimate_vlc(ff_prores_level_to_cb[prev_level],
687
782M
                                     abs_level - 1) + 1;
688
689
782M
                prev_run   = FFMIN(run, 15);
690
782M
                prev_level = FFMIN(abs_level, 9);
691
782M
                run    = 0;
692
3.04G
            } else {
693
3.04G
                run++;
694
3.04G
            }
695
3.82G
        }
696
352M
    }
697
698
5.58M
    return bits;
699
5.58M
}
700
701
/**
 * Estimate the coded size of one plane of a slice.
 *
 * Works on the coefficients stored in td->blocks[plane]; src, linesize are
 * accepted but not used.  The result is rounded up to a multiple of 8 bits.
 *
 * @param ctx            encoder context (supplies the scan table)
 * @param error          in/out: accumulated quantisation error
 * @param plane          plane index into td->blocks
 * @param mbs_per_slice  number of macroblocks in the slice
 * @param blocks_per_mb  blocks per macroblock for this plane
 * @param qmat           quantisation matrix; qmat[0] scales the DCs
 * @param td             scratch data holding the coefficients
 * @return estimated size in bits, byte aligned
 */
static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice,
                                int blocks_per_mb,
                                const int16_t *qmat, ProresThreadData *td)
{
    const int total_blocks = mbs_per_slice * blocks_per_mb;
    int16_t  *coeffs       = td->blocks[plane];
    int       bits;

    bits  = estimate_dcs(error, coeffs, total_blocks, qmat[0]);
    bits += estimate_acs(error, coeffs, total_blocks, ctx->scantable, qmat);

    return FFALIGN(bits, 8);
}
717
718
/**
 * Return the number of bits put_alpha_diff() writes for a value change.
 *
 * @param cur    current alpha value
 * @param prev   previous alpha value
 * @param abits  alpha depth in bits
 * @return dbits + 1 for the short form, abits + 1 for the raw form
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_zero_extend(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    /* small non-zero differences use the short form */
    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;

    return abits + 1;
}
732
733
/**
 * Estimate the coded size of the alpha samples of one slice.
 *
 * Walks the samples the same way encode_alpha_plane() does and sums the cost
 * of every value change and run.  src and linesize are accepted but unused.
 *
 * NOTE(review): put_alpha_run() writes a 1-bit flag in front of every
 * non-zero run and a single bit for a zero run; here a non-zero run is
 * charged 4 or 15 bits and a trailing zero run nothing.  Confirm whether the
 * lower estimate is intentional.
 *
 * @param ctx            encoder context (supplies alpha_bits)
 * @param mbs_per_slice  number of macroblocks; 256 samples are read per macroblock
 * @param blocks         alpha samples of the slice
 * @return estimated size in bits
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits       = ctx->alpha_bits;
    const int num_samples = mbs_per_slice * 256;
    int last = (1 << abits) - 1;
    int pos  = 0;
    int run  = 0;
    int sample, bits;

    sample = blocks[pos++];
    bits   = est_alpha_diff(sample, last, abits);
    last   = sample;

    do {
        sample = blocks[pos++];
        if (sample == last) {
            run++;
            continue;
        }

        /* cost of the run that just ended */
        if (!run)
            bits++;
        else
            bits += run < 0x10 ? 4 : 15;

        bits += est_alpha_diff(sample, last, abits);
        last  = sample;
        run   = 0;
    } while (pos < num_samples);

    /* trailing run */
    if (run)
        bits += run < 0x10 ? 4 : 15;

    return bits;
}
774
775
static int find_slice_quant(AVCodecContext *avctx,
776
                            int trellis_node, int x, int y, int mbs_per_slice,
777
                            ProresThreadData *td)
778
230k
{
779
230k
    ProresContext *ctx = avctx->priv_data;
780
230k
    int i, q, pq, xp, yp;
781
230k
    const uint16_t *src;
782
230k
    int num_cblocks[MAX_PLANES], pwidth;
783
230k
    int is_chroma[MAX_PLANES];
784
230k
    const int min_quant = ctx->profile_info->min_quant;
785
230k
    const int max_quant = ctx->profile_info->max_quant;
786
230k
    int error, bits, bits_limit;
787
230k
    int mbs, prev, cur, new_score;
788
230k
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
789
230k
    int overquant;
790
230k
    uint16_t *qmat;
791
230k
    uint16_t *qmat_chroma;
792
230k
    int linesize[4], line_add;
793
230k
    int alpha_bits = 0;
794
795
230k
    if (ctx->pictures_per_frame == 1)
796
230k
        line_add = 0;
797
0
    else
798
0
        line_add = ctx->cur_picture_idx ^ !(ctx->pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST);
799
230k
    mbs = x + mbs_per_slice;
800
801
1.05M
    for (i = 0; i < ctx->num_planes; i++) {
802
824k
        is_chroma[i]    = (i == 1 || i == 2);
803
824k
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
804
642k
            xp             = x << 4;
805
642k
            yp             = y << 4;
806
642k
            num_cblocks[i] = 4;
807
642k
            pwidth         = avctx->width;
808
642k
        } else {
809
182k
            xp             = x << 3;
810
182k
            yp             = y << 4;
811
182k
            num_cblocks[i] = 2;
812
182k
            pwidth         = avctx->width >> 1;
813
182k
        }
814
815
824k
        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
816
824k
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
817
824k
                                 line_add * ctx->pic->linesize[i]) + xp;
818
819
824k
        if (i < 3) {
820
691k
            get_slice_data(ctx, src, linesize[i], xp, yp,
821
691k
                           pwidth, avctx->height / ctx->pictures_per_frame,
822
691k
                           td->blocks[i], td->emu_buf,
823
691k
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
824
691k
        } else {
825
132k
            get_alpha_data(ctx, src, linesize[i], xp, yp,
826
132k
                           pwidth, avctx->height / ctx->pictures_per_frame,
827
132k
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
828
132k
        }
829
824k
    }
830
831
1.84M
    for (q = min_quant; q < max_quant + 2; q++) {
832
1.61M
        td->nodes[trellis_node + q].prev_node = -1;
833
1.61M
        td->nodes[trellis_node + q].quant     = q;
834
1.61M
    }
835
836
230k
    if (ctx->alpha_bits)
837
132k
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
838
132k
                                          mbs_per_slice, td->blocks[3]);
839
    // todo: maybe perform coarser quantising to fit into frame size when needed
840
1.61M
    for (q = min_quant; q <= max_quant; q++) {
841
1.38M
        bits  = alpha_bits;
842
1.38M
        error = 0;
843
1.38M
        bits += estimate_slice_plane(ctx, &error, 0,
844
1.38M
                                     src, linesize[0],
845
1.38M
                                     mbs_per_slice,
846
1.38M
                                     num_cblocks[0],
847
1.38M
                                     ctx->quants[q], td); /* estimate luma plane */
848
4.15M
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
849
2.76M
            bits += estimate_slice_plane(ctx, &error, i,
850
2.76M
                                         src, linesize[i],
851
2.76M
                                         mbs_per_slice,
852
2.76M
                                         num_cblocks[i],
853
2.76M
                                         ctx->quants_chroma[q], td);
854
2.76M
        }
855
1.38M
        if (bits > 65000 * 8)
856
0
            error = SCORE_LIMIT;
857
858
1.38M
        slice_bits[q]  = bits;
859
1.38M
        slice_score[q] = error;
860
1.38M
    }
861
230k
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
862
221k
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
863
221k
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
864
221k
        overquant = max_quant;
865
221k
    } else {
866
481k
        for (q = max_quant + 1; q < 128; q++) {
867
479k
            bits  = alpha_bits;
868
479k
            error = 0;
869
479k
            if (q < MAX_STORED_Q) {
870
74.1k
                qmat = ctx->quants[q];
871
74.1k
                qmat_chroma = ctx->quants_chroma[q];
872
405k
            } else {
873
405k
                qmat = td->custom_q;
874
405k
                qmat_chroma = td->custom_chroma_q;
875
26.3M
                for (i = 0; i < 64; i++) {
876
25.9M
                    qmat[i] = ctx->quant_mat[i] * q;
877
25.9M
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
878
25.9M
                }
879
405k
            }
880
479k
            bits += estimate_slice_plane(ctx, &error, 0,
881
479k
                                         src, linesize[0],
882
479k
                                         mbs_per_slice,
883
479k
                                         num_cblocks[0],
884
479k
                                         qmat, td);/* estimate luma plane */
885
1.43M
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
886
959k
                bits += estimate_slice_plane(ctx, &error, i,
887
959k
                                             src, linesize[i],
888
959k
                                             mbs_per_slice,
889
959k
                                             num_cblocks[i],
890
959k
                                             qmat_chroma, td);
891
959k
            }
892
479k
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
893
6.59k
                break;
894
479k
        }
895
896
8.85k
        slice_bits[max_quant + 1]  = bits;
897
8.85k
        slice_score[max_quant + 1] = error;
898
8.85k
        overquant = q;
899
8.85k
    }
900
230k
    td->nodes[trellis_node + max_quant + 1].quant = overquant;
901
902
230k
    bits_limit = mbs * ctx->bits_per_mb;
903
1.84M
    for (pq = min_quant; pq < max_quant + 2; pq++) {
904
1.61M
        prev = trellis_node - TRELLIS_WIDTH + pq;
905
906
12.9M
        for (q = min_quant; q < max_quant + 2; q++) {
907
11.2M
            cur = trellis_node + q;
908
909
11.2M
            bits  = td->nodes[prev].bits + slice_bits[q];
910
11.2M
            error = slice_score[q];
911
11.2M
            if (bits > bits_limit)
912
416k
                error = SCORE_LIMIT;
913
914
11.2M
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
915
10.8M
                new_score = td->nodes[prev].score + error;
916
426k
            else
917
426k
                new_score = SCORE_LIMIT;
918
11.2M
            if (td->nodes[cur].prev_node == -1 ||
919
11.2M
                td->nodes[cur].score >= new_score) {
920
921
9.82M
                td->nodes[cur].bits      = bits;
922
9.82M
                td->nodes[cur].score     = new_score;
923
9.82M
                td->nodes[cur].prev_node = prev;
924
9.82M
            }
925
11.2M
        }
926
1.61M
    }
927
928
230k
    error = td->nodes[trellis_node + min_quant].score;
929
230k
    pq    = trellis_node + min_quant;
930
1.61M
    for (q = min_quant + 1; q < max_quant + 2; q++) {
931
1.38M
        if (td->nodes[trellis_node + q].score <= error) {
932
478k
            error = td->nodes[trellis_node + q].score;
933
478k
            pq    = trellis_node + q;
934
478k
        }
935
1.38M
    }
936
937
230k
    return pq;
938
230k
}
939
940
/**
 * execute2() job: choose the quantiser of every slice in one macroblock row.
 *
 * The row is walked left to right, calling find_slice_quant() for each
 * slice with the trellis column (slice index + 1) * TRELLIS_WIDTH. The
 * node returned for the last slice is then followed back through
 * prev_node to fill ctx->slice_q for the row.
 *
 * @param avctx    codec context
 * @param arg      unused by this function
 * @param jobnr    macroblock row to process
 * @param threadnr index into ctx->tdata
 * @return 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx    = avctx->priv_data;
    ProresThreadData *td  = &ctx->tdata[threadnr];
    const int row         = jobnr;
    int slice_mbs         = ctx->mbs_per_slice;
    int mb_x              = 0;
    int slice_idx         = 0;
    int node              = 0;
    int s;

    while (mb_x < ctx->mb_width) {
        /* halve the slice width until it fits the rest of the row */
        while (ctx->mb_width - mb_x < slice_mbs)
            slice_mbs >>= 1;

        node = find_slice_quant(avctx,
                                (slice_idx + 1) * TRELLIS_WIDTH, mb_x, row,
                                slice_mbs, td);

        mb_x += slice_mbs;
        slice_idx++;
    }

    /* backtrack from the last slice to the first */
    for (s = ctx->slices_width; s-- > 0; ) {
        ctx->slice_q[s + row * ctx->slices_width] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
963
964
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
965
                        const AVFrame *pic, int *got_packet)
966
4.19k
{
967
4.19k
    ProresContext *ctx = avctx->priv_data;
968
4.19k
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
969
4.19k
    uint8_t *picture_size_pos;
970
4.19k
    PutBitContext pb;
971
4.19k
    int x, y, i, mb, q = 0;
972
4.19k
    int sizes[4] = { 0 };
973
4.19k
    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
974
4.19k
    int frame_size, picture_size, slice_size;
975
4.19k
    int pkt_size, ret;
976
4.19k
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
977
4.19k
    uint8_t frame_flags;
978
979
4.19k
    ctx->pic = pic;
980
4.19k
    pkt_size = ctx->frame_size_upper_bound;
981
982
4.19k
    if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + FF_INPUT_BUFFER_MIN_SIZE)) < 0)
983
0
        return ret;
984
985
4.19k
    orig_buf = pkt->data;
986
987
    // frame atom
988
4.19k
    orig_buf += 4;                              // frame size
989
4.19k
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
990
4.19k
    buf = orig_buf;
991
992
    // frame header
993
4.19k
    tmp = buf;
994
4.19k
    buf += 2;                                   // frame header size will be stored here
995
4.19k
    bytestream_put_be16  (&buf, ctx->chroma_factor != CFACTOR_Y422 || ctx->alpha_bits ? 1 : 0);
996
4.19k
    bytestream_put_buffer(&buf, ctx->vendor, 4);
997
4.19k
    bytestream_put_be16  (&buf, avctx->width);
998
4.19k
    bytestream_put_be16  (&buf, avctx->height);
999
1000
4.19k
    frame_flags = ctx->chroma_factor << 6;
1001
4.19k
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1002
0
        frame_flags |= (pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 0x04 : 0x08;
1003
4.19k
    bytestream_put_byte  (&buf, frame_flags);
1004
1005
4.19k
    bytestream_put_byte  (&buf, 0);             // reserved
1006
4.19k
    bytestream_put_byte  (&buf, pic->color_primaries);
1007
4.19k
    bytestream_put_byte  (&buf, pic->color_trc);
1008
4.19k
    bytestream_put_byte  (&buf, pic->colorspace);
1009
4.19k
    bytestream_put_byte  (&buf, ctx->alpha_bits >> 3);
1010
4.19k
    bytestream_put_byte  (&buf, 0);             // reserved
1011
4.19k
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1012
4.19k
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1013
4.19k
        bytestream_put_buffer(&buf, ctx->quant_mat, 64);        // luma quantisation matrix
1014
4.19k
        bytestream_put_buffer(&buf, ctx->quant_chroma_mat, 64); // chroma quantisation matrix
1015
4.19k
    } else {
1016
0
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1017
0
    }
1018
4.19k
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1019
1020
4.19k
    for (ctx->cur_picture_idx = 0;
1021
8.39k
         ctx->cur_picture_idx < ctx->pictures_per_frame;
1022
4.19k
         ctx->cur_picture_idx++) {
1023
        // picture header
1024
4.19k
        picture_size_pos = buf + 1;
1025
4.19k
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1026
4.19k
        buf += 4;                                   // picture data size will be stored here
1027
4.19k
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
1028
4.19k
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1029
1030
        // seek table - will be filled during slice encoding
1031
4.19k
        slice_sizes = buf;
1032
4.19k
        buf += ctx->slices_per_picture * 2;
1033
1034
        // slices
1035
4.19k
        if (!ctx->force_quant) {
1036
4.19k
            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1037
4.19k
                                  ctx->mb_height);
1038
4.19k
            if (ret)
1039
0
                return ret;
1040
4.19k
        }
1041
1042
177k
        for (y = 0; y < ctx->mb_height; y++) {
1043
173k
            int mbs_per_slice = ctx->mbs_per_slice;
1044
403k
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1045
230k
                q = ctx->force_quant ? ctx->force_quant
1046
230k
                                     : ctx->slice_q[mb + y * ctx->slices_width];
1047
1048
737k
                while (ctx->mb_width - x < mbs_per_slice)
1049
507k
                    mbs_per_slice >>= 1;
1050
1051
230k
                bytestream_put_byte(&buf, slice_hdr_size << 3);
1052
230k
                slice_hdr = buf;
1053
230k
                buf += slice_hdr_size - 1;
1054
230k
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1055
193
                    uint8_t *start = pkt->data;
1056
                    // Recompute new size according to max_slice_size
1057
                    // and deduce delta
1058
193
                    int delta = 200 + (ctx->pictures_per_frame *
1059
193
                                ctx->slices_per_picture + 1) *
1060
193
                                max_slice_size - pkt_size;
1061
1062
193
                    delta = FFMAX(delta, 2 * max_slice_size);
1063
193
                    ctx->frame_size_upper_bound += delta;
1064
1065
193
                    if (!ctx->warn) {
1066
137
                        avpriv_request_sample(avctx,
1067
137
                                              "Packet too small: is %i,"
1068
137
                                              " needs %i (slice: %i). "
1069
137
                                              "Correct allocation",
1070
137
                                              pkt_size, delta, max_slice_size);
1071
137
                        ctx->warn = 1;
1072
137
                    }
1073
1074
193
                    ret = av_grow_packet(pkt, delta);
1075
193
                    if (ret < 0)
1076
0
                        return ret;
1077
1078
193
                    pkt_size += delta;
1079
                    // restore pointers
1080
193
                    orig_buf         = pkt->data + (orig_buf         - start);
1081
193
                    buf              = pkt->data + (buf              - start);
1082
193
                    picture_size_pos = pkt->data + (picture_size_pos - start);
1083
193
                    slice_sizes      = pkt->data + (slice_sizes      - start);
1084
193
                    slice_hdr        = pkt->data + (slice_hdr        - start);
1085
193
                    tmp              = pkt->data + (tmp              - start);
1086
193
                }
1087
230k
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1088
230k
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1089
230k
                                   mbs_per_slice);
1090
230k
                if (ret < 0)
1091
0
                    return ret;
1092
1093
230k
                bytestream_put_byte(&slice_hdr, q);
1094
230k
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1095
824k
                for (i = 0; i < ctx->num_planes - 1; i++) {
1096
594k
                    bytestream_put_be16(&slice_hdr, sizes[i]);
1097
594k
                    slice_size += sizes[i];
1098
594k
                }
1099
230k
                bytestream_put_be16(&slice_sizes, slice_size);
1100
230k
                buf += slice_size - slice_hdr_size;
1101
230k
                if (max_slice_size < slice_size)
1102
668
                    max_slice_size = slice_size;
1103
230k
            }
1104
173k
        }
1105
1106
4.19k
        picture_size = buf - (picture_size_pos - 1);
1107
4.19k
        bytestream_put_be32(&picture_size_pos, picture_size);
1108
4.19k
    }
1109
1110
4.19k
    orig_buf -= 8;
1111
4.19k
    frame_size = buf - orig_buf;
1112
4.19k
    bytestream_put_be32(&orig_buf, frame_size);
1113
1114
4.19k
    pkt->size   = frame_size;
1115
4.19k
    *got_packet = 1;
1116
1117
4.19k
    return 0;
1118
4.19k
}
1119
1120
/**
 * Free everything allocated by encode_init(): the trellis nodes of each
 * thread, the per-thread data array and the slice quantiser table.
 *
 * @param avctx codec context
 * @return 0
 */
static av_cold int encode_close(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;

    /* tdata may be unset, in which case there are no node arrays to free */
    if (ctx->tdata) {
        for (int t = 0; t < avctx->thread_count; t++)
            av_freep(&ctx->tdata[t].nodes);
    }

    av_freep(&ctx->tdata);
    av_freep(&ctx->slice_q);

    return 0;
}
1134
1135
/**
 * Copy an 8x8 block of 16-bit samples into @p block and run the
 * forward DCT on it in place.
 *
 * @param fdsp     DSP context providing the fdct() callback
 * @param src      top-left sample of the source block
 * @param linesize distance between source rows; halved to step through
 *                 the uint16_t samples
 * @param block    destination for 64 coefficients
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const ptrdiff_t stride = linesize >> 1;
    const uint16_t *row    = src;
    int16_t *dst           = block;

    for (int y = 0; y < 8; y++, row += stride, dst += 8)
        for (int x = 0; x < 8; x++)
            dst[x] = row[x];

    fdsp->fdct(block);
}
1148
1149
/**
 * Initialise the encoder: validate options, select profile and
 * quantisation matrices, derive the slice layout, set up either the
 * rate-control tables (trellis nodes, per-quantiser matrices) or the
 * fixed-quantiser matrices, and compute the packet size upper bound.
 *
 * All failures caused by invalid user options return AVERROR(EINVAL).
 *
 * @param avctx codec context
 * @return 0 on success, AVERROR(EINVAL) for invalid options,
 *         AVERROR(ENOMEM) on allocation failure
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
    int mps;
    int i, j;
    int min_quant, max_quant;
    int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
    int slice_slots;

    avctx->bits_per_raw_sample = 10;

    ctx->fdct      = prores_fdct;
    ctx->scantable = interlaced ? ff_prores_interlaced_scan
                                : ff_prores_progressive_scan;
    ff_fdctdsp_init(&ctx->fdsp, avctx);

    mps = ctx->mbs_per_slice;
    if (mps & (mps - 1)) {
        av_log(avctx, AV_LOG_ERROR,
               "there should be an integer power of two MBs per slice\n");
        return AVERROR(EINVAL);
    }
    if (ctx->profile == PRORES_PROFILE_AUTO) {
        /* alpha or non-subsampled chroma selects 4444, anything else HQ */
        ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
                        !(desc->log2_chroma_w + desc->log2_chroma_h))
                     ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
        av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
               "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
               ? "4:4:4:4 profile because of the used input colorspace"
               : "HQ profile to keep best quality");
    }
    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
        if (ctx->profile != PRORES_PROFILE_4444 &&
            ctx->profile != PRORES_PROFILE_4444XQ) {
            // the selected profile is not a 4444 one: drop alpha and warn
            av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
                   "encode alpha. Override with -profile if needed.\n");
            ctx->alpha_bits = 0;
        }
        if (ctx->alpha_bits & 7) {
            av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
            return AVERROR(EINVAL);
        }
        avctx->bits_per_coded_sample = 32;
    } else {
        ctx->alpha_bits = 0;
    }

    ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
                         ? CFACTOR_Y422
                         : CFACTOR_Y444;
    ctx->profile_info  = prores_profile_info + ctx->profile;
    ctx->num_planes    = 3 + !!ctx->alpha_bits;

    ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;

    if (interlaced)
        ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
    else
        ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;

    /* full-width slices plus one smaller slice per set bit of the remainder */
    ctx->slices_width  = ctx->mb_width / mps;
    ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
    ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
    ctx->pictures_per_frame = 1 + interlaced;

    if (ctx->quant_sel == -1) {
        ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
    } else {
        ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
    }

    /* encode_frame() copies exactly 4 bytes of the vendor string */
    if (strlen(ctx->vendor) != 4) {
        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
        return AVERROR(EINVAL);
    }

    ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
    if (!ctx->force_quant) {
        if (!ctx->bits_per_mb) {
            for (i = 0; i < NUM_MB_LIMITS - 1; i++)
                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
                                           ctx->pictures_per_frame)
                    break;
            ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
            if (ctx->alpha_bits)
                ctx->bits_per_mb *= 20;
        } else if (ctx->bits_per_mb < 128) {
            av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
            return AVERROR(EINVAL);
        }

        min_quant = ctx->profile_info->min_quant;
        max_quant = ctx->profile_info->max_quant;
        /* precompute the scaled matrices for every stored quantiser */
        for (i = min_quant; i < MAX_STORED_Q; i++) {
            for (j = 0; j < 64; j++) {
                ctx->quants[i][j] = ctx->quant_mat[j] * i;
                ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
            }
        }

        ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
        if (!ctx->slice_q)
            return AVERROR(ENOMEM);

        ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
        if (!ctx->tdata)
            return AVERROR(ENOMEM);

        for (j = 0; j < avctx->thread_count; j++) {
            /* one trellis column per slice in a row plus a start column */
            ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
                                                  TRELLIS_WIDTH
                                                  * sizeof(*ctx->tdata->nodes));
            if (!ctx->tdata[j].nodes)
                return AVERROR(ENOMEM);
            for (i = min_quant; i < max_quant + 2; i++) {
                ctx->tdata[j].nodes[i].prev_node = -1;
                ctx->tdata[j].nodes[i].bits      = 0;
                ctx->tdata[j].nodes[i].score     = 0;
            }
        }
    } else {
        int ls = 0;
        int ls_chroma = 0;

        /*
         * A negative global_quality gives a negative quantiser, which
         * would be scaled into the matrices below and written as the
         * slice quantiser byte by encode_frame().
         */
        if (ctx->force_quant < 0) {
            av_log(avctx, AV_LOG_ERROR, "negative quantiser is not supported\n");
            return AVERROR(EINVAL);
        }
        if (ctx->force_quant > 64) {
            av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
            return AVERROR(EINVAL);
        }

        for (j = 0; j < 64; j++) {
            ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
            ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
            ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
            ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
        }

        ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
        if (ctx->chroma_factor == CFACTOR_Y444)
            ctx->bits_per_mb += ls_chroma * 4;
    }

    slice_slots = ctx->pictures_per_frame * ctx->slices_per_picture + 1;

    ctx->frame_size_upper_bound = slice_slots *
                                  (2 + 2 * ctx->num_planes +
                                   (mps * ctx->bits_per_mb) / 8)
                                  + 200;

    if (ctx->alpha_bits) {
         // The alpha plane is run-coded and might exceed the bit budget.
         ctx->frame_size_upper_bound += slice_slots *
         /* num pixels per slice */     ((ctx->mbs_per_slice * 256 *
         /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7) >> 3);
    }

    avctx->codec_tag   = ctx->profile_info->tag;
    avctx->profile = ctx->profile;

    av_log(avctx, AV_LOG_DEBUG,
           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
           interlaced ? "yes" : "no", ctx->bits_per_mb);
    av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
           ctx->frame_size_upper_bound);

    return 0;
}
1319
1320
#define OFFSET(x) offsetof(ProresContext, x)
1321
#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1322
1323
static const AVOption options[] = {
1324
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1325
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1326
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1327
        { .i64 = PRORES_PROFILE_AUTO },
1328
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, .unit = "profile" },
1329
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1330
        0, 0, VE, .unit = "profile" },
1331
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1332
        0, 0, VE, .unit = "profile" },
1333
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1334
        0, 0, VE, .unit = "profile" },
1335
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1336
        0, 0, VE, .unit = "profile" },
1337
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1338
        0, 0, VE, .unit = "profile" },
1339
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1340
        0, 0, VE, .unit = "profile" },
1341
    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1342
        0, 0, VE, .unit = "profile" },
1343
    { "vendor", "vendor ID", OFFSET(vendor),
1344
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1345
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1346
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1347
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1348
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, .unit = "quant_mat" },
1349
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1350
        0, 0, VE, .unit = "quant_mat" },
1351
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1352
        0, 0, VE, .unit = "quant_mat" },
1353
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1354
        0, 0, VE, .unit = "quant_mat" },
1355
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1356
        0, 0, VE, .unit = "quant_mat" },
1357
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1358
        0, 0, VE, .unit = "quant_mat" },
1359
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1360
        0, 0, VE, .unit = "quant_mat" },
1361
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1362
        { .i64 = 16 }, 0, 16, VE },
1363
    { NULL }
1364
};
1365
1366
static const AVClass proresenc_class = {
1367
    .class_name = "ProRes encoder",
1368
    .item_name  = av_default_item_name,
1369
    .option     = options,
1370
    .version    = LIBAVUTIL_VERSION_INT,
1371
};
1372
1373
const FFCodec ff_prores_ks_encoder = {
1374
    .p.name         = "prores_ks",
1375
    CODEC_LONG_NAME("Apple ProRes (iCodec Pro)"),
1376
    .p.type         = AVMEDIA_TYPE_VIDEO,
1377
    .p.id           = AV_CODEC_ID_PRORES,
1378
    .priv_data_size = sizeof(ProresContext),
1379
    .init           = encode_init,
1380
    .close          = encode_close,
1381
    FF_CODEC_ENCODE_CB(encode_frame),
1382
    .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS |
1383
                      AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
1384
    .p.pix_fmts     = (const enum AVPixelFormat[]) {
1385
                          AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1386
                          AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1387
                      },
1388
    .p.priv_class   = &proresenc_class,
1389
    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1390
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
1391
};