Coverage Report

Created: 2025-12-31 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libswscale/lut3d.c
Line
Count
Source
1
/*
2
 * Copyright (C) 2024 Niklas Haas
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include <assert.h>
22
#include <string.h>
23
24
#include "libavutil/attributes.h"
25
#include "libavutil/avassert.h"
26
#include "libavutil/mem.h"
27
28
#include "cms.h"
29
#include "csputils.h"
30
#include "lut3d.h"
31
32
SwsLut3D *ff_sws_lut3d_alloc(void)
33
0
{
34
0
    SwsLut3D *lut3d = av_malloc(sizeof(*lut3d));
35
0
    if (!lut3d)
36
0
        return NULL;
37
38
0
    lut3d->dynamic = false;
39
0
    return lut3d;
40
0
}
41
42
/**
 * Release a 3DLUT context.
 *
 * @param plut3d address of the context pointer; handed unchanged to
 *               av_freep(), which frees the context and clears the pointer
 */
void ff_sws_lut3d_free(SwsLut3D **plut3d)
{
    av_freep(plut3d);
}
46
47
bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output)
48
0
{
49
0
    return fmt == AV_PIX_FMT_RGBA64;
50
0
}
51
52
/**
 * Pick the pixel format the 3DLUT should operate on.
 *
 * Both arguments are currently ignored.
 *
 * @return always AV_PIX_FMT_RGBA64
 */
enum AVPixelFormat ff_sws_lut3d_pick_pixfmt(SwsFormat fmt, int output)
{
    const enum AVPixelFormat working_fmt = AV_PIX_FMT_RGBA64;

    return working_fmt;
}
56
57
/**
58
 * v0 and v3 are 'black' and 'white'
59
 * v1 and v2 are closest RGB/CMY vertices
60
 * x >= y >= z are relative weights
61
 */
62
static av_always_inline
63
v3u16_t barycentric(int shift, int x, int y, int z,
64
                    v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3)
65
0
{
66
0
    const int a = (1 << shift) - x;
67
0
    const int b = x - y;
68
0
    const int c = y - z;
69
0
    const int d = z;
70
0
    av_assert2(x >= y);
71
0
    av_assert2(y >= z);
72
73
0
    return (v3u16_t) {
74
0
        (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift,
75
0
        (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift,
76
0
        (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift,
77
0
    };
78
0
}
79
80
static av_always_inline
81
v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx,
82
                    int Rf, int Gf, int Bf)
83
0
{
84
0
    const int shift = 16 - INPUT_LUT_BITS;
85
0
    const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1);
86
0
    const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1);
87
0
    const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1);
88
89
0
    const v3u16_t c000 = lut3d->input[Bx][Gx][Rx];
90
0
    const v3u16_t c111 = lut3d->input[Bn][Gn][Rn];
91
0
    if (Rf > Gf) {
92
0
        if (Gf > Bf) {
93
0
            const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
94
0
            const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
95
0
            return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111);
96
0
        } else if (Rf > Bf) {
97
0
            const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
98
0
            const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
99
0
            return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111);
100
0
        } else {
101
0
            const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
102
0
            const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
103
0
            return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111);
104
0
        }
105
0
    } else {
106
0
        if (Bf > Gf) {
107
0
            const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
108
0
            const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
109
0
            return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111);
110
0
        } else if (Bf > Rf) {
111
0
            const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
112
0
            const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
113
0
            return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111);
114
0
        } else {
115
0
            const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
116
0
            const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
117
0
            return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111);
118
0
        }
119
0
    }
120
0
}
121
122
/**
 * Look up a 16-bit-per-component value in the input LUT.
 *
 * The top INPUT_LUT_BITS of each component select the LUT cell; the remaining
 * low bits are the fractional position handed to tetrahedral().
 */
static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb)
{
    const int shift     = 16 - INPUT_LUT_BITS;
    const int frac_mask = (1 << shift) - 1;

    return tetrahedral(lut3d,
                       rgb.x >> shift,    rgb.y >> shift,    rgb.z >> shift,
                       rgb.x & frac_mask, rgb.y & frac_mask, rgb.z & frac_mask);
}
133
134
/**
135
 * Note: These functions are scaled such that x == (1 << shift) corresponds to
136
 * a value of 1.0. This makes them suitable for use when interpolating LUT
137
 * entries with a fractional part that is just masked away from the index,
138
 * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of
139
 * just slightly *less* than 1.0.
140
 */
141
/* Blend two 2-component values: x == 0 yields a, x == (1 << shift) yields b */
static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift)
{
    /* Weight of `a`; it and x add up to (1 << shift) */
    const int wa = (1 << shift) - x;
    const int mx = (a.x * wa + b.x * x) >> shift;
    const int my = (a.y * wa + b.y * x) >> shift;

    return (v2u16_t) { mx, my };
}
149
150
/* Blend two 3-component values: x == 0 yields a, x == (1 << shift) yields b */
static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift)
{
    /* Weight of `a`; it and x add up to (1 << shift) */
    const int wa = (1 << shift) - x;
    const int mx = (a.x * wa + b.x * x) >> shift;
    const int my = (a.y * wa + b.y * x) >> shift;
    const int mz = (a.z * wa + b.z * x) >> shift;

    return (v3u16_t) { mx, my, mz };
}
159
160
/**
 * Trilinear lookup into the output LUT.
 *
 * ipt.x indexes the innermost LUT dimension with OUTPUT_LUT_BITS_I bits of
 * resolution; ipt.y and ipt.z index the middle and outermost dimensions with
 * OUTPUT_LUT_BITS_PT bits each. The low bits of each component are the
 * interpolation fractions.
 */
static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt)
{
    const int shift_i  = 16 - OUTPUT_LUT_BITS_I;
    const int shift_pt = 16 - OUTPUT_LUT_BITS_PT;

    /* Cell index and fractional offset along each axis */
    const int i0 = ipt.x >> shift_i;
    const int p0 = ipt.y >> shift_pt;
    const int t0 = ipt.z >> shift_pt;
    const int fi = ipt.x & ((1 << shift_i)  - 1);
    const int fp = ipt.y & ((1 << shift_pt) - 1);
    const int ft = ipt.z & ((1 << shift_pt) - 1);

    /* Upper neighbours, clamped to the last LUT entry */
    const int i1 = FFMIN(i0 + 1, OUTPUT_LUT_SIZE_I  - 1);
    const int p1 = FFMIN(p0 + 1, OUTPUT_LUT_SIZE_PT - 1);
    const int t1 = FFMIN(t0 + 1, OUTPUT_LUT_SIZE_PT - 1);

    /* Collapse the outermost (ipt.z) axis first: four edges of the cell */
    const v3u16_t e00 = lerp3u16(lut3d->output[t0][p0][i0],
                                 lut3d->output[t1][p0][i0], ft, shift_pt);
    const v3u16_t e10 = lerp3u16(lut3d->output[t0][p1][i0],
                                 lut3d->output[t1][p1][i0], ft, shift_pt);
    const v3u16_t e01 = lerp3u16(lut3d->output[t0][p0][i1],
                                 lut3d->output[t1][p0][i1], ft, shift_pt);
    const v3u16_t e11 = lerp3u16(lut3d->output[t0][p1][i1],
                                 lut3d->output[t1][p1][i1], ft, shift_pt);

    /* Then the middle (ipt.y) axis, and finally the innermost (ipt.x) axis */
    const v3u16_t lo = lerp3u16(e00, e10, fp, shift_pt);
    const v3u16_t hi = lerp3u16(e01, e11, fp, shift_pt);

    return lerp3u16(lo, hi, fi, shift_i);
}
192
193
/**
 * Apply the 1D tone-map LUT to a value.
 *
 * The LUT is indexed by ipt.x and linearly interpolated. The first component
 * of the interpolated entry replaces ipt.x; the second is a 1.15 fixed-point
 * factor that scales ipt.y and ipt.z around the midpoint (1 << 15).
 */
static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt)
{
    const int shift = 16 - TONE_LUT_BITS;
    const int idx   = ipt.x >> shift;
    const int frac  = ipt.x & ((1 << shift) - 1);
    /* Upper neighbour, clamped to the last LUT entry */
    const int next  = FFMIN(idx + 1, TONE_LUT_SIZE - 1);

    const v2u16_t w = lerp2u16(lut3d->tone_map[idx], lut3d->tone_map[next],
                               frac, shift);

    /* Offset that keeps the midpoint (1 << 15) fixed under the scaling */
    const int base = (1 << 15) - w.y;
    const int y    = base + (ipt.y * w.y >> 15);
    const int z    = base + (ipt.z * w.y >> 15);

    ipt.x = w.x;
    ipt.y = y;
    ipt.z = z;
    return ipt;
}
210
211
/**
 * Fill the LUT tables for the given color mapping.
 *
 * The LUT is treated as dynamic when map->src.frame_peak.num is positive. In
 * that case the input and output tables are generated and the tone-map table
 * is initialized from map->src; otherwise only the static input table is
 * generated.
 *
 * @param lut3d   context to fill
 * @param fmt_in  input pixel format, checked with ff_sws_lut3d_test_fmt()
 * @param fmt_out output pixel format, checked with ff_sws_lut3d_test_fmt()
 * @param map     color mapping; a copy is stored in the context
 * @return AVERROR(EINVAL) if either format is rejected, otherwise the result
 *         of the table generator (0 on the successful dynamic path)
 */
int ff_sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in,
                          enum AVPixelFormat fmt_out, const SwsColorMap *map)
{
    int ret;

    if (!(ff_sws_lut3d_test_fmt(fmt_in, 0) && ff_sws_lut3d_test_fmt(fmt_out, 1)))
        return AVERROR(EINVAL);

    lut3d->dynamic = map->src.frame_peak.num > 0;
    lut3d->map = *map;

    if (!lut3d->dynamic)
        return ff_sws_color_map_generate_static(&lut3d->input[0][0][0],
                                             INPUT_LUT_SIZE, map);

    ret = ff_sws_color_map_generate_dynamic(&lut3d->input[0][0][0],
                                         &lut3d->output[0][0][0],
                                         INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I,
                                         OUTPUT_LUT_SIZE_PT, map);
    if (ret < 0)
        return ret;

    /* Populate the tone-map table so the initial state is usable */
    ff_sws_lut3d_update(lut3d, &map->src);
    return 0;
}
238
239
/**
 * Refresh the tone-map table from new per-frame statistics.
 *
 * Does nothing when new_src is NULL or the LUT is not dynamic. Otherwise the
 * stored frame_peak / frame_avg are replaced and the tone-map table is
 * regenerated from the stored color map.
 */
void ff_sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src)
{
    if (new_src && lut3d->dynamic) {
        lut3d->map.src.frame_peak = new_src->frame_peak;
        lut3d->map.src.frame_avg  = new_src->frame_avg;

        ff_sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map);
    }
}
249
250
void ff_sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride,
251
                        uint8_t *out, int out_stride, int w, int h)
252
0
{
253
0
    while (h--) {
254
0
        const uint16_t *in16 = (const uint16_t *) in;
255
0
        uint16_t *out16 = (uint16_t *) out;
256
257
0
        for (int x = 0; x < w; x++) {
258
0
            v3u16_t c = { in16[0], in16[1], in16[2] };
259
0
            c = lookup_input16(lut3d, c);
260
261
0
            if (lut3d->dynamic) {
262
0
                c = apply_tone_map(lut3d, c);
263
0
                c = lookup_output(lut3d, c);
264
0
            }
265
266
0
            out16[0] = c.x;
267
0
            out16[1] = c.y;
268
0
            out16[2] = c.z;
269
0
            out16[3] = in16[3];
270
0
            in16  += 4;
271
0
            out16 += 4;
272
0
        }
273
274
0
        in  += in_stride;
275
0
        out += out_stride;
276
0
    }
277
0
}