/src/ffmpeg/libswscale/cms.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2024 Niklas Haas |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include <math.h> |
22 | | #include <string.h> |
23 | | |
24 | | #include "libavutil/attributes.h" |
25 | | #include "libavutil/avassert.h" |
26 | | #include "libavutil/csp.h" |
27 | | #include "libavutil/slicethread.h" |
28 | | |
29 | | #include "cms.h" |
30 | | #include "csputils.h" |
31 | | #include "libswscale/swscale.h" |
32 | | #include "format.h" |
33 | | |
34 | | bool ff_sws_color_map_noop(const SwsColorMap *map) |
35 | 0 | { |
36 | | /* If the encoding space is different, we must go through a conversion */ |
37 | 0 | if (map->src.prim != map->dst.prim || map->src.trc != map->dst.trc) |
38 | 0 | return false; |
39 | | |
40 | | /* If the black point changes, we have to perform black point compensation */ |
41 | 0 | if (av_cmp_q(map->src.min_luma, map->dst.min_luma)) |
42 | 0 | return false; |
43 | | |
44 | 0 | switch (map->intent) { |
45 | 0 | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: |
46 | 0 | case SWS_INTENT_RELATIVE_COLORIMETRIC: |
47 | 0 | return ff_prim_superset(&map->dst.gamut, &map->src.gamut) && |
48 | 0 | av_cmp_q(map->src.max_luma, map->dst.max_luma) <= 0; |
49 | 0 | case SWS_INTENT_PERCEPTUAL: |
50 | 0 | case SWS_INTENT_SATURATION: |
51 | 0 | return ff_prim_equal(&map->dst.gamut, &map->src.gamut) && |
52 | 0 | !av_cmp_q(map->src.max_luma, map->dst.max_luma); |
53 | 0 | default: |
54 | 0 | av_assert0(!"Invalid gamut mapping intent?"); |
55 | 0 | return true; |
56 | 0 | } |
57 | 0 | } |
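/*
 * Note the intent-dependent asymmetry above: for the colorimetric intents,
 * a destination gamut and peak that merely contain the source already make
 * the mapping a no-op (nothing would get clipped), whereas perceptual and
 * saturation mapping are only skipped for exactly matching gamuts and peaks,
 * since those intents remap colors even when everything already fits.
 */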
58 | | |
59 | | /* Approximation of gamut hull at a given intensity level */ |
60 | | static const float hull(float I) |
61 | 0 | { |
62 | 0 | return ((I - 6.0f) * I + 9.0f) * I; |
63 | 0 | } |
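/*
 * ((I - 6) * I + 9) * I factors as I * (I - 3)^2: it vanishes at I = 0,
 * peaks at hull(1) = 4 and returns to zero at I = 3. tone_map_apply() below
 * uses the ratio hull(I_new) / hull(I_old) to rescale chroma roughly in
 * proportion to the gamut volume available at the new intensity.
 */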
64 | | |
65 | | /* For some minimal type safety, and code cleanliness */ |
66 | | typedef struct RGB { |
67 | | float R, G, B; /* nits */ |
68 | | } RGB; |
69 | | |
70 | | typedef struct IPT { |
71 | | float I, P, T; |
72 | | } IPT; |
73 | | |
74 | | typedef struct ICh { |
75 | | float I, C, h; |
76 | | } ICh; |
77 | | |
78 | | static av_always_inline ICh ipt2ich(IPT c) |
79 | 0 | { |
80 | 0 | return (ICh) { |
81 | 0 | .I = c.I, |
82 | 0 | .C = sqrtf(c.P * c.P + c.T * c.T), |
83 | 0 | .h = atan2f(c.T, c.P), |
84 | 0 | }; |
85 | 0 | } |
86 | | |
87 | | static av_always_inline IPT ich2ipt(ICh c) |
88 | 0 | { |
89 | 0 | return (IPT) { |
90 | 0 | .I = c.I, |
91 | 0 | .P = c.C * cosf(c.h), |
92 | 0 | .T = c.C * sinf(c.h), |
93 | 0 | }; |
94 | 0 | } |
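/*
 * Illustrative sketch (not part of the file): ICh is just the polar form of
 * the P/T plane, so a round trip through ipt2ich()/ich2ipt() reproduces the
 * input up to float rounding.
 */
static av_always_inline IPT ipt_polar_roundtrip_example(IPT c)
{
    ICh polar = ipt2ich(c); /* C = sqrt(P^2 + T^2), h = atan2(T, P) */
    return ich2ipt(polar);  /* P = C * cos(h),      T = C * sin(h)  */
}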
95 | | |
96 | | /* Helper struct containing pre-computed cached values describing a gamut */ |
97 | | typedef struct Gamut { |
98 | | SwsMatrix3x3 encoding2lms; |
99 | | SwsMatrix3x3 lms2encoding; |
100 | | SwsMatrix3x3 lms2content; |
101 | | SwsMatrix3x3 content2lms; |
102 | | av_csp_eotf_function eotf; |
103 | | av_csp_eotf_function eotf_inv; |
104 | | float Iavg_frame; |
105 | | float Imax_frame; |
106 | | float Imin, Imax; |
107 | | float Lb, Lw; |
108 | | AVCIExy wp; |
109 | | ICh peak; /* updated as needed in loop body when hue changes */ |
110 | | } Gamut; |
111 | | |
112 | | static Gamut gamut_from_colorspace(SwsColor fmt) |
113 | 0 | { |
114 | 0 | const AVColorPrimariesDesc *encoding = av_csp_primaries_desc_from_id(fmt.prim); |
115 | 0 | const AVColorPrimariesDesc content = { |
116 | 0 | .prim = fmt.gamut, |
117 | 0 | .wp = encoding->wp, |
118 | 0 | }; |
119 | | 
120 | 0 | const float Lw = av_q2d(fmt.max_luma), Lb = av_q2d(fmt.min_luma); |
121 | 0 | const float Imax = pq_oetf(Lw); |
122 | | 
123 | 0 | return (Gamut) { |
124 | 0 | .encoding2lms = ff_sws_ipt_rgb2lms(encoding), |
125 | 0 | .lms2encoding = ff_sws_ipt_lms2rgb(encoding), |
126 | 0 | .lms2content = ff_sws_ipt_lms2rgb(&content), |
127 | 0 | .content2lms = ff_sws_ipt_rgb2lms(&content), |
128 | 0 | .eotf = av_csp_itu_eotf(fmt.trc), |
129 | 0 | .eotf_inv = av_csp_itu_eotf_inv(fmt.trc), |
130 | 0 | .wp = encoding->wp, |
131 | 0 | .Imin = pq_oetf(Lb), |
132 | 0 | .Imax = Imax, |
133 | 0 | .Imax_frame = fmt.frame_peak.den ? pq_oetf(av_q2d(fmt.frame_peak)) : Imax, |
134 | 0 | .Iavg_frame = fmt.frame_avg.den ? pq_oetf(av_q2d(fmt.frame_avg)) : 0.0f, |
135 | 0 | .Lb = Lb, |
136 | 0 | .Lw = Lw, |
137 | 0 | }; |
138 | 0 | } |
139 | | |
140 | | static av_always_inline IPT rgb2ipt(RGB c, const SwsMatrix3x3 rgb2lms) |
141 | 0 | { |
142 | 0 | const float L = rgb2lms.m[0][0] * c.R + |
143 | 0 | rgb2lms.m[0][1] * c.G + |
144 | 0 | rgb2lms.m[0][2] * c.B; |
145 | 0 | const float M = rgb2lms.m[1][0] * c.R + |
146 | 0 | rgb2lms.m[1][1] * c.G + |
147 | 0 | rgb2lms.m[1][2] * c.B; |
148 | 0 | const float S = rgb2lms.m[2][0] * c.R + |
149 | 0 | rgb2lms.m[2][1] * c.G + |
150 | 0 | rgb2lms.m[2][2] * c.B; |
151 | 0 | const float Lp = pq_oetf(L); |
152 | 0 | const float Mp = pq_oetf(M); |
153 | 0 | const float Sp = pq_oetf(S); |
154 | 0 | return (IPT) { |
155 | 0 | .I = 0.4000f * Lp + 0.4000f * Mp + 0.2000f * Sp, |
156 | 0 | .P = 4.4550f * Lp - 4.8510f * Mp + 0.3960f * Sp, |
157 | 0 | .T = 0.8056f * Lp + 0.3572f * Mp - 1.1628f * Sp, |
158 | 0 | }; |
159 | 0 | } |
160 | | |
161 | | static av_always_inline RGB ipt2rgb(IPT c, const SwsMatrix3x3 lms2rgb) |
162 | 0 | { |
163 | 0 | const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T; |
164 | 0 | const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T; |
165 | 0 | const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T; |
166 | 0 | const float L = pq_eotf(Lp); |
167 | 0 | const float M = pq_eotf(Mp); |
168 | 0 | const float S = pq_eotf(Sp); |
169 | 0 | return (RGB) { |
170 | 0 | .R = lms2rgb.m[0][0] * L + |
171 | 0 | lms2rgb.m[0][1] * M + |
172 | 0 | lms2rgb.m[0][2] * S, |
173 | 0 | .G = lms2rgb.m[1][0] * L + |
174 | 0 | lms2rgb.m[1][1] * M + |
175 | 0 | lms2rgb.m[1][2] * S, |
176 | 0 | .B = lms2rgb.m[2][0] * L + |
177 | 0 | lms2rgb.m[2][1] * M + |
178 | 0 | lms2rgb.m[2][2] * S, |
179 | 0 | }; |
180 | 0 | } |
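/*
 * Illustrative sketch (not part of the file): the IPT coefficients in
 * ipt2rgb() are the inverse of the 3x3 used in rgb2ipt(), and pq_eotf()
 * inverts pq_oetf(), so converting through a matching matrix pair returns
 * the original color up to float precision.
 */
static av_always_inline RGB rgb_ipt_roundtrip_example(RGB c, Gamut gamut)
{
    IPT ipt = rgb2ipt(c, gamut.encoding2lms); /* encoding RGB -> LMS -> IPT */
    return ipt2rgb(ipt, gamut.lms2encoding);  /* IPT -> LMS -> encoding RGB */
}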
181 | | |
182 | | static inline bool ingamut(IPT c, Gamut gamut) |
183 | 0 | { |
184 | 0 | const float min_rgb = gamut.Lb - 1e-4f; |
185 | 0 | const float max_rgb = gamut.Lw + 1e-2f; |
186 | 0 | const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T; |
187 | 0 | const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T; |
188 | 0 | const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T; |
189 | 0 | if (Lp < gamut.Imin || Lp > gamut.Imax || |
190 | 0 | Mp < gamut.Imin || Mp > gamut.Imax || |
191 | 0 | Sp < gamut.Imin || Sp > gamut.Imax) |
192 | 0 | { |
193 | | /* Values outside legal LMS range */ |
194 | 0 | return false; |
195 | 0 | } else { |
196 | 0 | const float L = pq_eotf(Lp); |
197 | 0 | const float M = pq_eotf(Mp); |
198 | 0 | const float S = pq_eotf(Sp); |
199 | 0 | RGB rgb = { |
200 | 0 | .R = gamut.lms2content.m[0][0] * L + |
201 | 0 | gamut.lms2content.m[0][1] * M + |
202 | 0 | gamut.lms2content.m[0][2] * S, |
203 | 0 | .G = gamut.lms2content.m[1][0] * L + |
204 | 0 | gamut.lms2content.m[1][1] * M + |
205 | 0 | gamut.lms2content.m[1][2] * S, |
206 | 0 | .B = gamut.lms2content.m[2][0] * L + |
207 | 0 | gamut.lms2content.m[2][1] * M + |
208 | 0 | gamut.lms2content.m[2][2] * S, |
209 | 0 | }; |
210 | 0 | return rgb.R >= min_rgb && rgb.R <= max_rgb && |
211 | 0 | rgb.G >= min_rgb && rgb.G <= max_rgb && |
212 | 0 | rgb.B >= min_rgb && rgb.B <= max_rgb; |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | | static const float maxDelta = 5e-5f; |
217 | | |
218 | | // Find gamut intersection using specified bounds |
219 | | static inline ICh |
220 | | desat_bounded(float I, float h, float Cmin, float Cmax, Gamut gamut) |
221 | 0 | { |
222 | 0 | if (I <= gamut.Imin) |
223 | 0 | return (ICh) { .I = gamut.Imin, .C = 0, .h = h }; |
224 | 0 | else if (I >= gamut.Imax) |
225 | 0 | return (ICh) { .I = gamut.Imax, .C = 0, .h = h }; |
226 | 0 | else { |
227 | 0 | const float maxDI = I * maxDelta; |
228 | 0 | ICh res = { .I = I, .C = (Cmin + Cmax) / 2, .h = h }; |
229 | 0 | do { |
230 | 0 | if (ingamut(ich2ipt(res), gamut)) { |
231 | 0 | Cmin = res.C; |
232 | 0 | } else { |
233 | 0 | Cmax = res.C; |
234 | 0 | } |
235 | 0 | res.C = (Cmin + Cmax) / 2; |
236 | 0 | } while (Cmax - Cmin > maxDI); |
237 | | 
238 | 0 | return res; |
239 | 0 | } |
240 | 0 | } |
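/*
 * The loop above is a plain bisection on chroma: the bracket [Cmin, Cmax]
 * keeps the largest known in-gamut chroma as its lower edge and shrinks
 * until it is tighter than I * maxDelta. Illustrative helper (not part of
 * the file) that queries the gamut boundary the same way saturate() does
 * below, over the full chroma search range [0, 0.5]:
 */
static av_always_inline float gamut_boundary_chroma(float I, float hue, Gamut gamut)
{
    return desat_bounded(I, hue, 0.0f, 0.5f, gamut).C;
}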
241 | | |
242 | | // Finds maximally saturated in-gamut color (for given hue) |
243 | | static inline ICh saturate(float hue, Gamut gamut) |
244 | 0 | { |
245 | 0 | static const float invphi = 0.6180339887498948f; |
246 | 0 | static const float invphi2 = 0.38196601125010515f; |
247 | | 
248 | 0 | ICh lo = { .I = gamut.Imin, .h = hue }; |
249 | 0 | ICh hi = { .I = gamut.Imax, .h = hue }; |
250 | 0 | float de = hi.I - lo.I; |
251 | 0 | ICh a = { .I = lo.I + invphi2 * de }; |
252 | 0 | ICh b = { .I = lo.I + invphi * de }; |
253 | 0 | a = desat_bounded(a.I, hue, 0.0f, 0.5f, gamut); |
254 | 0 | b = desat_bounded(b.I, hue, 0.0f, 0.5f, gamut); |
255 | | 
256 | 0 | while (de > maxDelta) { |
257 | 0 | de *= invphi; |
258 | 0 | if (a.C > b.C) { |
259 | 0 | hi = b; |
260 | 0 | b = a; |
261 | 0 | a.I = lo.I + invphi2 * de; |
262 | 0 | a = desat_bounded(a.I, hue, lo.C - maxDelta, 0.5f, gamut); |
263 | 0 | } else { |
264 | 0 | lo = a; |
265 | 0 | a = b; |
266 | 0 | b.I = lo.I + invphi * de; |
267 | 0 | b = desat_bounded(b.I, hue, hi.C - maxDelta, 0.5f, gamut); |
268 | 0 | } |
269 | 0 | } |
270 | | 
271 | 0 | return a.C > b.C ? a : b; |
272 | 0 | } |
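/*
 * The loop above is a golden-section search that maximizes the boundary
 * chroma C(I) over intensity: invphi = 1/phi and invphi2 = 1/phi^2, so the
 * bracket [lo.I, hi.I] shrinks by a factor of 1/phi per iteration, and each
 * new probe seeds its inner chroma bisection with the chroma already found
 * at the adjacent bracket endpoint (minus maxDelta) to narrow the search.
 */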
273 | | |
274 | | static float softclip(float value, float source, float target) |
275 | 0 | { |
276 | 0 | const float j = SOFTCLIP_KNEE; |
277 | 0 | float peak, x, a, b, scale; |
278 | 0 | if (!target) |
279 | 0 | return 0.0f; |
280 | | |
281 | 0 | peak = source / target; |
282 | 0 | x = fminf(value / target, peak); |
283 | 0 | if (x <= j || peak <= 1.0) |
284 | 0 | return value; |
285 | | |
286 | | /* Apply simple mobius function */ |
287 | 0 | a = -j*j * (peak - 1.0f) / (j*j - 2.0f * j + peak); |
288 | 0 | b = (j*j - 2.0f * j * peak + peak) / fmaxf(1e-6f, peak - 1.0f); |
289 | 0 | scale = (b*b + 2.0f * b*j + j*j) / (b - a); |
290 | | 
291 | 0 | return scale * (x + a) / (x + b) * target; |
292 | 0 | } |
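/*
 * The coefficients above implement the usual Mobius soft-clip: with
 * x = value / target and peak = source / target, a, b and scale satisfy
 *
 *     f(j)    = j     (continuous with the identity at the knee)
 *     f'(j)   = 1     (C1-continuous at the knee)
 *     f(peak) = 1     (the source peak lands exactly on the target)
 *
 * for f(x) = scale * (x + a) / (x + b), so values below j = SOFTCLIP_KNEE
 * pass through unchanged while [j, peak] is compressed smoothly into [j, 1]
 * and rescaled by `target`. perceptual() below calls it with
 * source = maxRGB, so all three channels share the same curve shape.
 */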
293 | | |
294 | | /** |
295 | | * Something like fmixf(base, c, x) but following an exponential curve; note |
296 | | * that this can be used to extend 'c' outwards for x > 1 |
297 | | */ |
298 | | static inline ICh mix_exp(ICh c, float x, float gamma, float base) |
299 | 0 | { |
300 | 0 | return (ICh) { |
301 | 0 | .I = base + (c.I - base) * powf(x, gamma), |
302 | 0 | .C = c.C * x, |
303 | 0 | .h = c.h, |
304 | 0 | }; |
305 | 0 | } |
306 | | |
307 | | /** |
308 | | * Drop gamma for colors approaching black and achromatic to avoid numerical |
309 | | * instabilities, and excessive brightness boosting of grain, while also |
310 | | * strongly boosting gamma for values exceeding the target peak |
311 | | */ |
312 | | static inline float scale_gamma(float gamma, ICh ich, Gamut gamut) |
313 | 0 | { |
314 | 0 | const float Imin = gamut.Imin; |
315 | 0 | const float Irel = fmaxf((ich.I - Imin) / (gamut.peak.I - Imin), 0.0f); |
316 | 0 | return gamma * powf(Irel, 3) * fminf(ich.C / gamut.peak.C, 1.0f); |
317 | 0 | } |
318 | | |
319 | | /* Clip a color along the exponential curve given by `gamma` */ |
320 | | static inline IPT clip_gamma(IPT ipt, float gamma, Gamut gamut) |
321 | 0 | { |
322 | 0 | float lo = 0.0f, hi = 1.0f, x = 0.5f; |
323 | 0 | const float maxDI = fmaxf(ipt.I * maxDelta, 1e-7f); |
324 | 0 | ICh ich; |
325 | | 
326 | 0 | if (ipt.I <= gamut.Imin) |
327 | 0 | return (IPT) { .I = gamut.Imin }; |
328 | 0 | if (ingamut(ipt, gamut)) |
329 | 0 | return ipt; |
330 | | |
331 | 0 | ich = ipt2ich(ipt); |
332 | 0 | if (!gamma) |
333 | 0 | return ich2ipt(desat_bounded(ich.I, ich.h, 0.0f, ich.C, gamut)); |
334 | | |
335 | 0 | gamma = scale_gamma(gamma, ich, gamut); |
336 | 0 | do { |
337 | 0 | ICh test = mix_exp(ich, x, gamma, gamut.peak.I); |
338 | 0 | if (ingamut(ich2ipt(test), gamut)) { |
339 | 0 | lo = x; |
340 | 0 | } else { |
341 | 0 | hi = x; |
342 | 0 | } |
343 | 0 | x = (lo + hi) / 2.0f; |
344 | 0 | } while (hi - lo > maxDI); |
345 | | 
346 | 0 | return ich2ipt(mix_exp(ich, x, gamma, gamut.peak.I)); |
347 | 0 | } |
348 | | |
349 | | typedef struct CmsCtx CmsCtx; |
350 | | struct CmsCtx { |
351 | | /* Tone mapping parameters */ |
352 | | float Qa, Qb, Qc, Pa, Pb, src_knee, dst_knee; /* perceptual */ |
353 | | float I_scale, I_offset; /* linear methods */ |
354 | | |
355 | | /* Colorspace parameters */ |
356 | | Gamut src; |
357 | | Gamut tmp; /* after tone mapping */ |
358 | | Gamut dst; |
359 | | SwsMatrix3x3 adaptation; /* for absolute intent */ |
360 | | |
361 | | /* Invocation parameters */ |
362 | | SwsColorMap map; |
363 | | float (*tone_map)(const CmsCtx *ctx, float I); |
364 | | IPT (*adapt_colors)(const CmsCtx *ctx, IPT ipt); |
365 | | v3u16_t *input; |
366 | | v3u16_t *output; |
367 | | |
368 | | /* Threading parameters */ |
369 | | int slice_size; |
370 | | int size_input; |
371 | | int size_output_I; |
372 | | int size_output_PT; |
373 | | }; |
374 | | |
375 | | /** |
376 | | * Helper function to pick a knee point based on the HDR10+ brightness |
377 | | * metadata and scene brightness average matching. |
378 | | * |
379 | | * Inspired by SMPTE ST2094-10, with some modifications |
380 | | */ |
381 | | static void st2094_pick_knee(float src_max, float src_min, float src_avg, |
382 | | float dst_max, float dst_min, |
383 | | float *out_src_knee, float *out_dst_knee) |
384 | 0 | { |
385 | 0 | const float min_knee = PERCEPTUAL_KNEE_MIN; |
386 | 0 | const float max_knee = PERCEPTUAL_KNEE_MAX; |
387 | 0 | const float def_knee = PERCEPTUAL_KNEE_DEF; |
388 | 0 | const float src_knee_min = fmixf(src_min, src_max, min_knee); |
389 | 0 | const float src_knee_max = fmixf(src_min, src_max, max_knee); |
390 | 0 | const float dst_knee_min = fmixf(dst_min, dst_max, min_knee); |
391 | 0 | const float dst_knee_max = fmixf(dst_min, dst_max, max_knee); |
392 | 0 | float src_knee, target, adapted, tuning, adaptation, dst_knee; |
393 | | |
394 | | /* Choose source knee based on dynamic source scene brightness */ |
395 | 0 | src_knee = src_avg ? src_avg : fmixf(src_min, src_max, def_knee); |
396 | 0 | src_knee = av_clipf(src_knee, src_knee_min, src_knee_max); |
397 | | |
398 | | /* Choose target adaptation point based on linearly re-scaling source knee */ |
399 | 0 | target = (src_knee - src_min) / (src_max - src_min); |
400 | 0 | adapted = fmixf(dst_min, dst_max, target); |
401 | | |
402 | | /** |
403 | | * Choose the destination knee by picking the perceptual adaptation point |
404 | | * between the source knee and the desired target. This moves the knee |
405 | | * point, on the vertical axis, closer to the 1:1 (neutral) line. |
406 | | * |
407 | | * Adjust the adaptation strength towards 1 based on how close the knee |
408 | | * point is to its extreme values (min/max knee) |
409 | | */ |
410 | 0 | tuning = smoothstepf(max_knee, def_knee, target) * |
411 | 0 | smoothstepf(min_knee, def_knee, target); |
412 | 0 | adaptation = fmixf(1.0f, PERCEPTUAL_ADAPTATION, tuning); |
413 | 0 | dst_knee = fmixf(src_knee, adapted, adaptation); |
414 | 0 | dst_knee = av_clipf(dst_knee, dst_knee_min, dst_knee_max); |
415 | | 
416 | 0 | *out_src_knee = src_knee; |
417 | 0 | *out_dst_knee = dst_knee; |
418 | 0 | } |
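/*
 * All quantities here are PQ-encoded intensities (the I axis), not nits:
 * tone_map_setup() passes in ctx->src.Imin/Imax etc., which come from
 * pq_oetf() of the corresponding luminances. Purely hypothetical example:
 * with src_min = 0.0, src_max = 0.8, src_knee = 0.4, dst_min = 0.0 and
 * dst_max = 0.6, the linear re-scaling gives target = 0.5 and adapted = 0.3,
 * so (before the final clamp) dst_knee ends up between 0.3 and 0.4
 * depending on the adaptation strength.
 */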
419 | | |
420 | | static void tone_map_setup(CmsCtx *ctx, bool dynamic) |
421 | 0 | { |
422 | 0 | const float dst_min = ctx->dst.Imin; |
423 | 0 | const float dst_max = ctx->dst.Imax; |
424 | 0 | const float src_min = ctx->src.Imin; |
425 | 0 | const float src_max = dynamic ? ctx->src.Imax_frame : ctx->src.Imax; |
426 | 0 | const float src_avg = dynamic ? ctx->src.Iavg_frame : 0.0f; |
427 | 0 | float slope, ratio, in_min, in_max, out_min, out_max, t; |
428 | | 
429 | 0 | switch (ctx->map.intent) { |
430 | 0 | case SWS_INTENT_PERCEPTUAL: |
431 | 0 | st2094_pick_knee(src_max, src_min, src_avg, dst_max, dst_min, |
432 | 0 | &ctx->src_knee, &ctx->dst_knee); |
433 | | |
434 | | /* Solve for linear knee (Pa = 0) */ |
435 | 0 | slope = (ctx->dst_knee - dst_min) / (ctx->src_knee - src_min); |
436 | | |
437 | | /** |
438 | | * Tune the slope at the knee point slightly: raise it to a user-provided |
439 | | * gamma exponent, multiplied by an extra tuning coefficient designed to |
440 | | * make the slope closer to 1.0 when the difference in peaks is low, and |
441 | | * closer to linear when the difference between peaks is high. |
442 | | */ |
443 | 0 | ratio = src_max / dst_max - 1.0f; |
444 | 0 | ratio = av_clipf(SLOPE_TUNING * ratio, SLOPE_OFFSET, 1.0f + SLOPE_OFFSET); |
445 | 0 | slope = powf(slope, (1.0f - PERCEPTUAL_CONTRAST) * ratio); |
446 | | |
447 | | /* Normalize everything relative to the pivot to make the math easier */ |
448 | 0 | in_min = src_min - ctx->src_knee; |
449 | 0 | in_max = src_max - ctx->src_knee; |
450 | 0 | out_min = dst_min - ctx->dst_knee; |
451 | 0 | out_max = dst_max - ctx->dst_knee; |
452 | | |
453 | | /** |
454 | | * Solve P of order 2 for: |
455 | | * P(in_min) = out_min |
456 | | * P'(0.0) = slope |
457 | | * P(0.0) = 0.0 |
458 | | */ |
459 | 0 | ctx->Pa = (out_min - slope * in_min) / (in_min * in_min); |
460 | 0 | ctx->Pb = slope; |
461 | | |
462 | | /** |
463 | | * Solve Q of order 3 for: |
464 | | * Q(in_max) = out_max |
465 | | * Q''(in_max) = 0.0 |
466 | | * Q(0.0) = 0.0 |
467 | | * Q'(0.0) = slope |
468 | | */ |
469 | 0 | t = 2 * in_max * in_max; |
470 | 0 | ctx->Qa = (slope * in_max - out_max) / (in_max * t); |
471 | 0 | ctx->Qb = -3 * (slope * in_max - out_max) / t; |
472 | 0 | ctx->Qc = slope; |
473 | 0 | break; |
474 | 0 | case SWS_INTENT_SATURATION: |
475 | | /* Linear stretch */ |
476 | 0 | ctx->I_scale = (dst_max - dst_min) / (src_max - src_min); |
477 | 0 | ctx->I_offset = dst_min - src_min * ctx->I_scale; |
478 | 0 | break; |
479 | 0 | case SWS_INTENT_RELATIVE_COLORIMETRIC: |
480 | | /* Pure black point adaptation */ |
481 | 0 | ctx->I_scale = src_max / (src_max - src_min) / |
482 | 0 | (dst_max / (dst_max - dst_min)); |
483 | 0 | ctx->I_offset = dst_min - src_min * ctx->I_scale; |
484 | 0 | break; |
485 | 0 | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: |
486 | | /* Hard clip */ |
487 | 0 | ctx->I_scale = 1.0f; |
488 | 0 | ctx->I_offset = 0.0f; |
489 | 0 | break; |
490 | 0 | } |
491 | 0 | } |
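/*
 * Derivation of the perceptual spline coefficients above, in knee-relative
 * coordinates (the pivot is subtracted, so the knee sits at 0):
 *
 * Below the knee, P(x) = Pa*x^2 + Pb*x already has P(0) = 0 and
 * P'(0) = Pb = slope; requiring P(in_min) = out_min gives
 *     Pa = (out_min - slope*in_min) / in_min^2.
 *
 * Above the knee, Q(x) = Qa*x^3 + Qb*x^2 + Qc*x has Q(0) = 0 and
 * Q'(0) = Qc = slope; requiring Q''(in_max) = 6*Qa*in_max + 2*Qb = 0 and
 * Q(in_max) = out_max gives, with t = 2*in_max^2,
 *     Qa = (slope*in_max - out_max) / (in_max * t),
 *     Qb = -3 * (slope*in_max - out_max) / t,
 * matching the assignments to ctx->Qa and ctx->Qb above.
 */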
492 | | |
493 | | static av_always_inline IPT tone_map_apply(const CmsCtx *ctx, IPT ipt) |
494 | 0 | { |
495 | 0 | float I = ipt.I, desat; |
496 | | 
497 | 0 | if (ctx->map.intent == SWS_INTENT_PERCEPTUAL) { |
498 | 0 | const float Pa = ctx->Pa, Pb = ctx->Pb; |
499 | 0 | const float Qa = ctx->Qa, Qb = ctx->Qb, Qc = ctx->Qc; |
500 | 0 | I -= ctx->src_knee; |
501 | 0 | I = I > 0 ? ((Qa * I + Qb) * I + Qc) * I : (Pa * I + Pb) * I; |
502 | 0 | I += ctx->dst_knee; |
503 | 0 | } else { |
504 | 0 | I = ctx->I_scale * I + ctx->I_offset; |
505 | 0 | } |
506 | | |
507 | | /** |
508 | | * Avoids raising saturation excessively when raising brightness, and |
509 | | * also desaturates when reducing brightness greatly to account for the |
510 | | * reduction in gamut volume. |
511 | | */ |
512 | 0 | desat = fminf(ipt.I / I, hull(I) / hull(ipt.I)); |
513 | 0 | return (IPT) { |
514 | 0 | .I = I, |
515 | 0 | .P = ipt.P * desat, |
516 | 0 | .T = ipt.T * desat, |
517 | 0 | }; |
518 | 0 | } |
519 | | |
520 | | static IPT perceptual(const CmsCtx *ctx, IPT ipt) |
521 | 0 | { |
522 | 0 | ICh ich = ipt2ich(ipt); |
523 | 0 | IPT mapped = rgb2ipt(ipt2rgb(ipt, ctx->tmp.lms2content), ctx->dst.content2lms); |
524 | 0 | RGB rgb; |
525 | 0 | float maxRGB; |
526 | | |
527 | | /* Protect in gamut region */ |
528 | 0 | const float maxC = fmaxf(ctx->tmp.peak.C, ctx->dst.peak.C); |
529 | 0 | float k = smoothstepf(PERCEPTUAL_DEADZONE, 1.0f, ich.C / maxC); |
530 | 0 | k *= PERCEPTUAL_STRENGTH; |
531 | 0 | ipt.I = fmixf(ipt.I, mapped.I, k); |
532 | 0 | ipt.P = fmixf(ipt.P, mapped.P, k); |
533 | 0 | ipt.T = fmixf(ipt.T, mapped.T, k); |
534 | | 
535 | 0 | rgb = ipt2rgb(ipt, ctx->dst.lms2content); |
536 | 0 | maxRGB = fmaxf(rgb.R, fmaxf(rgb.G, rgb.B)); |
537 | 0 | rgb.R = fmaxf(softclip(rgb.R, maxRGB, ctx->dst.Lw), ctx->dst.Lb); |
538 | 0 | rgb.G = fmaxf(softclip(rgb.G, maxRGB, ctx->dst.Lw), ctx->dst.Lb); |
539 | 0 | rgb.B = fmaxf(softclip(rgb.B, maxRGB, ctx->dst.Lw), ctx->dst.Lb); |
540 | | 
541 | 0 | return rgb2ipt(rgb, ctx->dst.content2lms); |
542 | 0 | } |
543 | | |
544 | | static IPT relative(const CmsCtx *ctx, IPT ipt) |
545 | 0 | { |
546 | 0 | return clip_gamma(ipt, COLORIMETRIC_GAMMA, ctx->dst); |
547 | 0 | } |
548 | | |
549 | | static IPT absolute(const CmsCtx *ctx, IPT ipt) |
550 | 0 | { |
551 | 0 | RGB rgb = ipt2rgb(ipt, ctx->dst.lms2encoding); |
552 | 0 | float c[3] = { rgb.R, rgb.G, rgb.B }; |
553 | 0 | ff_sws_matrix3x3_apply(&ctx->adaptation, c); |
554 | 0 | ipt = rgb2ipt((RGB) { c[0], c[1], c[2] }, ctx->dst.encoding2lms); |
555 | | 
556 | 0 | return clip_gamma(ipt, COLORIMETRIC_GAMMA, ctx->dst); |
557 | 0 | } |
558 | | |
559 | 0 | static IPT saturation(const CmsCtx *ctx, IPT ipt) |
560 | 0 | { |
561 | 0 | RGB rgb = ipt2rgb(ipt, ctx->tmp.lms2content); |
562 | 0 | return rgb2ipt(rgb, ctx->dst.content2lms); |
563 | 0 | } |
564 | | |
565 | | static av_always_inline av_const uint16_t av_round16f(float x) |
566 | 0 | { |
567 | 0 | return av_clip_uint16(x * (UINT16_MAX - 1) + 0.5f); |
568 | 0 | } |
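/*
 * Note the scale: av_round16f() maps 0.0 -> 0 and 1.0 -> UINT16_MAX - 1
 * (65534), not 65535. This matches PT_offset in generate_slice(), where
 * (1 << 15) / (UINT16_MAX - 1) shifts the signed P/T components so that
 * 0.0 is stored as exactly 1 << 15.
 */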
569 | | |
570 | | /* Call this whenever the hue changes inside the loop body */ |
571 | | static av_always_inline void update_hue_peaks(CmsCtx *ctx, float P, float T) |
572 | 0 | { |
573 | 0 | const float hue = atan2f(T, P); |
574 | 0 | switch (ctx->map.intent) { |
575 | 0 | case SWS_INTENT_PERCEPTUAL: |
576 | 0 | ctx->tmp.peak = saturate(hue, ctx->tmp); |
577 | | /* fall through */ |
578 | 0 | case SWS_INTENT_RELATIVE_COLORIMETRIC: |
579 | 0 | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: |
580 | 0 | ctx->dst.peak = saturate(hue, ctx->dst); |
581 | 0 | return; |
582 | 0 | default: |
583 | 0 | return; |
584 | 0 | } |
585 | 0 | } |
586 | | |
587 | | static void generate_slice(void *priv, int jobnr, int threadnr, int nb_jobs, |
588 | | int nb_threads) |
589 | 0 | { |
590 | 0 | CmsCtx ctx = *(const CmsCtx *) priv; |
591 | | 
592 | 0 | const int slice_start = jobnr * ctx.slice_size; |
593 | 0 | const int slice_stride = ctx.size_input * ctx.size_input; |
594 | 0 | const int slice_end = FFMIN((jobnr + 1) * ctx.slice_size, ctx.size_input); |
595 | 0 | v3u16_t *input = &ctx.input[slice_start * slice_stride]; |
596 | | 
597 | 0 | const int output_slice_h = (ctx.size_output_PT + nb_jobs - 1) / nb_jobs; |
598 | 0 | const int output_start = jobnr * output_slice_h; |
599 | 0 | const int output_stride = ctx.size_output_PT * ctx.size_output_I; |
600 | 0 | const int output_end = FFMIN((jobnr + 1) * output_slice_h, ctx.size_output_PT); |
601 | 0 | v3u16_t *output = ctx.output ? &ctx.output[output_start * output_stride] : NULL; |
602 | | 
603 | 0 | const float I_scale = 1.0f / (ctx.src.Imax - ctx.src.Imin); |
604 | 0 | const float I_offset = -ctx.src.Imin * I_scale; |
605 | 0 | const float PT_offset = (float) (1 << 15) / (UINT16_MAX - 1); |
606 | | 
607 | 0 | const float input_scale = 1.0f / (ctx.size_input - 1); |
608 | 0 | const float output_scale_PT = 1.0f / (ctx.size_output_PT - 1); |
609 | 0 | const float output_scale_I = (ctx.tmp.Imax - ctx.tmp.Imin) / |
610 | 0 | (ctx.size_output_I - 1); |
611 | | 
612 | 0 | for (int Bx = slice_start; Bx < slice_end; Bx++) { |
613 | 0 | const float B = input_scale * Bx; |
614 | 0 | for (int Gx = 0; Gx < ctx.size_input; Gx++) { |
615 | 0 | const float G = input_scale * Gx; |
616 | 0 | for (int Rx = 0; Rx < ctx.size_input; Rx++) { |
617 | 0 | double c[3] = { input_scale * Rx, G, B }; |
618 | 0 | RGB rgb; |
619 | 0 | IPT ipt; |
620 | | 
621 | 0 | ctx.src.eotf(ctx.src.Lw, ctx.src.Lb, c); |
622 | 0 | rgb = (RGB) { c[0], c[1], c[2] }; |
623 | 0 | ipt = rgb2ipt(rgb, ctx.src.encoding2lms); |
624 | | 
625 | 0 | if (output) { |
626 | | /* Save intermediate value to 3DLUT */ |
627 | 0 | *input++ = (v3u16_t) { |
628 | 0 | av_round16f(I_scale * ipt.I + I_offset), |
629 | 0 | av_round16f(ipt.P + PT_offset), |
630 | 0 | av_round16f(ipt.T + PT_offset), |
631 | 0 | }; |
632 | 0 | } else { |
633 | 0 | update_hue_peaks(&ctx, ipt.P, ipt.T); |
634 | | 
635 | 0 | ipt = tone_map_apply(&ctx, ipt); |
636 | 0 | ipt = ctx.adapt_colors(&ctx, ipt); |
637 | 0 | rgb = ipt2rgb(ipt, ctx.dst.lms2encoding); |
638 | | 
639 | 0 | c[0] = rgb.R; |
640 | 0 | c[1] = rgb.G; |
641 | 0 | c[2] = rgb.B; |
642 | 0 | ctx.dst.eotf_inv(ctx.dst.Lw, ctx.dst.Lb, c); |
643 | 0 | *input++ = (v3u16_t) { |
644 | 0 | av_round16f(c[0]), |
645 | 0 | av_round16f(c[1]), |
646 | 0 | av_round16f(c[2]), |
647 | 0 | }; |
648 | 0 | } |
649 | 0 | } |
650 | 0 | } |
651 | 0 | } |
652 | | 
653 | 0 | if (!output) |
654 | 0 | return; |
655 | | |
656 | | /* Generate split gamut mapping LUT */ |
657 | 0 | for (int Tx = output_start; Tx < output_end; Tx++) { |
658 | 0 | const float T = output_scale_PT * Tx - PT_offset; |
659 | 0 | for (int Px = 0; Px < ctx.size_output_PT; Px++) { |
660 | 0 | const float P = output_scale_PT * Px - PT_offset; |
661 | 0 | update_hue_peaks(&ctx, P, T); |
662 | | 
663 | 0 | for (int Ix = 0; Ix < ctx.size_output_I; Ix++) { |
664 | 0 | const float I = output_scale_I * Ix + ctx.tmp.Imin; |
665 | 0 | IPT ipt = ctx.adapt_colors(&ctx, (IPT) { I, P, T }); |
666 | 0 | RGB rgb = ipt2rgb(ipt, ctx.dst.lms2encoding); |
667 | 0 | double c[3] = { rgb.R, rgb.G, rgb.B }; |
668 | 0 | ctx.dst.eotf_inv(ctx.dst.Lw, ctx.dst.Lb, c); |
669 | 0 | *output++ = (v3u16_t) { |
670 | 0 | av_round16f(c[0]), |
671 | 0 | av_round16f(c[1]), |
672 | 0 | av_round16f(c[2]), |
673 | 0 | }; |
674 | 0 | } |
675 | 0 | } |
676 | 0 | } |
677 | 0 | } |
678 | | |
679 | | int ff_sws_color_map_generate_static(v3u16_t *lut, int size, const SwsColorMap *map) |
680 | 0 | { |
681 | 0 | return ff_sws_color_map_generate_dynamic(lut, NULL, size, 1, 1, map); |
682 | 0 | } |
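/*
 * Minimal usage sketch (illustrative only, not part of this file; assumes
 * av_malloc_array()/av_free() from libavutil/mem.h are available): a static
 * CMS LUT is a size x size x size table of packed 16-bit triplets,
 * generated with R varying fastest, as in generate_slice() above.
 */
static int example_build_static_lut(const SwsColorMap *map, int size,
                                    v3u16_t **out_lut)
{
    int ret;
    v3u16_t *lut = av_malloc_array((size_t) size * size * size, sizeof(*lut));
    if (!lut)
        return AVERROR(ENOMEM);

    ret = ff_sws_color_map_generate_static(lut, size, map);
    if (ret < 0) {
        av_free(lut);
        return ret;
    }

    *out_lut = lut;
    return 0;
}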
683 | | |
684 | | int ff_sws_color_map_generate_dynamic(v3u16_t *input, v3u16_t *output, |
685 | | int size_input, int size_I, int size_PT, |
686 | | const SwsColorMap *map) |
687 | 0 | { |
688 | 0 | AVSliceThread *slicethread; |
689 | 0 | int ret, num_slices; |
690 | | 
691 | 0 | CmsCtx ctx = { |
692 | 0 | .map = *map, |
693 | 0 | .input = input, |
694 | 0 | .output = output, |
695 | 0 | .size_input = size_input, |
696 | 0 | .size_output_I = size_I, |
697 | 0 | .size_output_PT = size_PT, |
698 | 0 | .src = gamut_from_colorspace(map->src), |
699 | 0 | .dst = gamut_from_colorspace(map->dst), |
700 | 0 | }; |
701 | | 
702 | 0 | switch (ctx.map.intent) { |
703 | 0 | case SWS_INTENT_PERCEPTUAL: ctx.adapt_colors = perceptual; break; |
704 | 0 | case SWS_INTENT_RELATIVE_COLORIMETRIC: ctx.adapt_colors = relative; break; |
705 | 0 | case SWS_INTENT_SATURATION: ctx.adapt_colors = saturation; break; |
706 | 0 | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: ctx.adapt_colors = absolute; break; |
707 | 0 | default: return AVERROR(EINVAL); |
708 | 0 | } |
709 | | |
710 | 0 | if (!output) { |
711 | | /* Tone mapping is handled in a separate step when using dynamic TM */ |
712 | 0 | tone_map_setup(&ctx, false); |
713 | 0 | } |
714 | | |
715 | | /* Intermediate color space after tone mapping */ |
716 | 0 | ctx.tmp = ctx.src; |
717 | 0 | ctx.tmp.Lb = ctx.dst.Lb; |
718 | 0 | ctx.tmp.Lw = ctx.dst.Lw; |
719 | 0 | ctx.tmp.Imin = ctx.dst.Imin; |
720 | 0 | ctx.tmp.Imax = ctx.dst.Imax; |
721 | | 
722 | 0 | if (ctx.map.intent == SWS_INTENT_ABSOLUTE_COLORIMETRIC) { |
723 | | /** |
724 | | * The IPT transform already implies an explicit white point adaptation |
725 | | * from src to dst, so to get absolute colorimetric semantics we have |
726 | | * to explicitly undo this adaptation with a corresponding inverse. |
727 | | */ |
728 | 0 | ctx.adaptation = ff_sws_get_adaptation(&ctx.map.dst.gamut, |
729 | 0 | ctx.dst.wp, ctx.src.wp); |
730 | 0 | } |
731 | | 
732 | 0 | ret = avpriv_slicethread_create(&slicethread, &ctx, generate_slice, NULL, 0); |
733 | 0 | if (ret < 0) |
734 | 0 | return ret; |
735 | | |
736 | 0 | ctx.slice_size = (ctx.size_input + ret - 1) / ret; |
737 | 0 | num_slices = (ctx.size_input + ctx.slice_size - 1) / ctx.slice_size; |
738 | 0 | avpriv_slicethread_execute(slicethread, num_slices, 0); |
739 | 0 | avpriv_slicethread_free(&slicethread); |
740 | 0 | return 0; |
741 | 0 | } |
742 | | |
743 | | void ff_sws_tone_map_generate(v2u16_t *lut, int size, const SwsColorMap *map) |
744 | 0 | { |
745 | 0 | CmsCtx ctx = { |
746 | 0 | .map = *map, |
747 | 0 | .src = gamut_from_colorspace(map->src), |
748 | 0 | .dst = gamut_from_colorspace(map->dst), |
749 | 0 | }; |
750 | | 
751 | 0 | const float src_scale = (ctx.src.Imax - ctx.src.Imin) / (size - 1); |
752 | 0 | const float src_offset = ctx.src.Imin; |
753 | 0 | const float dst_scale = 1.0f / (ctx.dst.Imax - ctx.dst.Imin); |
754 | 0 | const float dst_offset = -ctx.dst.Imin * dst_scale; |
755 | | 
756 | 0 | tone_map_setup(&ctx, true); |
757 | | 
758 | 0 | for (int i = 0; i < size; i++) { |
759 | 0 | const float I = src_scale * i + src_offset; |
760 | 0 | IPT ipt = tone_map_apply(&ctx, (IPT) { I, 1.0f }); |
761 | 0 | lut[i] = (v2u16_t) { |
762 | 0 | av_round16f(dst_scale * ipt.I + dst_offset), |
763 | 0 | av_clip_uint16(ipt.P * (1 << 15) + 0.5f), |
764 | 0 | }; |
765 | 0 | } |
766 | 0 | } |