Coverage Report

Created: 2025-07-23 07:47

/src/libjxl/third_party/skcms/skcms.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2018 Google Inc.
3
 *
4
 * Use of this source code is governed by a BSD-style license that can be
5
 * found in the LICENSE file.
6
 */
7
8
#include "src/skcms_public.h"  // NO_G3_REWRITE
9
#include "src/skcms_internals.h"  // NO_G3_REWRITE
10
#include "src/skcms_Transform.h"  // NO_G3_REWRITE
11
#include <assert.h>
12
#include <float.h>
13
#include <limits.h>
14
#include <stdlib.h>
15
#include <string.h>
16
17
#if defined(__ARM_NEON)
18
    #include <arm_neon.h>
19
#elif defined(__SSE__)
20
    #include <immintrin.h>
21
22
    #if defined(__clang__)
23
        // That #include <immintrin.h> is usually enough, but Clang's headers
24
        // "helpfully" skip including the whole kitchen sink when _MSC_VER is
25
        // defined, because lots of programs on Windows would include that and
26
        // it'd be a lot slower.  But we want all those headers included so we
27
        // can use their features after runtime checks later.
28
        #include <smmintrin.h>
29
        #include <avxintrin.h>
30
        #include <avx2intrin.h>
31
        #include <avx512fintrin.h>
32
        #include <avx512dqintrin.h>
33
    #endif
34
#endif
35
36
using namespace skcms_private;
37
38
static bool sAllowRuntimeCPUDetection = true;
39
40
0
void skcms_DisableRuntimeCPUDetection() {
41
0
    sAllowRuntimeCPUDetection = false;
42
0
}
43
44
2.21M
static float log2f_(float x) {
45
    // The first approximation of log2(x) is its exponent 'e', minus 127.
46
2.21M
    int32_t bits;
47
2.21M
    memcpy(&bits, &x, sizeof(bits));
48
49
2.21M
    float e = (float)bits * (1.0f / (1<<23));
50
51
    // If we use the mantissa too we can refine the error signficantly.
52
2.21M
    int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
53
2.21M
    float m;
54
2.21M
    memcpy(&m, &m_bits, sizeof(m));
55
56
2.21M
    return (e - 124.225514990f
57
2.21M
              -   1.498030302f*m
58
2.21M
              -   1.725879990f/(0.3520887068f + m));
59
2.21M
}
60
265k
static float logf_(float x) {
61
265k
    const float ln2 = 0.69314718f;
62
265k
    return ln2*log2f_(x);
63
265k
}
64
65
1.94M
static float exp2f_(float x) {
66
1.94M
    if (x > 128.0f) {
67
1.29k
        return INFINITY_;
68
1.94M
    } else if (x < -127.0f) {
69
16.5k
        return 0.0f;
70
16.5k
    }
71
1.92M
    float fract = x - floorf_(x);
72
73
1.92M
    float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
74
1.92M
                                        -   1.490129070f*fract
75
1.92M
                                        +  27.728023300f/(4.84252568f - fract));
76
77
    // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
78
    // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
79
    // Negative values are effectively underflow - we'll end up returning a (different) negative
80
    // value, which makes no sense. So clamp to zero.
81
1.92M
    if (fbits >= (float)INT_MAX) {
82
0
        return INFINITY_;
83
1.92M
    } else if (fbits < 0) {
84
77
        return 0;
85
77
    }
86
87
1.92M
    int32_t bits = (int32_t)fbits;
88
1.92M
    memcpy(&x, &bits, sizeof(x));
89
1.92M
    return x;
90
1.92M
}
91
92
// Not static, as it's used by some test tools.
93
2.11M
float powf_(float x, float y) {
94
2.11M
    if (x <= 0.f) {
95
119k
        return 0.f;
96
119k
    }
97
1.99M
    if (x == 1.f) {
98
45.7k
        return 1.f;
99
45.7k
    }
100
1.94M
    return exp2f_(log2f_(x) * y);
101
1.99M
}
102
103
0
static float expf_(float x) {
104
0
    const float log2_e = 1.4426950408889634074f;
105
0
    return exp2f_(log2_e * x);
106
0
}
107
108
3.00M
static float fmaxf_(float x, float y) { return x > y ? x : y; }
109
1.50M
static float fminf_(float x, float y) { return x < y ? x : y; }
110
111
1.61M
static bool isfinitef_(float x) { return 0 == x*0; }
112
113
1.50M
static float minus_1_ulp(float x) {
114
1.50M
    int32_t bits;
115
1.50M
    memcpy(&bits, &x, sizeof(bits));
116
1.50M
    bits = bits - 1;
117
1.50M
    memcpy(&x, &bits, sizeof(bits));
118
1.50M
    return x;
119
1.50M
}
120
121
// Most transfer functions we work with are sRGBish.
122
// For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
123
// and repurpose the other fields to hold the parameters of the HDR functions.
124
struct TF_PQish  { float A,B,C,D,E,F; };
125
struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
126
// We didn't originally support a scale factor K for HLG, and instead just stored 0 in
127
// the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
128
// By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
129
130
0
static float TFKind_marker(skcms_TFType kind) {
131
    // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
132
0
    return -(float)kind;
133
0
}
134
135
static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
136
1.47M
                                                             , TF_HLGish* hlg = nullptr) {
137
1.47M
    if (tf.g < 0) {
138
        // Negative "g" is mapped to enum values; large negative are for sure invalid.
139
890
        if (tf.g < -128) {
140
252
            return skcms_TFType_Invalid;
141
252
        }
142
638
        int enum_g = -static_cast<int>(tf.g);
143
        // Non-whole "g" values are invalid as well.
144
638
        if (static_cast<float>(-enum_g) != tf.g) {
145
595
            return skcms_TFType_Invalid;
146
595
        }
147
        // TODO: soundness checks for PQ/HLG like we do for sRGBish?
148
43
        switch (enum_g) {
149
0
            case skcms_TFType_PQish:
150
0
                if (pq) {
151
0
                    memcpy(pq , &tf.a, sizeof(*pq ));
152
0
                }
153
0
                return skcms_TFType_PQish;
154
0
            case skcms_TFType_HLGish:
155
0
                if (hlg) {
156
0
                    memcpy(hlg, &tf.a, sizeof(*hlg));
157
0
                }
158
0
                return skcms_TFType_HLGish;
159
0
            case skcms_TFType_HLGinvish:
160
0
                if (hlg) {
161
0
                    memcpy(hlg, &tf.a, sizeof(*hlg));
162
0
                }
163
0
                return skcms_TFType_HLGinvish;
164
43
        }
165
43
        return skcms_TFType_Invalid;
166
43
    }
167
168
    // Basic soundness checks for sRGBish transfer functions.
169
1.47M
    if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
170
            // a,c,d,g should be non-negative to make any sense.
171
1.47M
            && tf.a >= 0
172
1.47M
            && tf.c >= 0
173
1.47M
            && tf.d >= 0
174
1.47M
            && tf.g >= 0
175
            // Raising a negative value to a fractional tf->g produces complex numbers.
176
1.47M
            && tf.a * tf.d + tf.b >= 0) {
177
1.47M
        return skcms_TFType_sRGBish;
178
1.47M
    }
179
180
303
    return skcms_TFType_Invalid;
181
1.47M
}
182
183
0
skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
184
0
    return classify(*tf);
185
0
}
186
7.14k
bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
187
7.14k
    return classify(*tf) == skcms_TFType_sRGBish;
188
7.14k
}
189
0
bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
190
0
    return classify(*tf) == skcms_TFType_PQish;
191
0
}
192
0
bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
193
0
    return classify(*tf) == skcms_TFType_HLGish;
194
0
}
195
196
bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
197
                                      float A, float B, float C,
198
0
                                      float D, float E, float F) {
199
0
    *tf = { TFKind_marker(skcms_TFType_PQish), A,B,C,D,E,F };
200
0
    assert(skcms_TransferFunction_isPQish(tf));
201
0
    return true;
202
0
}
203
204
bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
205
                                             float K, float R, float G,
206
0
                                             float a, float b, float c) {
207
0
    *tf = { TFKind_marker(skcms_TFType_HLGish), R,G, a,b,c, K-1.0f };
208
0
    assert(skcms_TransferFunction_isHLGish(tf));
209
0
    return true;
210
0
}
211
212
1.38M
float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
213
1.38M
    float sign = x < 0 ? -1.0f : 1.0f;
214
1.38M
    x *= sign;
215
216
1.38M
    TF_PQish  pq;
217
1.38M
    TF_HLGish hlg;
218
1.38M
    switch (classify(*tf, &pq, &hlg)) {
219
0
        case skcms_TFType_Invalid: break;
220
221
0
        case skcms_TFType_HLGish: {
222
0
            const float K = hlg.K_minus_1 + 1.0f;
223
0
            return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
224
0
                                            : expf_((x-hlg.c)*hlg.a) + hlg.b);
225
0
        }
226
227
        // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
228
0
        case skcms_TFType_HLGinvish: {
229
0
            const float K = hlg.K_minus_1 + 1.0f;
230
0
            x /= K;
231
0
            return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
232
0
                                  : hlg.a * logf_(x - hlg.b) + hlg.c);
233
0
        }
234
235
1.38M
        case skcms_TFType_sRGBish:
236
1.38M
            return sign * (x < tf->d ?       tf->c * x + tf->f
237
1.38M
                                     : powf_(tf->a * x + tf->b, tf->g) + tf->e);
238
239
0
        case skcms_TFType_PQish:
240
0
            return sign *
241
0
                   powf_((pq.A + pq.B * powf_(x, pq.C)) / (pq.D + pq.E * powf_(x, pq.C)), pq.F);
242
1.38M
    }
243
0
    return 0;
244
1.38M
}
245
246
247
1.50M
static float eval_curve(const skcms_Curve* curve, float x) {
248
1.50M
    if (curve->table_entries == 0) {
249
0
        return skcms_TransferFunction_eval(&curve->parametric, x);
250
0
    }
251
252
1.50M
    float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
253
1.50M
    int   lo = (int)                   ix        ,
254
1.50M
          hi = (int)(float)minus_1_ulp(ix + 1.0f);
255
1.50M
    float t = ix - (float)lo;
256
257
1.50M
    float l, h;
258
1.50M
    if (curve->table_8) {
259
3.07k
        l = curve->table_8[lo] * (1/255.0f);
260
3.07k
        h = curve->table_8[hi] * (1/255.0f);
261
1.49M
    } else {
262
1.49M
        uint16_t be_l, be_h;
263
1.49M
        memcpy(&be_l, curve->table_16 + 2*lo, 2);
264
1.49M
        memcpy(&be_h, curve->table_16 + 2*hi, 2);
265
1.49M
        uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
266
1.49M
        uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
267
1.49M
        l = le_l * (1/65535.0f);
268
1.49M
        h = le_h * (1/65535.0f);
269
1.49M
    }
270
1.50M
    return l + (h-l)*t;
271
1.50M
}
272
273
5.30k
float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
274
5.30k
    uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
275
5.30k
    const float dx = 1.0f / static_cast<float>(N - 1);
276
5.30k
    float err = 0;
277
1.36M
    for (uint32_t i = 0; i < N; i++) {
278
1.35M
        float x = static_cast<float>(i) * dx,
279
1.35M
              y = eval_curve(curve, x);
280
1.35M
        err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
281
1.35M
    }
282
5.30k
    return err;
283
5.30k
}
284
285
0
bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
286
0
    return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
287
0
}
288
289
// Additional ICC signature values that are only used internally
290
enum {
291
    // File signature
292
    skcms_Signature_acsp = 0x61637370,
293
294
    // Tag signatures
295
    skcms_Signature_rTRC = 0x72545243,
296
    skcms_Signature_gTRC = 0x67545243,
297
    skcms_Signature_bTRC = 0x62545243,
298
    skcms_Signature_kTRC = 0x6B545243,
299
300
    skcms_Signature_rXYZ = 0x7258595A,
301
    skcms_Signature_gXYZ = 0x6758595A,
302
    skcms_Signature_bXYZ = 0x6258595A,
303
304
    skcms_Signature_A2B0 = 0x41324230,
305
    skcms_Signature_B2A0 = 0x42324130,
306
307
    skcms_Signature_CHAD = 0x63686164,
308
    skcms_Signature_WTPT = 0x77747074,
309
310
    skcms_Signature_CICP = 0x63696370,
311
312
    // Type signatures
313
    skcms_Signature_curv = 0x63757276,
314
    skcms_Signature_mft1 = 0x6D667431,
315
    skcms_Signature_mft2 = 0x6D667432,
316
    skcms_Signature_mAB  = 0x6D414220,
317
    skcms_Signature_mBA  = 0x6D424120,
318
    skcms_Signature_para = 0x70617261,
319
    skcms_Signature_sf32 = 0x73663332,
320
    // XYZ is also a PCS signature, so it's defined in skcms.h
321
    // skcms_Signature_XYZ = 0x58595A20,
322
};
323
324
7.15k
static uint16_t read_big_u16(const uint8_t* ptr) {
325
7.15k
    uint16_t be;
326
7.15k
    memcpy(&be, ptr, sizeof(be));
327
#if defined(_MSC_VER)
328
    return _byteswap_ushort(be);
329
#else
330
7.15k
    return __builtin_bswap16(be);
331
7.15k
#endif
332
7.15k
}
333
334
511k
static uint32_t read_big_u32(const uint8_t* ptr) {
335
511k
    uint32_t be;
336
511k
    memcpy(&be, ptr, sizeof(be));
337
#if defined(_MSC_VER)
338
    return _byteswap_ulong(be);
339
#else
340
511k
    return __builtin_bswap32(be);
341
511k
#endif
342
511k
}
343
344
71.8k
static int32_t read_big_i32(const uint8_t* ptr) {
345
71.8k
    return (int32_t)read_big_u32(ptr);
346
71.8k
}
347
348
71.8k
static float read_big_fixed(const uint8_t* ptr) {
349
71.8k
    return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
350
71.8k
}
351
352
// Maps to an in-memory profile so that fields line up to the locations specified
353
// in ICC.1:2010, section 7.2
354
typedef struct {
355
    uint8_t size                [ 4];
356
    uint8_t cmm_type            [ 4];
357
    uint8_t version             [ 4];
358
    uint8_t profile_class       [ 4];
359
    uint8_t data_color_space    [ 4];
360
    uint8_t pcs                 [ 4];
361
    uint8_t creation_date_time  [12];
362
    uint8_t signature           [ 4];
363
    uint8_t platform            [ 4];
364
    uint8_t flags               [ 4];
365
    uint8_t device_manufacturer [ 4];
366
    uint8_t device_model        [ 4];
367
    uint8_t device_attributes   [ 8];
368
    uint8_t rendering_intent    [ 4];
369
    uint8_t illuminant_X        [ 4];
370
    uint8_t illuminant_Y        [ 4];
371
    uint8_t illuminant_Z        [ 4];
372
    uint8_t creator             [ 4];
373
    uint8_t profile_id          [16];
374
    uint8_t reserved            [28];
375
    uint8_t tag_count           [ 4]; // Technically not part of header, but required
376
} header_Layout;
377
378
typedef struct {
379
    uint8_t signature [4];
380
    uint8_t offset    [4];
381
    uint8_t size      [4];
382
} tag_Layout;
383
384
36.5k
static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
385
36.5k
    return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
386
36.5k
}
387
388
// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
389
// use of the type is for the CHAD tag that stores exactly nine values.
390
typedef struct {
391
    uint8_t type     [ 4];
392
    uint8_t reserved [ 4];
393
    uint8_t values   [36];
394
} sf32_Layout;
395
396
810
bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
397
810
    skcms_ICCTag tag;
398
810
    if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
399
587
        return false;
400
587
    }
401
402
223
    if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
403
9
        return false;
404
9
    }
405
406
214
    const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
407
214
    const uint8_t* values = sf32Tag->values;
408
856
    for (int r = 0; r < 3; ++r)
409
2.56k
    for (int c = 0; c < 3; ++c, values += 4) {
410
1.92k
        m->vals[r][c] = read_big_fixed(values);
411
1.92k
    }
412
214
    return true;
413
223
}
414
415
// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
416
// the type are for tags/data that store exactly one triple.
417
typedef struct {
418
    uint8_t type     [4];
419
    uint8_t reserved [4];
420
    uint8_t X        [4];
421
    uint8_t Y        [4];
422
    uint8_t Z        [4];
423
} XYZ_Layout;
424
425
8.92k
static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
426
8.92k
    if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
427
4
        return false;
428
4
    }
429
430
8.91k
    const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
431
432
8.91k
    *x = read_big_fixed(xyzTag->X);
433
8.91k
    *y = read_big_fixed(xyzTag->Y);
434
8.91k
    *z = read_big_fixed(xyzTag->Z);
435
8.91k
    return true;
436
8.92k
}
437
438
443
bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
439
443
    skcms_ICCTag tag;
440
443
    return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
441
443
           read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
442
443
}
443
444
0
static int data_color_space_channel_count(uint32_t data_color_space) {
445
0
    switch (data_color_space) {
446
0
        case skcms_Signature_CMYK:   return 4;
447
0
        case skcms_Signature_Gray:   return 1;
448
0
        case skcms_Signature_RGB:    return 3;
449
0
        case skcms_Signature_Lab:    return 3;
450
0
        case skcms_Signature_XYZ:    return 3;
451
0
        case skcms_Signature_CIELUV: return 3;
452
0
        case skcms_Signature_YCbCr:  return 3;
453
0
        case skcms_Signature_CIEYxy: return 3;
454
0
        case skcms_Signature_HSV:    return 3;
455
0
        case skcms_Signature_HLS:    return 3;
456
0
        case skcms_Signature_CMY:    return 3;
457
0
        case skcms_Signature_2CLR:   return 2;
458
0
        case skcms_Signature_3CLR:   return 3;
459
0
        case skcms_Signature_4CLR:   return 4;
460
0
        case skcms_Signature_5CLR:   return 5;
461
0
        case skcms_Signature_6CLR:   return 6;
462
0
        case skcms_Signature_7CLR:   return 7;
463
0
        case skcms_Signature_8CLR:   return 8;
464
0
        case skcms_Signature_9CLR:   return 9;
465
0
        case skcms_Signature_10CLR:  return 10;
466
0
        case skcms_Signature_11CLR:  return 11;
467
0
        case skcms_Signature_12CLR:  return 12;
468
0
        case skcms_Signature_13CLR:  return 13;
469
0
        case skcms_Signature_14CLR:  return 14;
470
0
        case skcms_Signature_15CLR:  return 15;
471
0
        default:                     return -1;
472
0
    }
473
0
}
474
475
0
int skcms_GetInputChannelCount(const skcms_ICCProfile* profile) {
476
0
    int a2b_count = 0;
477
0
    if (profile->has_A2B) {
478
0
        a2b_count = profile->A2B.input_channels != 0
479
0
                        ? static_cast<int>(profile->A2B.input_channels)
480
0
                        : 3;
481
0
    }
482
483
0
    skcms_ICCTag tag;
484
0
    int trc_count = 0;
485
0
    if (skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &tag)) {
486
0
        trc_count = 1;
487
0
    } else if (profile->has_trc) {
488
0
        trc_count = 3;
489
0
    }
490
491
0
    int dcs_count = data_color_space_channel_count(profile->data_color_space);
492
493
0
    if (dcs_count < 0) {
494
0
        return -1;
495
0
    }
496
497
0
    if (a2b_count > 0 && a2b_count != dcs_count) {
498
0
        return -1;
499
0
    }
500
0
    if (trc_count > 0 && trc_count != dcs_count) {
501
0
        return -1;
502
0
    }
503
504
0
    return dcs_count;
505
0
}
506
507
static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
508
2.82k
                           const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
509
2.82k
    return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
510
2.82k
           read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
511
2.82k
           read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
512
2.82k
}
513
514
typedef struct {
515
    uint8_t type          [4];
516
    uint8_t reserved_a    [4];
517
    uint8_t function_type [2];
518
    uint8_t reserved_b    [2];
519
    uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
520
} para_Layout;
521
522
static bool read_curve_para(const uint8_t* buf, uint32_t size,
523
7.15k
                            skcms_Curve* curve, uint32_t* curve_size) {
524
7.15k
    if (size < SAFE_FIXED_SIZE(para_Layout)) {
525
1
        return false;
526
1
    }
527
528
7.14k
    const para_Layout* paraTag = (const para_Layout*)buf;
529
530
7.14k
    enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
531
7.14k
    uint16_t function_type = read_big_u16(paraTag->function_type);
532
7.14k
    if (function_type > kGABCDEF) {
533
1
        return false;
534
1
    }
535
536
7.14k
    static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
537
7.14k
    if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
538
1
        return false;
539
1
    }
540
541
7.14k
    if (curve_size) {
542
162
        *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
543
162
    }
544
545
7.14k
    curve->table_entries = 0;
546
7.14k
    curve->parametric.a  = 1.0f;
547
7.14k
    curve->parametric.b  = 0.0f;
548
7.14k
    curve->parametric.c  = 0.0f;
549
7.14k
    curve->parametric.d  = 0.0f;
550
7.14k
    curve->parametric.e  = 0.0f;
551
7.14k
    curve->parametric.f  = 0.0f;
552
7.14k
    curve->parametric.g  = read_big_fixed(paraTag->variable);
553
554
7.14k
    switch (function_type) {
555
7
        case kGAB:
556
7
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
557
7
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
558
7
            if (curve->parametric.a == 0) {
559
1
                return false;
560
1
            }
561
6
            curve->parametric.d = -curve->parametric.b / curve->parametric.a;
562
6
            break;
563
16
        case kGABC:
564
16
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
565
16
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
566
16
            curve->parametric.e = read_big_fixed(paraTag->variable + 12);
567
16
            if (curve->parametric.a == 0) {
568
1
                return false;
569
1
            }
570
15
            curve->parametric.d = -curve->parametric.b / curve->parametric.a;
571
15
            curve->parametric.f = curve->parametric.e;
572
15
            break;
573
6.64k
        case kGABCD:
574
6.64k
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
575
6.64k
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
576
6.64k
            curve->parametric.c = read_big_fixed(paraTag->variable + 12);
577
6.64k
            curve->parametric.d = read_big_fixed(paraTag->variable + 16);
578
6.64k
            break;
579
6
        case kGABCDEF:
580
6
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
581
6
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
582
6
            curve->parametric.c = read_big_fixed(paraTag->variable + 12);
583
6
            curve->parametric.d = read_big_fixed(paraTag->variable + 16);
584
6
            curve->parametric.e = read_big_fixed(paraTag->variable + 20);
585
6
            curve->parametric.f = read_big_fixed(paraTag->variable + 24);
586
6
            break;
587
7.14k
    }
588
7.14k
    return skcms_TransferFunction_isSRGBish(&curve->parametric);
589
7.14k
}
590
591
typedef struct {
592
    uint8_t type          [4];
593
    uint8_t reserved      [4];
594
    uint8_t value_count   [4];
595
    uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
596
} curv_Layout;
597
598
static bool read_curve_curv(const uint8_t* buf, uint32_t size,
599
1.67k
                            skcms_Curve* curve, uint32_t* curve_size) {
600
1.67k
    if (size < SAFE_FIXED_SIZE(curv_Layout)) {
601
1
        return false;
602
1
    }
603
604
1.67k
    const curv_Layout* curvTag = (const curv_Layout*)buf;
605
606
1.67k
    uint32_t value_count = read_big_u32(curvTag->value_count);
607
1.67k
    if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
608
2
        return false;
609
2
    }
610
611
1.67k
    if (curve_size) {
612
0
        *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
613
0
    }
614
615
1.67k
    if (value_count < 2) {
616
186
        curve->table_entries = 0;
617
186
        curve->parametric.a  = 1.0f;
618
186
        curve->parametric.b  = 0.0f;
619
186
        curve->parametric.c  = 0.0f;
620
186
        curve->parametric.d  = 0.0f;
621
186
        curve->parametric.e  = 0.0f;
622
186
        curve->parametric.f  = 0.0f;
623
186
        if (value_count == 0) {
624
            // Empty tables are a shorthand for an identity curve
625
180
            curve->parametric.g = 1.0f;
626
180
        } else {
627
            // Single entry tables are a shorthand for simple gamma
628
6
            curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
629
6
        }
630
1.48k
    } else {
631
1.48k
        curve->table_8       = nullptr;
632
1.48k
        curve->table_16      = curvTag->variable;
633
1.48k
        curve->table_entries = value_count;
634
1.48k
    }
635
636
1.67k
    return true;
637
1.67k
}
638
639
// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
640
// If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
641
static bool read_curve(const uint8_t* buf, uint32_t size,
642
8.82k
                       skcms_Curve* curve, uint32_t* curve_size) {
643
8.82k
    if (!buf || size < 4 || !curve) {
644
0
        return false;
645
0
    }
646
647
8.82k
    uint32_t type = read_big_u32(buf);
648
8.82k
    if (type == skcms_Signature_para) {
649
7.15k
        return read_curve_para(buf, size, curve, curve_size);
650
7.15k
    } else if (type == skcms_Signature_curv) {
651
1.67k
        return read_curve_curv(buf, size, curve, curve_size);
652
1.67k
    }
653
654
5
    return false;
655
8.82k
}
656
657
// mft1 and mft2 share a large chunk of data
658
typedef struct {
659
    uint8_t type                 [ 4];
660
    uint8_t reserved_a           [ 4];
661
    uint8_t input_channels       [ 1];
662
    uint8_t output_channels      [ 1];
663
    uint8_t grid_points          [ 1];
664
    uint8_t reserved_b           [ 1];
665
    uint8_t matrix               [36];
666
} mft_CommonLayout;
667
668
typedef struct {
669
    mft_CommonLayout common      [1];
670
671
    uint8_t variable             [1/*variable*/];
672
} mft1_Layout;
673
674
typedef struct {
675
    mft_CommonLayout common      [1];
676
677
    uint8_t input_table_entries  [2];
678
    uint8_t output_table_entries [2];
679
    uint8_t variable             [1/*variable*/];
680
} mft2_Layout;
681
682
2
static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
683
    // MFT matrices are applied before the first set of curves, but must be identity unless the
684
    // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
685
    // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
686
    // field/flag.
687
2
    a2b->matrix_channels = 0;
688
2
    a2b-> input_channels = mftTag-> input_channels[0];
689
2
    a2b->output_channels = mftTag->output_channels[0];
690
691
    // We require exactly three (ie XYZ/Lab/RGB) output channels
692
2
    if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
693
0
        return false;
694
0
    }
695
    // We require at least one, and no more than four (ie CMYK) input channels
696
2
    if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
697
0
        return false;
698
0
    }
699
700
8
    for (uint32_t i = 0; i < a2b->input_channels; ++i) {
701
6
        a2b->grid_points[i] = mftTag->grid_points[0];
702
6
    }
703
    // The grid only makes sense with at least two points along each axis
704
2
    if (a2b->grid_points[0] < 2) {
705
0
        return false;
706
0
    }
707
2
    return true;
708
2
}
709
710
// All as the A2B version above, except where noted.
711
0
static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
712
    // Same as A2B.
713
0
    b2a->matrix_channels = 0;
714
0
    b2a-> input_channels = mftTag-> input_channels[0];
715
0
    b2a->output_channels = mftTag->output_channels[0];
716
717
718
    // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
719
0
    if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
720
0
        return false;
721
0
    }
722
0
    if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
723
0
        return false;
724
0
    }
725
726
    // Same as A2B.
727
0
    for (uint32_t i = 0; i < b2a->input_channels; ++i) {
728
0
        b2a->grid_points[i] = mftTag->grid_points[0];
729
0
    }
730
0
    if (b2a->grid_points[0] < 2) {
731
0
        return false;
732
0
    }
733
0
    return true;
734
0
}
735
736
template <typename A2B_or_B2A>
737
static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
738
                        uint32_t input_table_entries, uint32_t output_table_entries,
739
2
                        A2B_or_B2A* out) {
740
    // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
741
2
    uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
742
2
    uint32_t byte_len_per_output_table = output_table_entries * byte_width;
743
744
    // [input|output]_channels are <= 4, so still no overflow
745
2
    uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
746
2
    uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
747
748
2
    uint64_t grid_size = out->output_channels * byte_width;
749
8
    for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
750
6
        grid_size *= out->grid_points[axis];
751
6
    }
752
753
2
    if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
754
0
        return false;
755
0
    }
756
757
8
    for (uint32_t i = 0; i < out->input_channels; ++i) {
758
6
        out->input_curves[i].table_entries = input_table_entries;
759
6
        if (byte_width == 1) {
760
6
            out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
761
6
            out->input_curves[i].table_16 = nullptr;
762
6
        } else {
763
0
            out->input_curves[i].table_8  = nullptr;
764
0
            out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
765
0
        }
766
6
    }
767
768
2
    if (byte_width == 1) {
769
2
        out->grid_8  = table_base + byte_len_all_input_tables;
770
2
        out->grid_16 = nullptr;
771
2
    } else {
772
0
        out->grid_8  = nullptr;
773
0
        out->grid_16 = table_base + byte_len_all_input_tables;
774
0
    }
775
776
2
    const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
777
8
    for (uint32_t i = 0; i < out->output_channels; ++i) {
778
6
        out->output_curves[i].table_entries = output_table_entries;
779
6
        if (byte_width == 1) {
780
6
            out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
781
6
            out->output_curves[i].table_16 = nullptr;
782
6
        } else {
783
0
            out->output_curves[i].table_8  = nullptr;
784
0
            out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
785
0
        }
786
6
    }
787
788
2
    return true;
789
2
}
skcms.cc:bool init_tables<skcms_A2B>(unsigned char const*, unsigned long, unsigned int, unsigned int, unsigned int, skcms_A2B*)
Line
Count
Source
739
2
                        A2B_or_B2A* out) {
740
    // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
741
2
    uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
742
2
    uint32_t byte_len_per_output_table = output_table_entries * byte_width;
743
744
    // [input|output]_channels are <= 4, so still no overflow
745
2
    uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
746
2
    uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
747
748
2
    uint64_t grid_size = out->output_channels * byte_width;
749
8
    for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
750
6
        grid_size *= out->grid_points[axis];
751
6
    }
752
753
2
    if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
754
0
        return false;
755
0
    }
756
757
8
    for (uint32_t i = 0; i < out->input_channels; ++i) {
758
6
        out->input_curves[i].table_entries = input_table_entries;
759
6
        if (byte_width == 1) {
760
6
            out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
761
6
            out->input_curves[i].table_16 = nullptr;
762
6
        } else {
763
0
            out->input_curves[i].table_8  = nullptr;
764
0
            out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
765
0
        }
766
6
    }
767
768
2
    if (byte_width == 1) {
769
2
        out->grid_8  = table_base + byte_len_all_input_tables;
770
2
        out->grid_16 = nullptr;
771
2
    } else {
772
0
        out->grid_8  = nullptr;
773
0
        out->grid_16 = table_base + byte_len_all_input_tables;
774
0
    }
775
776
2
    const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
777
8
    for (uint32_t i = 0; i < out->output_channels; ++i) {
778
6
        out->output_curves[i].table_entries = output_table_entries;
779
6
        if (byte_width == 1) {
780
6
            out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
781
6
            out->output_curves[i].table_16 = nullptr;
782
6
        } else {
783
0
            out->output_curves[i].table_8  = nullptr;
784
0
            out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
785
0
        }
786
6
    }
787
788
2
    return true;
789
2
}
Unexecuted instantiation: skcms.cc:bool init_tables<skcms_B2A>(unsigned char const*, unsigned long, unsigned int, unsigned int, unsigned int, skcms_B2A*)
790
791
template <typename A2B_or_B2A>
792
2
static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
793
2
    if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
794
0
        return false;
795
0
    }
796
797
2
    const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
798
2
    if (!read_mft_common(mftTag->common, out)) {
799
0
        return false;
800
0
    }
801
802
2
    uint32_t input_table_entries  = 256;
803
2
    uint32_t output_table_entries = 256;
804
805
2
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
806
2
                       input_table_entries, output_table_entries, out);
807
2
}
skcms.cc:bool read_tag_mft1<skcms_A2B>(skcms_ICCTag const*, skcms_A2B*)
Line
Count
Source
792
2
static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
793
2
    if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
794
0
        return false;
795
0
    }
796
797
2
    const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
798
2
    if (!read_mft_common(mftTag->common, out)) {
799
0
        return false;
800
0
    }
801
802
2
    uint32_t input_table_entries  = 256;
803
2
    uint32_t output_table_entries = 256;
804
805
2
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
806
2
                       input_table_entries, output_table_entries, out);
807
2
}
Unexecuted instantiation: skcms.cc:bool read_tag_mft1<skcms_B2A>(skcms_ICCTag const*, skcms_B2A*)
808
809
template <typename A2B_or_B2A>
810
0
static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
811
0
    if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
812
0
        return false;
813
0
    }
814
815
0
    const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
816
0
    if (!read_mft_common(mftTag->common, out)) {
817
0
        return false;
818
0
    }
819
820
0
    uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
821
0
    uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
822
823
    // ICC spec mandates that 2 <= table_entries <= 4096
824
0
    if (input_table_entries < 2 || input_table_entries > 4096 ||
825
0
        output_table_entries < 2 || output_table_entries > 4096) {
826
0
        return false;
827
0
    }
828
829
0
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
830
0
                       input_table_entries, output_table_entries, out);
831
0
}
Unexecuted instantiation: skcms.cc:bool read_tag_mft2<skcms_A2B>(skcms_ICCTag const*, skcms_A2B*)
Unexecuted instantiation: skcms.cc:bool read_tag_mft2<skcms_B2A>(skcms_ICCTag const*, skcms_B2A*)
832
833
static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
834
54
                        uint32_t num_curves, skcms_Curve* curves) {
835
216
    for (uint32_t i = 0; i < num_curves; ++i) {
836
162
        if (curve_offset > size) {
837
0
            return false;
838
0
        }
839
840
162
        uint32_t curve_bytes;
841
162
        if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
842
0
            return false;
843
0
        }
844
845
162
        if (curve_bytes > UINT32_MAX - 3) {
846
0
            return false;
847
0
        }
848
162
        curve_bytes = (curve_bytes + 3) & ~3U;
849
850
162
        uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
851
162
        curve_offset = (uint32_t)new_offset_64;
852
162
        if (new_offset_64 != curve_offset) {
853
0
            return false;
854
0
        }
855
162
    }
856
857
54
    return true;
858
54
}
859
860
// mAB and mBA tags use the same encoding, including color lookup tables.
861
typedef struct {
862
    uint8_t type                 [ 4];
863
    uint8_t reserved_a           [ 4];
864
    uint8_t input_channels       [ 1];
865
    uint8_t output_channels      [ 1];
866
    uint8_t reserved_b           [ 2];
867
    uint8_t b_curve_offset       [ 4];
868
    uint8_t matrix_offset        [ 4];
869
    uint8_t m_curve_offset       [ 4];
870
    uint8_t clut_offset          [ 4];
871
    uint8_t a_curve_offset       [ 4];
872
} mAB_or_mBA_Layout;
873
874
typedef struct {
875
    uint8_t grid_points          [16];
876
    uint8_t grid_byte_width      [ 1];
877
    uint8_t reserved             [ 3];
878
    uint8_t variable             [1/*variable*/];
879
} CLUT_Layout;
880
881
13
static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
882
13
    if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
883
0
        return false;
884
0
    }
885
886
13
    const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
887
888
13
    a2b->input_channels  = mABTag->input_channels[0];
889
13
    a2b->output_channels = mABTag->output_channels[0];
890
891
    // We require exactly three (ie XYZ/Lab/RGB) output channels
892
13
    if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
893
0
        return false;
894
0
    }
895
    // We require no more than four (ie CMYK) input channels
896
13
    if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
897
0
        return false;
898
0
    }
899
900
13
    uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
901
13
    uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
902
13
    uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
903
13
    uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
904
13
    uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
905
906
    // "B" curves must be present
907
13
    if (0 == b_curve_offset) {
908
0
        return false;
909
0
    }
910
911
13
    if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
912
13
                     a2b->output_curves)) {
913
0
        return false;
914
0
    }
915
916
    // "M" curves and Matrix must be used together
917
13
    if (0 != m_curve_offset) {
918
13
        if (0 == matrix_offset) {
919
0
            return false;
920
0
        }
921
13
        a2b->matrix_channels = a2b->output_channels;
922
13
        if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
923
13
                         a2b->matrix_curves)) {
924
0
            return false;
925
0
        }
926
927
        // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
928
13
        if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
929
0
            return false;
930
0
        }
931
13
        float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
932
13
        const uint8_t* mtx_buf = tag->buf + matrix_offset;
933
13
        a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
934
13
        a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
935
13
        a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
936
13
        a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
937
13
        a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
938
13
        a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
939
13
        a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
940
13
        a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
941
13
        a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
942
13
        a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
943
13
        a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
944
13
        a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
945
13
    } else {
946
0
        if (0 != matrix_offset) {
947
0
            return false;
948
0
        }
949
0
        a2b->matrix_channels = 0;
950
0
    }
951
952
    // "A" curves and CLUT must be used together
953
13
    if (0 != a_curve_offset) {
954
13
        if (0 == clut_offset) {
955
0
            return false;
956
0
        }
957
13
        if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
958
13
                         a2b->input_curves)) {
959
0
            return false;
960
0
        }
961
962
13
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
963
0
            return false;
964
0
        }
965
13
        const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
966
967
13
        if (clut->grid_byte_width[0] == 1) {
968
0
            a2b->grid_8  = clut->variable;
969
0
            a2b->grid_16 = nullptr;
970
13
        } else if (clut->grid_byte_width[0] == 2) {
971
13
            a2b->grid_8  = nullptr;
972
13
            a2b->grid_16 = clut->variable;
973
13
        } else {
974
0
            return false;
975
0
        }
976
977
13
        uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];  // the payload
978
52
        for (uint32_t i = 0; i < a2b->input_channels; ++i) {
979
39
            a2b->grid_points[i] = clut->grid_points[i];
980
            // The grid only makes sense with at least two points along each axis
981
39
            if (a2b->grid_points[i] < 2) {
982
0
                return false;
983
0
            }
984
39
            grid_size *= a2b->grid_points[i];
985
39
        }
986
13
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
987
0
            return false;
988
0
        }
989
13
    } else {
990
0
        if (0 != clut_offset) {
991
0
            return false;
992
0
        }
993
994
        // If there is no CLUT, the number of input and output channels must match
995
0
        if (a2b->input_channels != a2b->output_channels) {
996
0
            return false;
997
0
        }
998
999
        // Zero out the number of input channels to signal that we're skipping this stage
1000
0
        a2b->input_channels = 0;
1001
0
    }
1002
1003
13
    return true;
1004
13
}
1005
1006
// Exactly the same as read_tag_mab(), except where there are comments.
1007
// TODO: refactor the two to eliminate common code?
1008
15
static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1009
15
    if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
1010
0
        return false;
1011
0
    }
1012
1013
15
    const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
1014
1015
15
    b2a->input_channels  = mBATag->input_channels[0];
1016
15
    b2a->output_channels = mBATag->output_channels[0];
1017
1018
    // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
1019
15
    if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
1020
0
        return false;
1021
0
    }
1022
15
    if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
1023
0
        return false;
1024
0
    }
1025
1026
15
    uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
1027
15
    uint32_t matrix_offset  = read_big_u32(mBATag->matrix_offset);
1028
15
    uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
1029
15
    uint32_t clut_offset    = read_big_u32(mBATag->clut_offset);
1030
15
    uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
1031
1032
15
    if (0 == b_curve_offset) {
1033
0
        return false;
1034
0
    }
1035
1036
    // "B" curves are our inputs, not outputs.
1037
15
    if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
1038
15
                     b2a->input_curves)) {
1039
0
        return false;
1040
0
    }
1041
1042
15
    if (0 != m_curve_offset) {
1043
0
        if (0 == matrix_offset) {
1044
0
            return false;
1045
0
        }
1046
        // Matrix channels is tied to input_channels (3), not output_channels.
1047
0
        b2a->matrix_channels = b2a->input_channels;
1048
1049
0
        if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
1050
0
                         b2a->matrix_curves)) {
1051
0
            return false;
1052
0
        }
1053
1054
0
        if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
1055
0
            return false;
1056
0
        }
1057
0
        float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f;  // TODO: understand
1058
0
        const uint8_t* mtx_buf = tag->buf + matrix_offset;
1059
0
        b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
1060
0
        b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
1061
0
        b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
1062
0
        b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
1063
0
        b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
1064
0
        b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
1065
0
        b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
1066
0
        b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
1067
0
        b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
1068
0
        b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
1069
0
        b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
1070
0
        b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
1071
15
    } else {
1072
15
        if (0 != matrix_offset) {
1073
0
            return false;
1074
0
        }
1075
15
        b2a->matrix_channels = 0;
1076
15
    }
1077
1078
15
    if (0 != a_curve_offset) {
1079
0
        if (0 == clut_offset) {
1080
0
            return false;
1081
0
        }
1082
1083
        // "A" curves are our output, not input.
1084
0
        if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1085
0
                         b2a->output_curves)) {
1086
0
            return false;
1087
0
        }
1088
1089
0
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1090
0
            return false;
1091
0
        }
1092
0
        const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1093
1094
0
        if (clut->grid_byte_width[0] == 1) {
1095
0
            b2a->grid_8  = clut->variable;
1096
0
            b2a->grid_16 = nullptr;
1097
0
        } else if (clut->grid_byte_width[0] == 2) {
1098
0
            b2a->grid_8  = nullptr;
1099
0
            b2a->grid_16 = clut->variable;
1100
0
        } else {
1101
0
            return false;
1102
0
        }
1103
1104
0
        uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1105
0
        for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1106
0
            b2a->grid_points[i] = clut->grid_points[i];
1107
0
            if (b2a->grid_points[i] < 2) {
1108
0
                return false;
1109
0
            }
1110
0
            grid_size *= b2a->grid_points[i];
1111
0
        }
1112
0
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1113
0
            return false;
1114
0
        }
1115
15
    } else {
1116
15
        if (0 != clut_offset) {
1117
0
            return false;
1118
0
        }
1119
1120
15
        if (b2a->input_channels != b2a->output_channels) {
1121
0
            return false;
1122
0
        }
1123
1124
        // Zero out *output* channels to skip this stage.
1125
15
        b2a->output_channels = 0;
1126
15
    }
1127
15
    return true;
1128
15
}
1129
1130
// If you pass f, we'll fit a possibly-non-zero value for *f.
1131
// If you pass nullptr, we'll assume you want *f to be treated as zero.
1132
static int fit_linear(const skcms_Curve* curve, int N, float tol,
1133
1.22k
                      float* c, float* d, float* f = nullptr) {
1134
1.22k
    assert(N > 1);
1135
    // We iteratively fit the first points to the TF's linear piece.
1136
    // We want the cx + f line to pass through the first and last points we fit exactly.
1137
    //
1138
    // As we walk along the points we find the minimum and maximum slope of the line before the
1139
    // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
1140
    // emtpy, when we definitely can't add any more points.
1141
    //
1142
    // Some points' error intervals may intersect the running interval but not lie fully
1143
    // within it.  So we keep track of the last point we saw that is a valid end point candidate,
1144
    // and once the search is done, back up to build the line through *that* point.
1145
1.22k
    const float dx = 1.0f / static_cast<float>(N - 1);
1146
1147
1.22k
    int lin_points = 1;
1148
1149
1.22k
    float f_zero = 0.0f;
1150
1.22k
    if (f) {
1151
12
        *f = eval_curve(curve, 0);
1152
1.20k
    } else {
1153
1.20k
        f = &f_zero;
1154
1.20k
    }
1155
1156
1157
1.22k
    float slope_min = -INFINITY_;
1158
1.22k
    float slope_max = +INFINITY_;
1159
9.03k
    for (int i = 1; i < N; ++i) {
1160
8.97k
        float x = static_cast<float>(i) * dx;
1161
8.97k
        float y = eval_curve(curve, x);
1162
1163
8.97k
        float slope_max_i = (y + tol - *f) / x,
1164
8.97k
              slope_min_i = (y - tol - *f) / x;
1165
8.97k
        if (slope_max_i < slope_min || slope_max < slope_min_i) {
1166
            // Slope intervals would no longer overlap.
1167
1.15k
            break;
1168
1.15k
        }
1169
7.81k
        slope_max = fminf_(slope_max, slope_max_i);
1170
7.81k
        slope_min = fmaxf_(slope_min, slope_min_i);
1171
1172
7.81k
        float cur_slope = (y - *f) / x;
1173
7.81k
        if (slope_min <= cur_slope && cur_slope <= slope_max) {
1174
7.30k
            lin_points = i + 1;
1175
7.30k
            *c = cur_slope;
1176
7.30k
        }
1177
7.81k
    }
1178
1179
    // Set D to the last point that met our tolerance.
1180
1.22k
    *d = static_cast<float>(lin_points - 1) * dx;
1181
1.22k
    return lin_points;
1182
1.22k
}
1183
1184
// If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
1185
174
static void canonicalize_identity(skcms_Curve* curve) {
1186
174
    if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1187
12
        int N = (int)curve->table_entries;
1188
1189
12
        float c = 0.0f, d = 0.0f, f = 0.0f;
1190
12
        if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
1191
12
            && c == 1.0f
1192
12
            && f == 0.0f) {
1193
12
            curve->table_entries = 0;
1194
12
            curve->table_8       = nullptr;
1195
12
            curve->table_16      = nullptr;
1196
12
            curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
1197
12
        }
1198
12
    }
1199
174
}
1200
1201
15
static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1202
15
    bool ok = false;
1203
15
    if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1204
15
    if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1205
15
    if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1206
15
    if (!ok) {
1207
0
        return false;
1208
0
    }
1209
1210
15
    if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1211
15
    if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1212
15
    if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1213
15
    if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1214
1215
15
    if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1216
15
    if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1217
15
    if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1218
1219
15
    if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1220
15
    if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1221
15
    if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1222
1223
15
    return true;
1224
15
}
1225
1226
15
static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1227
15
    bool ok = false;
1228
15
    if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1229
15
    if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1230
15
    if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1231
15
    if (!ok) {
1232
0
        return false;
1233
0
    }
1234
1235
15
    if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1236
15
    if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1237
15
    if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1238
1239
15
    if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1240
15
    if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1241
15
    if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1242
1243
15
    if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1244
15
    if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1245
15
    if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1246
15
    if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1247
1248
15
    return true;
1249
15
}
1250
1251
typedef struct {
1252
    uint8_t type                     [4];
1253
    uint8_t reserved                 [4];
1254
    uint8_t color_primaries          [1];
1255
    uint8_t transfer_characteristics [1];
1256
    uint8_t matrix_coefficients      [1];
1257
    uint8_t video_full_range_flag    [1];
1258
} CICP_Layout;
1259
1260
1.48k
static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1261
1.48k
    if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1262
0
        return false;
1263
0
    }
1264
1265
1.48k
    const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1266
1267
1.48k
    cicp->color_primaries          = cicpTag->color_primaries[0];
1268
1.48k
    cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1269
1.48k
    cicp->matrix_coefficients      = cicpTag->matrix_coefficients[0];
1270
1.48k
    cicp->video_full_range_flag    = cicpTag->video_full_range_flag[0];
1271
1.48k
    return true;
1272
1.48k
}
1273
1274
0
void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1275
0
    if (!profile || !profile->buffer || !tag) { return; }
1276
0
    if (idx > profile->tag_count) { return; }
1277
0
    const tag_Layout* tags = get_tag_table(profile);
1278
0
    tag->signature = read_big_u32(tags[idx].signature);
1279
0
    tag->size      = read_big_u32(tags[idx].size);
1280
0
    tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
1281
0
    tag->type      = read_big_u32(tag->buf);
1282
0
}
1283
1284
33.4k
bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1285
33.4k
    if (!profile || !profile->buffer || !tag) { return false; }
1286
33.4k
    const tag_Layout* tags = get_tag_table(profile);
1287
304k
    for (uint32_t i = 0; i < profile->tag_count; ++i) {
1288
290k
        if (read_big_u32(tags[i].signature) == sig) {
1289
19.3k
            tag->signature = sig;
1290
19.3k
            tag->size      = read_big_u32(tags[i].size);
1291
19.3k
            tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1292
19.3k
            tag->type      = read_big_u32(tag->buf);
1293
19.3k
            return true;
1294
19.3k
        }
1295
290k
    }
1296
14.1k
    return false;
1297
33.4k
}
1298
1299
3.00k
static bool usable_as_src(const skcms_ICCProfile* profile) {
1300
3.00k
    return profile->has_A2B
1301
3.00k
       || (profile->has_trc && profile->has_toXYZD50);
1302
3.00k
}
1303
1304
bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1305
                                const int priority[], const int priorities,
1306
3.08k
                                skcms_ICCProfile* profile) {
1307
3.08k
    static_assert(SAFE_SIZEOF(header_Layout) == 132, "need to update header code");
1308
1309
3.08k
    if (!profile) {
1310
0
        return false;
1311
0
    }
1312
3.08k
    memset(profile, 0, SAFE_SIZEOF(*profile));
1313
1314
3.08k
    if (len < SAFE_SIZEOF(header_Layout)) {
1315
0
        return false;
1316
0
    }
1317
1318
    // Byte-swap all header fields
1319
3.08k
    const header_Layout* header  = (const header_Layout*)buf;
1320
3.08k
    profile->buffer              = (const uint8_t*)buf;
1321
3.08k
    profile->size                = read_big_u32(header->size);
1322
3.08k
    uint32_t version             = read_big_u32(header->version);
1323
3.08k
    profile->data_color_space    = read_big_u32(header->data_color_space);
1324
3.08k
    profile->pcs                 = read_big_u32(header->pcs);
1325
3.08k
    uint32_t signature           = read_big_u32(header->signature);
1326
3.08k
    float illuminant_X           = read_big_fixed(header->illuminant_X);
1327
3.08k
    float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1328
3.08k
    float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1329
3.08k
    profile->tag_count           = read_big_u32(header->tag_count);
1330
1331
    // Validate signature, size (smaller than buffer, large enough to hold tag table),
1332
    // and major version
1333
3.08k
    uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1334
3.08k
    if (signature != skcms_Signature_acsp ||
1335
3.08k
        profile->size > len ||
1336
3.08k
        profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1337
3.08k
        (version >> 24) > 4) {
1338
14
        return false;
1339
14
    }
1340
1341
    // Validate that illuminant is D50 white
1342
3.07k
    if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1343
3.07k
        fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1344
3.07k
        fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1345
4
        return false;
1346
4
    }
1347
1348
    // Validate that all tag entries have sane offset + size
1349
3.06k
    const tag_Layout* tags = get_tag_table(profile);
1350
34.4k
    for (uint32_t i = 0; i < profile->tag_count; ++i) {
1351
31.3k
        uint32_t tag_offset = read_big_u32(tags[i].offset);
1352
31.3k
        uint32_t tag_size   = read_big_u32(tags[i].size);
1353
31.3k
        uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1354
31.3k
        if (tag_size < 4 || tag_end > profile->size) {
1355
12
            return false;
1356
12
        }
1357
31.3k
    }
1358
1359
3.05k
    if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1360
24
        return false;
1361
24
    }
1362
1363
3.03k
    bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1364
1365
    // Pre-parse commonly used tags.
1366
3.03k
    skcms_ICCTag kTRC;
1367
3.03k
    if (profile->data_color_space == skcms_Signature_Gray &&
1368
3.03k
        skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1369
169
        if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1370
            // Malformed tag
1371
0
            return false;
1372
0
        }
1373
169
        profile->trc[1] = profile->trc[0];
1374
169
        profile->trc[2] = profile->trc[0];
1375
169
        profile->has_trc = true;
1376
1377
169
        if (pcs_is_xyz) {
1378
169
            profile->toXYZD50.vals[0][0] = illuminant_X;
1379
169
            profile->toXYZD50.vals[1][1] = illuminant_Y;
1380
169
            profile->toXYZD50.vals[2][2] = illuminant_Z;
1381
169
            profile->has_toXYZD50 = true;
1382
169
        }
1383
2.86k
    } else {
1384
2.86k
        skcms_ICCTag rTRC, gTRC, bTRC;
1385
2.86k
        if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1386
2.86k
            skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1387
2.86k
            skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1388
2.84k
            if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1389
2.84k
                !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1390
2.84k
                !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1391
                // Malformed TRC tags
1392
18
                return false;
1393
18
            }
1394
2.82k
            profile->has_trc = true;
1395
2.82k
        }
1396
1397
2.84k
        skcms_ICCTag rXYZ, gXYZ, bXYZ;
1398
2.84k
        if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1399
2.84k
            skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1400
2.84k
            skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1401
2.82k
            if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1402
                // Malformed XYZ tags
1403
3
                return false;
1404
3
            }
1405
2.82k
            profile->has_toXYZD50 = true;
1406
2.82k
        }
1407
2.84k
    }
1408
1409
8.99k
    for (int i = 0; i < priorities; i++) {
1410
        // enum { perceptual, relative_colormetric, saturation }
1411
6.00k
        if (priority[i] < 0 || priority[i] > 2) {
1412
0
            return false;
1413
0
        }
1414
6.00k
        uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1415
6.00k
        skcms_ICCTag tag;
1416
6.00k
        if (skcms_GetTagBySignature(profile, sig, &tag)) {
1417
15
            if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1418
                // Malformed A2B tag
1419
0
                return false;
1420
0
            }
1421
15
            profile->has_A2B = true;
1422
15
            break;
1423
15
        }
1424
6.00k
    }
1425
1426
8.99k
    for (int i = 0; i < priorities; i++) {
1427
        // enum { perceptual, relative_colormetric, saturation }
1428
6.00k
        if (priority[i] < 0 || priority[i] > 2) {
1429
0
            return false;
1430
0
        }
1431
6.00k
        uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1432
6.00k
        skcms_ICCTag tag;
1433
6.00k
        if (skcms_GetTagBySignature(profile, sig, &tag)) {
1434
15
            if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1435
                // Malformed B2A tag
1436
0
                return false;
1437
0
            }
1438
15
            profile->has_B2A = true;
1439
15
            break;
1440
15
        }
1441
6.00k
    }
1442
1443
3.00k
    skcms_ICCTag cicp_tag;
1444
3.00k
    if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
1445
1.48k
        if (!read_cicp(&cicp_tag, &profile->CICP)) {
1446
            // Malformed CICP tag
1447
0
            return false;
1448
0
        }
1449
1.48k
        profile->has_CICP = true;
1450
1.48k
    }
1451
1452
3.00k
    return usable_as_src(profile);
1453
3.00k
}
1454
1455
1456
0
const skcms_ICCProfile* skcms_sRGB_profile() {
1457
0
    static const skcms_ICCProfile sRGB_profile = {
1458
0
        nullptr,               // buffer, moot here
1459
1460
0
        0,                     // size, moot here
1461
0
        skcms_Signature_RGB,   // data_color_space
1462
0
        skcms_Signature_XYZ,   // pcs
1463
0
        0,                     // tag count, moot here
1464
1465
        // We choose to represent sRGB with its canonical transfer function,
1466
        // and with its canonical XYZD50 gamut matrix.
1467
0
        {   // the 3 trc curves
1468
0
            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1469
0
            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1470
0
            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1471
0
        },
1472
1473
0
        {{  // 3x3 toXYZD50 matrix
1474
0
            { 0.436065674f, 0.385147095f, 0.143066406f },
1475
0
            { 0.222488403f, 0.716873169f, 0.060607910f },
1476
0
            { 0.013916016f, 0.097076416f, 0.714096069f },
1477
0
        }},
1478
1479
0
        {   // an empty A2B
1480
0
            {   // input_curves
1481
0
                {{0, {0,0, 0,0,0,0,0}}},
1482
0
                {{0, {0,0, 0,0,0,0,0}}},
1483
0
                {{0, {0,0, 0,0,0,0,0}}},
1484
0
                {{0, {0,0, 0,0,0,0,0}}},
1485
0
            },
1486
0
            nullptr,   // grid_8
1487
0
            nullptr,   // grid_16
1488
0
            0,         // input_channels
1489
0
            {0,0,0,0}, // grid_points
1490
1491
0
            {   // matrix_curves
1492
0
                {{0, {0,0, 0,0,0,0,0}}},
1493
0
                {{0, {0,0, 0,0,0,0,0}}},
1494
0
                {{0, {0,0, 0,0,0,0,0}}},
1495
0
            },
1496
0
            {{  // matrix (3x4)
1497
0
                { 0,0,0,0 },
1498
0
                { 0,0,0,0 },
1499
0
                { 0,0,0,0 },
1500
0
            }},
1501
0
            0,  // matrix_channels
1502
1503
0
            0,  // output_channels
1504
0
            {   // output_curves
1505
0
                {{0, {0,0, 0,0,0,0,0}}},
1506
0
                {{0, {0,0, 0,0,0,0,0}}},
1507
0
                {{0, {0,0, 0,0,0,0,0}}},
1508
0
            },
1509
0
        },
1510
1511
0
        {   // an empty B2A
1512
0
            {   // input_curves
1513
0
                {{0, {0,0, 0,0,0,0,0}}},
1514
0
                {{0, {0,0, 0,0,0,0,0}}},
1515
0
                {{0, {0,0, 0,0,0,0,0}}},
1516
0
            },
1517
0
            0,  // input_channels
1518
1519
0
            0,  // matrix_channels
1520
0
            {   // matrix_curves
1521
0
                {{0, {0,0, 0,0,0,0,0}}},
1522
0
                {{0, {0,0, 0,0,0,0,0}}},
1523
0
                {{0, {0,0, 0,0,0,0,0}}},
1524
0
            },
1525
0
            {{  // matrix (3x4)
1526
0
                { 0,0,0,0 },
1527
0
                { 0,0,0,0 },
1528
0
                { 0,0,0,0 },
1529
0
            }},
1530
1531
0
            {   // output_curves
1532
0
                {{0, {0,0, 0,0,0,0,0}}},
1533
0
                {{0, {0,0, 0,0,0,0,0}}},
1534
0
                {{0, {0,0, 0,0,0,0,0}}},
1535
0
                {{0, {0,0, 0,0,0,0,0}}},
1536
0
            },
1537
0
            nullptr,    // grid_8
1538
0
            nullptr,    // grid_16
1539
0
            {0,0,0,0},  // grid_points
1540
0
            0,          // output_channels
1541
0
        },
1542
1543
0
        { 0, 0, 0, 0 },  // an empty CICP
1544
1545
0
        true,  // has_trc
1546
0
        true,  // has_toXYZD50
1547
0
        false, // has_A2B
1548
0
        false, // has B2A
1549
0
        false, // has_CICP
1550
0
    };
1551
0
    return &sRGB_profile;
1552
0
}
1553
1554
3.25k
const skcms_ICCProfile* skcms_XYZD50_profile() {
1555
    // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1556
3.25k
    static const skcms_ICCProfile XYZD50_profile = {
1557
3.25k
        nullptr,               // buffer, moot here
1558
1559
3.25k
        0,                     // size, moot here
1560
3.25k
        skcms_Signature_RGB,   // data_color_space
1561
3.25k
        skcms_Signature_XYZ,   // pcs
1562
3.25k
        0,                     // tag count, moot here
1563
1564
3.25k
        {   // the 3 trc curves
1565
3.25k
            {{0, {1,1, 0,0,0,0,0}}},
1566
3.25k
            {{0, {1,1, 0,0,0,0,0}}},
1567
3.25k
            {{0, {1,1, 0,0,0,0,0}}},
1568
3.25k
        },
1569
1570
3.25k
        {{  // 3x3 toXYZD50 matrix
1571
3.25k
            { 1,0,0 },
1572
3.25k
            { 0,1,0 },
1573
3.25k
            { 0,0,1 },
1574
3.25k
        }},
1575
1576
3.25k
        {   // an empty A2B
1577
3.25k
            {   // input_curves
1578
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1579
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1580
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1581
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1582
3.25k
            },
1583
3.25k
            nullptr,   // grid_8
1584
3.25k
            nullptr,   // grid_16
1585
3.25k
            0,         // input_channels
1586
3.25k
            {0,0,0,0}, // grid_points
1587
1588
3.25k
            {   // matrix_curves
1589
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1590
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1591
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1592
3.25k
            },
1593
3.25k
            {{  // matrix (3x4)
1594
3.25k
                { 0,0,0,0 },
1595
3.25k
                { 0,0,0,0 },
1596
3.25k
                { 0,0,0,0 },
1597
3.25k
            }},
1598
3.25k
            0,  // matrix_channels
1599
1600
3.25k
            0,  // output_channels
1601
3.25k
            {   // output_curves
1602
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1603
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1604
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1605
3.25k
            },
1606
3.25k
        },
1607
1608
3.25k
        {   // an empty B2A
1609
3.25k
            {   // input_curves
1610
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1611
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1612
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1613
3.25k
            },
1614
3.25k
            0,  // input_channels
1615
1616
3.25k
            0,  // matrix_channels
1617
3.25k
            {   // matrix_curves
1618
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1619
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1620
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1621
3.25k
            },
1622
3.25k
            {{  // matrix (3x4)
1623
3.25k
                { 0,0,0,0 },
1624
3.25k
                { 0,0,0,0 },
1625
3.25k
                { 0,0,0,0 },
1626
3.25k
            }},
1627
1628
3.25k
            {   // output_curves
1629
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1630
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1631
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1632
3.25k
                {{0, {0,0, 0,0,0,0,0}}},
1633
3.25k
            },
1634
3.25k
            nullptr,    // grid_8
1635
3.25k
            nullptr,    // grid_16
1636
3.25k
            {0,0,0,0},  // grid_points
1637
3.25k
            0,          // output_channels
1638
3.25k
        },
1639
1640
3.25k
        { 0, 0, 0, 0 },  // an empty CICP
1641
1642
3.25k
        true,  // has_trc
1643
3.25k
        true,  // has_toXYZD50
1644
3.25k
        false, // has_A2B
1645
3.25k
        false, // has B2A
1646
3.25k
        false, // has_CICP
1647
3.25k
    };
1648
1649
3.25k
    return &XYZD50_profile;
1650
3.25k
}
1651
1652
0
const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1653
0
    return &skcms_sRGB_profile()->trc[0].parametric;
1654
0
}
1655
1656
0
const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1657
0
    static const skcms_TransferFunction sRGB_inv =
1658
0
        {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1659
0
    return &sRGB_inv;
1660
0
}
1661
1662
0
const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1663
0
    static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1664
0
    return &identity;
1665
0
}
1666
1667
const uint8_t skcms_252_random_bytes[] = {
1668
    8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1669
    119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1670
    154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1671
    194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1672
    108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1673
    70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1674
    137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1675
    9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1676
    129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1677
    140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1678
    219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1679
    123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1680
    189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1681
    174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1682
    2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1683
    112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1684
};
1685
1686
1.07k
bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1687
    // Test for exactly equal profiles first.
1688
1.07k
    if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1689
0
        return true;
1690
0
    }
1691
1692
    // For now this is the essentially the same strategy we use in test_only.c
1693
    // for our skcms_Transform() smoke tests:
1694
    //    1) transform A to XYZD50
1695
    //    2) transform B to XYZD50
1696
    //    3) return true if they're similar enough
1697
    // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1698
1699
    // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1700
    // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1701
1702
    // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1703
    // to be treated as equal.  But CMYK profiles are a totally different ballgame.
1704
1.07k
    const auto CMYK = skcms_Signature_CMYK;
1705
1.07k
    if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1706
0
        return false;
1707
0
    }
1708
1709
    // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1710
    // TODO: working with RGBA_8888 either way is probably fastest.
1711
1.07k
    skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1712
1.07k
    size_t npixels = 84;
1713
1.07k
    if (A->data_color_space == skcms_Signature_CMYK) {
1714
0
        fmt = skcms_PixelFormat_RGBA_8888;
1715
0
        npixels = 63;
1716
0
    }
1717
1718
    // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1719
    // use pre-canned results and skip that skcms_Transform() call?
1720
1.07k
    uint8_t dstA[252],
1721
1.07k
            dstB[252];
1722
1.07k
    if (!skcms_Transform(
1723
1.07k
                skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1724
1.07k
                dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1725
1.07k
                npixels)) {
1726
0
        return false;
1727
0
    }
1728
1.07k
    if (!skcms_Transform(
1729
1.07k
                skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1730
1.07k
                dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1731
1.07k
                npixels)) {
1732
0
        return false;
1733
0
    }
1734
1735
    // TODO: make sure this final check has reasonable codegen.
1736
35.2k
    for (size_t i = 0; i < 252; i++) {
1737
35.1k
        if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1738
943
            return false;
1739
943
        }
1740
35.1k
    }
1741
128
    return true;
1742
1.07k
}
1743
1744
bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1745
0
                                      const skcms_TransferFunction* inv_tf) {
1746
0
    if (!profile || !profile->has_trc) {
1747
0
        return false;
1748
0
    }
1749
1750
0
    return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1751
0
           skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1752
0
           skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1753
0
}
1754
1755
0
static bool is_zero_to_one(float x) {
1756
0
    return 0 <= x && x <= 1;
1757
0
}
1758
1759
typedef struct { float vals[3]; } skcms_Vector3;
1760
1761
5.38k
static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1762
5.38k
    skcms_Vector3 dst = {{0,0,0}};
1763
21.5k
    for (int row = 0; row < 3; ++row) {
1764
16.1k
        dst.vals[row] = m->vals[row][0] * v->vals[0]
1765
16.1k
                      + m->vals[row][1] * v->vals[1]
1766
16.1k
                      + m->vals[row][2] * v->vals[2];
1767
16.1k
    }
1768
5.38k
    return dst;
1769
5.38k
}
1770
1771
bool skcms_AdaptToXYZD50(float wx, float wy,
1772
0
                         skcms_Matrix3x3* toXYZD50) {
1773
0
    if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1774
0
        !toXYZD50) {
1775
0
        return false;
1776
0
    }
1777
1778
    // Assumes that Y is 1.0f.
1779
0
    skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1780
1781
    // Now convert toXYZ matrix to toXYZD50.
1782
0
    skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1783
1784
    // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1785
    // the matrices below.  The Bradford method is used by Adobe and is widely considered
1786
    // to be the best.
1787
0
    skcms_Matrix3x3 xyz_to_lms = {{
1788
0
        {  0.8951f,  0.2664f, -0.1614f },
1789
0
        { -0.7502f,  1.7135f,  0.0367f },
1790
0
        {  0.0389f, -0.0685f,  1.0296f },
1791
0
    }};
1792
0
    skcms_Matrix3x3 lms_to_xyz = {{
1793
0
        {  0.9869929f, -0.1470543f, 0.1599627f },
1794
0
        {  0.4323053f,  0.5183603f, 0.0492912f },
1795
0
        { -0.0085287f,  0.0400428f, 0.9684867f },
1796
0
    }};
1797
1798
0
    skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1799
0
    skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1800
1801
0
    *toXYZD50 = {{
1802
0
        { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1803
0
        { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1804
0
        { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1805
0
    }};
1806
0
    *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1807
0
    *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1808
1809
0
    return true;
1810
0
}
1811
1812
bool skcms_PrimariesToXYZD50(float rx, float ry,
1813
                             float gx, float gy,
1814
                             float bx, float by,
1815
                             float wx, float wy,
1816
0
                             skcms_Matrix3x3* toXYZD50) {
1817
0
    if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1818
0
        !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1819
0
        !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1820
0
        !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1821
0
        !toXYZD50) {
1822
0
        return false;
1823
0
    }
1824
1825
    // First, we need to convert xy values (primaries) to XYZ.
1826
0
    skcms_Matrix3x3 primaries = {{
1827
0
        { rx, gx, bx },
1828
0
        { ry, gy, by },
1829
0
        { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1830
0
    }};
1831
0
    skcms_Matrix3x3 primaries_inv;
1832
0
    if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1833
0
        return false;
1834
0
    }
1835
1836
    // Assumes that Y is 1.0f.
1837
0
    skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1838
0
    skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1839
1840
0
    skcms_Matrix3x3 toXYZ = {{
1841
0
        { XYZ.vals[0],           0,           0 },
1842
0
        {           0, XYZ.vals[1],           0 },
1843
0
        {           0,           0, XYZ.vals[2] },
1844
0
    }};
1845
0
    toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1846
1847
0
    skcms_Matrix3x3 DXtoD50;
1848
0
    if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1849
0
        return false;
1850
0
    }
1851
1852
0
    *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1853
0
    return true;
1854
0
}
1855
1856
1857
9.54k
bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1858
9.54k
    double a00 = src->vals[0][0],
1859
9.54k
           a01 = src->vals[1][0],
1860
9.54k
           a02 = src->vals[2][0],
1861
9.54k
           a10 = src->vals[0][1],
1862
9.54k
           a11 = src->vals[1][1],
1863
9.54k
           a12 = src->vals[2][1],
1864
9.54k
           a20 = src->vals[0][2],
1865
9.54k
           a21 = src->vals[1][2],
1866
9.54k
           a22 = src->vals[2][2];
1867
1868
9.54k
    double b0 = a00*a11 - a01*a10,
1869
9.54k
           b1 = a00*a12 - a02*a10,
1870
9.54k
           b2 = a01*a12 - a02*a11,
1871
9.54k
           b3 = a20,
1872
9.54k
           b4 = a21,
1873
9.54k
           b5 = a22;
1874
1875
9.54k
    double determinant = b0*b5
1876
9.54k
                       - b1*b4
1877
9.54k
                       + b2*b3;
1878
1879
9.54k
    if (determinant == 0) {
1880
13
        return false;
1881
13
    }
1882
1883
9.52k
    double invdet = 1.0 / determinant;
1884
9.52k
    if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1885
160
        return false;
1886
160
    }
1887
1888
9.36k
    b0 *= invdet;
1889
9.36k
    b1 *= invdet;
1890
9.36k
    b2 *= invdet;
1891
9.36k
    b3 *= invdet;
1892
9.36k
    b4 *= invdet;
1893
9.36k
    b5 *= invdet;
1894
1895
9.36k
    dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1896
9.36k
    dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1897
9.36k
    dst->vals[2][0] = (float)(        +     b2 );
1898
9.36k
    dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1899
9.36k
    dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1900
9.36k
    dst->vals[2][1] = (float)(        -     b1 );
1901
9.36k
    dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1902
9.36k
    dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1903
9.36k
    dst->vals[2][2] = (float)(        +     b0 );
1904
1905
37.3k
    for (int r = 0; r < 3; ++r)
1906
112k
    for (int c = 0; c < 3; ++c) {
1907
84.0k
        if (!isfinitef_(dst->vals[r][c])) {
1908
38
            return false;
1909
38
        }
1910
84.0k
    }
1911
9.32k
    return true;
1912
9.36k
}
1913
1914
3.66k
skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1915
3.66k
    skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1916
14.6k
    for (int r = 0; r < 3; r++)
1917
43.9k
        for (int c = 0; c < 3; c++) {
1918
32.9k
            m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1919
32.9k
                         + A->vals[r][1] * B->vals[1][c]
1920
32.9k
                         + A->vals[r][2] * B->vals[2][c];
1921
32.9k
        }
1922
3.66k
    return m;
1923
3.66k
}
1924
1925
#if defined(__clang__)
1926
    [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1927
#endif
1928
25.1k
bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1929
25.1k
    TF_PQish  pq;
1930
25.1k
    TF_HLGish hlg;
1931
25.1k
    switch (classify(*src, &pq, &hlg)) {
1932
889
        case skcms_TFType_Invalid: return false;
1933
24.2k
        case skcms_TFType_sRGBish: break;  // handled below
1934
1935
0
        case skcms_TFType_PQish:
1936
0
            *dst = { TFKind_marker(skcms_TFType_PQish), -pq.A,  pq.D, 1.0f/pq.F
1937
0
                                                      ,  pq.B, -pq.E, 1.0f/pq.C};
1938
0
            return true;
1939
1940
0
        case skcms_TFType_HLGish:
1941
0
            *dst = { TFKind_marker(skcms_TFType_HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1942
0
                                                          , 1.0f/hlg.a, hlg.b, hlg.c
1943
0
                                                          , hlg.K_minus_1 };
1944
0
            return true;
1945
1946
0
        case skcms_TFType_HLGinvish:
1947
0
            *dst = { TFKind_marker(skcms_TFType_HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1948
0
                                                       , 1.0f/hlg.a, hlg.b, hlg.c
1949
0
                                                       , hlg.K_minus_1 };
1950
0
            return true;
1951
25.1k
    }
1952
1953
24.2k
    assert (classify(*src) == skcms_TFType_sRGBish);
1954
1955
    // We're inverting this function, solving for x in terms of y.
1956
    //   y = (cx + f)         x < d
1957
    //       (ax + b)^g + e   x ≥ d
1958
    // The inverse of this function can be expressed in the same piecewise form.
1959
24.2k
    skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1960
1961
    // We'll start by finding the new threshold inv.d.
1962
    // In principle we should be able to find that by solving for y at x=d from either side.
1963
    // (If those two d values aren't the same, it's a discontinuous transfer function.)
1964
24.2k
    float d_l =       src->c * src->d + src->f,
1965
24.2k
          d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1966
24.2k
    if (fabsf_(d_l - d_r) > 1/512.0f) {
1967
508
        return false;
1968
508
    }
1969
23.7k
    inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1970
1971
    // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1972
23.7k
    if (inv.d > 0) {
1973
        // Inverting the linear section is pretty straightfoward:
1974
        //        y       = cx + f
1975
        //        y - f   = cx
1976
        //   (1/c)y - f/c = x
1977
10.6k
        inv.c =    1.0f/src->c;
1978
10.6k
        inv.f = -src->f/src->c;
1979
10.6k
    }
1980
1981
    // The interesting part is inverting the nonlinear section:
1982
    //         y                = (ax + b)^g + e.
1983
    //         y - e            = (ax + b)^g
1984
    //        (y - e)^1/g       =  ax + b
1985
    //        (y - e)^1/g - b   =  ax
1986
    //   (1/a)(y - e)^1/g - b/a =   x
1987
    //
1988
    // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1989
    //   let k = (1/a)^g
1990
    //   (1/a)( y -  e)^1/g - b/a = x
1991
    //        (ky - ke)^1/g - b/a = x
1992
1993
23.7k
    float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1994
23.7k
    inv.g = 1.0f / src->g;
1995
23.7k
    inv.a = k;
1996
23.7k
    inv.b = -k * src->e;
1997
23.7k
    inv.e = -src->b / src->a;
1998
1999
    // We need to enforce the same constraints here that we do when fitting a curve,
2000
    // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
2001
    // of the source function if we're here.
2002
2003
    // Just like when fitting the curve, there's really no way to rescue a < 0.
2004
23.7k
    if (inv.a < 0) {
2005
0
        return false;
2006
0
    }
2007
    // On the other hand we can rescue an ad+b that's gone slightly negative here.
2008
23.7k
    if (inv.a * inv.d + inv.b < 0) {
2009
978
        inv.b = -inv.a * inv.d;
2010
978
    }
2011
2012
    // That should usually make classify(inv) == sRGBish true, but there are a couple situations
2013
    // where we might still fail here, like non-finite parameter values.
2014
23.7k
    if (classify(inv) != skcms_TFType_sRGBish) {
2015
189
        return false;
2016
189
    }
2017
2018
23.5k
    assert (inv.a >= 0);
2019
23.5k
    assert (inv.a * inv.d + inv.b >= 0);
2020
2021
    // Now in principle we're done.
2022
    // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
2023
    // e or f of the inverse, depending on which segment contains src(1.0f).
2024
23.5k
    float s = skcms_TransferFunction_eval(src, 1.0f);
2025
23.5k
    if (!isfinitef_(s)) {
2026
152
        return false;
2027
152
    }
2028
2029
23.4k
    float sign = s < 0 ? -1.0f : 1.0f;
2030
23.4k
    s *= sign;
2031
23.4k
    if (s < inv.d) {
2032
253
        inv.f = 1.0f - sign * inv.c * s;
2033
23.1k
    } else {
2034
23.1k
        inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
2035
23.1k
    }
2036
2037
23.4k
    *dst = inv;
2038
23.4k
    return classify(*dst) == skcms_TFType_sRGBish;
2039
23.5k
}
2040
2041
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
2042
2043
// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
2044
//
2045
//   tf(x) =  cx + f          x < d
2046
//   tf(x) = (ax + b)^g + e   x ≥ d
2047
//
2048
// When fitting, we add the additional constraint that both pieces meet at d:
2049
//
2050
//   cd + f = (ad + b)^g + e
2051
//
2052
// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
2053
//
2054
//   tf(x) =                           cx + f   x < d
2055
//   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
2056
//
2057
// Our overall strategy is then:
2058
//    For a couple tolerances,
2059
//       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
2060
//       - invert c,d,f
2061
//       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
2062
//                          (and by constraint, inverted e) to the inverse of the table.
2063
//    Return the parameters with least maximum error.
2064
//
2065
// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2066
// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2067
//
2068
//    let y = Table(x)
2069
//    r(x) = x - f_inv(y)
2070
//
2071
//    ∂r/∂g = ln(ay + b)*(ay + b)^g
2072
//          - ln(ad + b)*(ad + b)^g
2073
//    ∂r/∂a = yg(ay + b)^(g-1)
2074
//          - dg(ad + b)^(g-1)
2075
//    ∂r/∂b =  g(ay + b)^(g-1)
2076
//          -  g(ad + b)^(g-1)
2077
2078
// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2079
// and fill out the gradient of the residual into dfdP.
2080
static float rg_nonlinear(float x,
2081
                          const skcms_Curve* curve,
2082
                          const skcms_TransferFunction* tf,
2083
132k
                          float dfdP[3]) {
2084
132k
    const float y = eval_curve(curve, x);
2085
2086
132k
    const float g = tf->g, a = tf->a, b = tf->b,
2087
132k
                c = tf->c, d = tf->d, f = tf->f;
2088
2089
132k
    const float Y = fmaxf_(a*y + b, 0.0f),
2090
132k
                D =        a*d + b;
2091
132k
    assert (D >= 0);
2092
2093
    // The gradient.
2094
132k
    dfdP[0] = logf_(Y)*powf_(Y, g)
2095
132k
            - logf_(D)*powf_(D, g);
2096
132k
    dfdP[1] = y*g*powf_(Y, g-1)
2097
132k
            - d*g*powf_(D, g-1);
2098
132k
    dfdP[2] =   g*powf_(Y, g-1)
2099
132k
            -   g*powf_(D, g-1);
2100
2101
    // The residual.
2102
132k
    const float f_inv = powf_(Y, g)
2103
132k
                      - powf_(D, g)
2104
132k
                      + c*d + f;
2105
132k
    return x - f_inv;
2106
132k
}
2107
2108
static bool gauss_newton_step(const skcms_Curve* curve,
2109
                                    skcms_TransferFunction* tf,
2110
5.59k
                              float x0, float dx, int N) {
2111
    // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2112
    //
2113
    // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2114
    //
2115
    // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2116
    //   where r(P) is the residual vector
2117
    //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
2118
    //
2119
    // Let's review the shape of each of these expressions:
2120
    //   r(P)   is [N x 1], a column vector with one entry per value of x tested
2121
    //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
2122
    //   Jf^T   is [3 x N], the transpose of Jf
2123
    //
2124
    //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2125
    //                                              and so is its inverse (Jf^T Jf)^-1
2126
    //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2127
    //
2128
    // Our implementation strategy to get to the final ∆P is
2129
    //   1) evaluate Jf^T Jf,   call that lhs
2130
    //   2) evaluate Jf^T r(P), call that rhs
2131
    //   3) invert lhs
2132
    //   4) multiply inverse lhs by rhs
2133
    //
2134
    // This is a friendly implementation strategy because we don't have to have any
2135
    // buffers that scale with N, and equally nice don't have to perform any matrix
2136
    // operations that are variable size.
2137
    //
2138
    // Other implementation strategies could trade this off, e.g. evaluating the
2139
    // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2140
    // the residuals.  That would probably require implementing singular value
2141
    // decomposition, and would create a [3 x N] matrix to be multiplied by the
2142
    // [N x 1] residual vector, but on the upside I think that'd eliminate the
2143
    // possibility of this gauss_newton_step() function ever failing.
2144
2145
    // 0) start off with lhs and rhs safely zeroed.
2146
5.59k
    skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2147
5.59k
    skcms_Vector3   rhs = {  {0,0,0} };
2148
2149
    // 1,2) evaluate lhs and evaluate rhs
2150
    //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2151
    //   so we'll have to update lhs and rhs at the same time.
2152
138k
    for (int i = 0; i < N; i++) {
2153
132k
        float x = x0 + static_cast<float>(i)*dx;
2154
2155
132k
        float dfdP[3] = {0,0,0};
2156
132k
        float resid = rg_nonlinear(x,curve,tf, dfdP);
2157
2158
530k
        for (int r = 0; r < 3; r++) {
2159
1.59M
            for (int c = 0; c < 3; c++) {
2160
1.19M
                lhs.vals[r][c] += dfdP[r] * dfdP[c];
2161
1.19M
            }
2162
397k
            rhs.vals[r] += dfdP[r] * resid;
2163
397k
        }
2164
132k
    }
2165
2166
    // If any of the 3 P parameters are unused, this matrix will be singular.
2167
    // Detect those cases and fix them up to indentity instead, so we can invert.
2168
22.3k
    for (int k = 0; k < 3; k++) {
2169
16.7k
        if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2170
16.7k
            lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2171
2.93k
            lhs.vals[k][k] = 1;
2172
2.93k
        }
2173
16.7k
    }
2174
2175
    // 3) invert lhs
2176
5.59k
    skcms_Matrix3x3 lhs_inv;
2177
5.59k
    if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2178
210
        return false;
2179
210
    }
2180
2181
    // 4) multiply inverse lhs by rhs
2182
5.38k
    skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2183
5.38k
    tf->g += dP.vals[0];
2184
5.38k
    tf->a += dP.vals[1];
2185
5.38k
    tf->b += dP.vals[2];
2186
5.38k
    return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2187
5.59k
}
2188
2189
static float max_roundtrip_error_checked(const skcms_Curve* curve,
2190
6.07k
                                         const skcms_TransferFunction* tf_inv) {
2191
6.07k
    skcms_TransferFunction tf;
2192
6.07k
    if (!skcms_TransferFunction_invert(tf_inv, &tf) || skcms_TFType_sRGBish != classify(tf)) {
2193
1.41k
        return INFINITY_;
2194
1.41k
    }
2195
2196
4.66k
    skcms_TransferFunction tf_inv_again;
2197
4.66k
    if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2198
404
        return INFINITY_;
2199
404
    }
2200
2201
4.26k
    return skcms_MaxRoundtripError(curve, &tf_inv_again);
2202
4.66k
}
2203
2204
// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
2205
1.15k
static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2206
    // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2207
6.53k
    auto fixup_tf = [tf]() {
2208
        // a must be non-negative. That ensures the function is monotonically increasing.
2209
        // We don't really know how to fix up a if it goes negative.
2210
6.53k
        if (tf->a < 0) {
2211
441
            return false;
2212
441
        }
2213
        // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2214
        // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2215
6.09k
        if (tf->a * tf->d + tf->b < 0) {
2216
578
            tf->b = -tf->a * tf->d;
2217
578
        }
2218
6.09k
        assert (tf->a >= 0 &&
2219
6.09k
                tf->a * tf->d + tf->b >= 0);
2220
2221
        // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2222
        // parameter so we can guarantee this.
2223
6.09k
        tf->e =   tf->c*tf->d + tf->f
2224
6.09k
          - powf_(tf->a*tf->d + tf->b, tf->g);
2225
2226
6.09k
        return isfinitef_(tf->e);
2227
6.53k
    };
2228
2229
1.15k
    if (!fixup_tf()) {
2230
0
        return false;
2231
0
    }
2232
2233
    // No matter where we start, dx should always represent N even steps from 0 to 1.
2234
1.15k
    const float dx = 1.0f / static_cast<float>(N-1);
2235
2236
1.15k
    skcms_TransferFunction best_tf = *tf;
2237
1.15k
    float best_max_error = INFINITY_;
2238
2239
    // Need this or several curves get worse... *sigh*
2240
1.15k
    float init_error = max_roundtrip_error_checked(curve, tf);
2241
1.15k
    if (init_error < best_max_error) {
2242
993
        best_max_error = init_error;
2243
993
        best_tf = *tf;
2244
993
    }
2245
2246
    // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2247
6.07k
    for (int j = 0; j < 8; j++) {
2248
5.59k
        if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
2249
667
            *tf = best_tf;
2250
667
            return isfinitef_(best_max_error);
2251
667
        }
2252
2253
4.92k
        float max_error = max_roundtrip_error_checked(curve, tf);
2254
4.92k
        if (max_error < best_max_error) {
2255
548
            best_max_error = max_error;
2256
548
            best_tf = *tf;
2257
548
        }
2258
4.92k
    }
2259
2260
485
    *tf = best_tf;
2261
485
    return isfinitef_(best_max_error);
2262
1.15k
}
2263
2264
bool skcms_ApproximateCurve(const skcms_Curve* curve,
2265
                            skcms_TransferFunction* approx,
2266
840
                            float* max_error) {
2267
840
    if (!curve || !approx || !max_error) {
2268
0
        return false;
2269
0
    }
2270
2271
840
    if (curve->table_entries == 0) {
2272
        // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2273
236
        return false;
2274
236
    }
2275
2276
604
    if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2277
        // We need at least two points, and must put some reasonable cap on the maximum number.
2278
0
        return false;
2279
0
    }
2280
2281
604
    int N = (int)curve->table_entries;
2282
604
    const float dx = 1.0f / static_cast<float>(N - 1);
2283
2284
604
    *max_error = INFINITY_;
2285
604
    const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2286
1.81k
    for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2287
1.20k
        skcms_TransferFunction tf,
2288
1.20k
                               tf_inv;
2289
2290
        // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2291
1.20k
        tf.f = 0.0f;
2292
1.20k
        int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2293
2294
1.20k
        if (L == N) {
2295
            // If the entire data set was linear, move the coefficients to the nonlinear portion
2296
            // with G == 1.  This lets use a canonical representation with d == 0.
2297
33
            tf.g = 1;
2298
33
            tf.a = tf.c;
2299
33
            tf.b = tf.f;
2300
33
            tf.c = tf.d = tf.e = tf.f = 0;
2301
1.17k
        } else if (L == N - 1) {
2302
            // Degenerate case with only two points in the nonlinear segment. Solve directly.
2303
23
            tf.g = 1;
2304
23
            tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
2305
23
                    eval_curve(curve, static_cast<float>(N-2)*dx))
2306
23
                 / dx;
2307
23
            tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
2308
23
                 - tf.a * static_cast<float>(N-2)*dx;
2309
23
            tf.e = 0;
2310
1.15k
        } else {
2311
            // Start by guessing a gamma-only curve through the midpoint.
2312
1.15k
            int mid = (L + N) / 2;
2313
1.15k
            float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2314
1.15k
            float mid_y = eval_curve(curve, mid_x);
2315
1.15k
            tf.g = log2f_(mid_y) / log2f_(mid_x);
2316
1.15k
            tf.a = 1;
2317
1.15k
            tf.b = 0;
2318
1.15k
            tf.e =    tf.c*tf.d + tf.f
2319
1.15k
              - powf_(tf.a*tf.d + tf.b, tf.g);
2320
2321
2322
1.15k
            if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2323
1.15k
                !fit_nonlinear(curve, L,N, &tf_inv)) {
2324
131
                continue;
2325
131
            }
2326
2327
            // We fit tf_inv, so calculate tf to keep in sync.
2328
            // fit_nonlinear() should guarantee invertibility.
2329
1.02k
            if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2330
0
                assert(false);
2331
0
                continue;
2332
0
            }
2333
1.02k
        }
2334
2335
        // We'd better have a sane, sRGB-ish TF by now.
2336
        // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2337
        // anything else is just some accident of math and the way we pun tf.g as a type flag.
2338
        // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2339
1.07k
        if (skcms_TFType_sRGBish != classify(tf)) {
2340
9
            continue;
2341
9
        }
2342
2343
        // We find our error by roundtripping the table through tf_inv.
2344
        //
2345
        // (The most likely use case for this approximation is to be inverted and
2346
        // used as the transfer function for a destination color space.)
2347
        //
2348
        // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2349
        // invertible, so re-verify that here (and use the new inverse for testing).
2350
        // fit_nonlinear() should guarantee this, but the special cases that don't use
2351
        // it may fail this test.
2352
1.06k
        if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2353
22
            continue;
2354
22
        }
2355
2356
1.04k
        float err = skcms_MaxRoundtripError(curve, &tf_inv);
2357
1.04k
        if (*max_error > err) {
2358
734
            *max_error = err;
2359
734
            *approx    = tf;
2360
734
        }
2361
1.04k
    }
2362
604
    return isfinitef_(*max_error);
2363
604
}
2364
2365
enum class CpuType { Baseline, HSW, SKX };
2366
2367
3.49k
static CpuType cpu_type() {
2368
    #if defined(SKCMS_PORTABLE) || !defined(__x86_64__) || defined(SKCMS_FORCE_BASELINE)
2369
        return CpuType::Baseline;
2370
    #elif defined(SKCMS_FORCE_HSW)
2371
        return CpuType::HSW;
2372
    #elif defined(SKCMS_FORCE_SKX)
2373
        return CpuType::SKX;
2374
    #else
2375
3.49k
        static const CpuType type = []{
2376
2
            if (!sAllowRuntimeCPUDetection) {
2377
0
                return CpuType::Baseline;
2378
0
            }
2379
            // See http://www.sandpile.org/x86/cpuid.htm
2380
2381
            // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2382
2
            uint32_t eax, ebx, ecx, edx;
2383
2
            __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2384
2
                                         : "0"(1), "2"(0));
2385
2
            if ((edx & (1u<<25)) &&  // SSE
2386
2
                (edx & (1u<<26)) &&  // SSE2
2387
2
                (ecx & (1u<< 0)) &&  // SSE3
2388
2
                (ecx & (1u<< 9)) &&  // SSSE3
2389
2
                (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2390
2
                (ecx & (1u<<19)) &&  // SSE4.1
2391
2
                (ecx & (1u<<20)) &&  // SSE4.2
2392
2
                (ecx & (1u<<26)) &&  // XSAVE
2393
2
                (ecx & (1u<<27)) &&  // OSXSAVE
2394
2
                (ecx & (1u<<28)) &&  // AVX
2395
2
                (ecx & (1u<<29))) {  // F16C
2396
2397
                // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2398
2
                __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2399
2
                                             : "0"(7), "2"(0));
2400
                // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2401
2
                uint32_t xcr0, dont_need_edx;
2402
2
                __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2403
2404
2
                if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2405
2
                    (xcr0 & (1u<<2)) &&  // YMM register state saved?
2406
2
                    (ebx  & (1u<<5))) {  // AVX2
2407
                    // At this point we're at least HSW.  Continue checking for SKX.
2408
2
                    if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2409
2
                        (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2410
2
                        (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2411
2
                        (ebx  & (1u<<16)) && // AVX512F
2412
2
                        (ebx  & (1u<<17)) && // AVX512DQ
2413
2
                        (ebx  & (1u<<28)) && // AVX512CD
2414
2
                        (ebx  & (1u<<30)) && // AVX512BW
2415
2
                        (ebx  & (1u<<31))) { // AVX512VL
2416
0
                        return CpuType::SKX;
2417
0
                    }
2418
2
                    return CpuType::HSW;
2419
2
                }
2420
2
            }
2421
0
            return CpuType::Baseline;
2422
2
        }();
2423
3.49k
        return type;
2424
3.49k
    #endif
2425
3.49k
}
2426
2427
17.2k
static bool tf_is_gamma(const skcms_TransferFunction& tf) {
2428
17.2k
    return tf.g > 0 && tf.a == 1 &&
2429
17.2k
           tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0;
2430
17.2k
}
2431
2432
struct OpAndArg {
2433
    Op          op;
2434
    const void* arg;
2435
};
2436
2437
21.6k
static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2438
21.6k
    struct OpType {
2439
21.6k
        Op sGamma, sRGBish, PQish, HLGish, HLGinvish, table;
2440
21.6k
    };
2441
21.6k
    static constexpr OpType kOps[] = {
2442
21.6k
        { Op::gamma_r, Op::tf_r, Op::pq_r, Op::hlg_r, Op::hlginv_r, Op::table_r },
2443
21.6k
        { Op::gamma_g, Op::tf_g, Op::pq_g, Op::hlg_g, Op::hlginv_g, Op::table_g },
2444
21.6k
        { Op::gamma_b, Op::tf_b, Op::pq_b, Op::hlg_b, Op::hlginv_b, Op::table_b },
2445
21.6k
        { Op::gamma_a, Op::tf_a, Op::pq_a, Op::hlg_a, Op::hlginv_a, Op::table_a },
2446
21.6k
    };
2447
21.6k
    const auto& op = kOps[channel];
2448
2449
21.6k
    if (curve->table_entries == 0) {
2450
17.2k
        const OpAndArg noop = { Op::load_a8/*doesn't matter*/, nullptr };
2451
2452
17.2k
        const skcms_TransferFunction& tf = curve->parametric;
2453
2454
17.2k
        if (tf_is_gamma(tf)) {
2455
12.6k
            return tf.g != 1 ? OpAndArg{op.sGamma, &tf}
2456
12.6k
                             : noop;
2457
12.6k
        }
2458
2459
4.53k
        switch (classify(tf)) {
2460
0
            case skcms_TFType_Invalid:    return noop;
2461
4.53k
            case skcms_TFType_sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2462
0
            case skcms_TFType_PQish:      return OpAndArg{op.PQish,     &tf};
2463
0
            case skcms_TFType_HLGish:     return OpAndArg{op.HLGish,    &tf};
2464
0
            case skcms_TFType_HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2465
4.53k
        }
2466
4.53k
    }
2467
4.42k
    return OpAndArg{op.table, curve};
2468
21.6k
}
2469
2470
7.21k
static int select_curve_ops(const skcms_Curve* curves, int numChannels, OpAndArg* ops) {
2471
    // We process the channels in reverse order, yielding ops in ABGR order.
2472
    // (Working backwards allows us to fuse trailing B+G+R ops into a single RGB op.)
2473
7.21k
    int cursor = 0;
2474
28.8k
    for (int index = numChannels; index-- > 0; ) {
2475
21.6k
        ops[cursor] = select_curve_op(&curves[index], index);
2476
21.6k
        if (ops[cursor].arg) {
2477
10.0k
            ++cursor;
2478
10.0k
        }
2479
21.6k
    }
2480
2481
    // Identify separate B+G+R ops and fuse them into a single RGB op.
2482
7.21k
    if (cursor >= 3) {
2483
3.29k
        struct FusableOps {
2484
3.29k
            Op r, g, b, rgb;
2485
3.29k
        };
2486
3.29k
        static constexpr FusableOps kFusableOps[] = {
2487
3.29k
            {Op::gamma_r,  Op::gamma_g,  Op::gamma_b,  Op::gamma_rgb},
2488
3.29k
            {Op::tf_r,     Op::tf_g,     Op::tf_b,     Op::tf_rgb},
2489
3.29k
            {Op::pq_r,     Op::pq_g,     Op::pq_b,     Op::pq_rgb},
2490
3.29k
            {Op::hlg_r,    Op::hlg_g,    Op::hlg_b,    Op::hlg_rgb},
2491
3.29k
            {Op::hlginv_r, Op::hlginv_g, Op::hlginv_b, Op::hlginv_rgb},
2492
3.29k
        };
2493
2494
3.29k
        int posR = cursor - 1;
2495
3.29k
        int posG = cursor - 2;
2496
3.29k
        int posB = cursor - 3;
2497
12.0k
        for (const FusableOps& fusableOp : kFusableOps) {
2498
12.0k
            if (ops[posR].op == fusableOp.r &&
2499
12.0k
                ops[posG].op == fusableOp.g &&
2500
12.0k
                ops[posB].op == fusableOp.b &&
2501
12.0k
                (0 == memcmp(ops[posR].arg, ops[posG].arg, sizeof(skcms_TransferFunction))) &&
2502
12.0k
                (0 == memcmp(ops[posR].arg, ops[posB].arg, sizeof(skcms_TransferFunction)))) {
2503
                // Fuse the three matching ops into one.
2504
1.42k
                ops[posB].op = fusableOp.rgb;
2505
1.42k
                cursor -= 2;
2506
1.42k
                break;
2507
1.42k
            }
2508
12.0k
        }
2509
3.29k
    }
2510
2511
7.21k
    return cursor;
2512
7.21k
}
2513
2514
6.98k
static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2515
6.98k
    switch (fmt >> 1) {   // ignore rgb/bgr
2516
0
        case skcms_PixelFormat_A_8              >> 1: return  1;
2517
0
        case skcms_PixelFormat_G_8              >> 1: return  1;
2518
0
        case skcms_PixelFormat_GA_88            >> 1: return  2;
2519
0
        case skcms_PixelFormat_ABGR_4444        >> 1: return  2;
2520
0
        case skcms_PixelFormat_RGB_565          >> 1: return  2;
2521
4.28k
        case skcms_PixelFormat_RGB_888          >> 1: return  3;
2522
0
        case skcms_PixelFormat_RGBA_8888        >> 1: return  4;
2523
0
        case skcms_PixelFormat_RGBA_8888_sRGB   >> 1: return  4;
2524
0
        case skcms_PixelFormat_RGBA_1010102     >> 1: return  4;
2525
0
        case skcms_PixelFormat_RGB_101010x_XR   >> 1: return  4;
2526
0
        case skcms_PixelFormat_RGB_161616LE     >> 1: return  6;
2527
0
        case skcms_PixelFormat_RGBA_10101010_XR >> 1: return  8;
2528
0
        case skcms_PixelFormat_RGBA_16161616LE  >> 1: return  8;
2529
0
        case skcms_PixelFormat_RGB_161616BE     >> 1: return  6;
2530
0
        case skcms_PixelFormat_RGBA_16161616BE  >> 1: return  8;
2531
0
        case skcms_PixelFormat_RGB_hhh_Norm     >> 1: return  6;
2532
0
        case skcms_PixelFormat_RGBA_hhhh_Norm   >> 1: return  8;
2533
0
        case skcms_PixelFormat_RGB_hhh          >> 1: return  6;
2534
0
        case skcms_PixelFormat_RGBA_hhhh        >> 1: return  8;
2535
2.70k
        case skcms_PixelFormat_RGB_fff          >> 1: return 12;
2536
0
        case skcms_PixelFormat_RGBA_ffff        >> 1: return 16;
2537
6.98k
    }
2538
0
    assert(false);
2539
0
    return 0;
2540
6.98k
}
2541
2542
static bool prep_for_destination(const skcms_ICCProfile* profile,
2543
                                 skcms_Matrix3x3* fromXYZD50,
2544
                                 skcms_TransferFunction* invR,
2545
                                 skcms_TransferFunction* invG,
2546
3.49k
                                 skcms_TransferFunction* invB) {
2547
    // skcms_Transform() supports B2A destinations...
2548
3.49k
    if (profile->has_B2A) { return true; }
2549
    // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2550
3.49k
    return profile->has_trc
2551
3.49k
        && profile->has_toXYZD50
2552
3.49k
        && profile->trc[0].table_entries == 0
2553
3.49k
        && profile->trc[1].table_entries == 0
2554
3.49k
        && profile->trc[2].table_entries == 0
2555
3.49k
        && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2556
3.49k
        && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2557
3.49k
        && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2558
3.49k
        && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2559
3.49k
}
2560
2561
bool skcms_Transform(const void*             src,
2562
                     skcms_PixelFormat       srcFmt,
2563
                     skcms_AlphaFormat       srcAlpha,
2564
                     const skcms_ICCProfile* srcProfile,
2565
                     void*                   dst,
2566
                     skcms_PixelFormat       dstFmt,
2567
                     skcms_AlphaFormat       dstAlpha,
2568
                     const skcms_ICCProfile* dstProfile,
2569
3.49k
                     size_t                  nz) {
2570
3.49k
    const size_t dst_bpp = bytes_per_pixel(dstFmt),
2571
3.49k
                 src_bpp = bytes_per_pixel(srcFmt);
2572
    // Let's just refuse if the request is absurdly big.
2573
3.49k
    if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2574
0
        return false;
2575
0
    }
2576
3.49k
    int n = (int)nz;
2577
2578
    // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2579
3.49k
    if (!srcProfile) {
2580
0
        srcProfile = skcms_sRGB_profile();
2581
0
    }
2582
3.49k
    if (!dstProfile) {
2583
0
        dstProfile = skcms_sRGB_profile();
2584
0
    }
2585
2586
    // We can't transform in place unless the PixelFormats are the same size.
2587
3.49k
    if (dst == src && dst_bpp != src_bpp) {
2588
0
        return false;
2589
0
    }
2590
    // TODO: more careful alias rejection (like, dst == src + 1)?
2591
2592
3.49k
    Op          program[32];
2593
3.49k
    const void* context[32];
2594
2595
3.49k
    Op*          ops      = program;
2596
3.49k
    const void** contexts = context;
2597
2598
11.9k
    auto add_op = [&](Op o) {
2599
11.9k
        *ops++ = o;
2600
11.9k
        *contexts++ = nullptr;
2601
11.9k
    };
2602
2603
10.5k
    auto add_op_ctx = [&](Op o, const void* c) {
2604
10.5k
        *ops++ = o;
2605
10.5k
        *contexts++ = c;
2606
10.5k
    };
2607
2608
3.72k
    auto add_curve_ops = [&](const skcms_Curve* curves, int numChannels) {
2609
3.72k
        OpAndArg oa[4];
2610
3.72k
        assert(numChannels <= ARRAY_COUNT(oa));
2611
2612
3.72k
        int numOps = select_curve_ops(curves, numChannels, oa);
2613
2614
10.8k
        for (int i = 0; i < numOps; ++i) {
2615
7.14k
            add_op_ctx(oa[i].op, oa[i].arg);
2616
7.14k
        }
2617
3.72k
    };
2618
2619
    // These are always parametric curves of some sort.
2620
3.49k
    skcms_Curve dst_curves[3];
2621
3.49k
    dst_curves[0].table_entries =
2622
3.49k
    dst_curves[1].table_entries =
2623
3.49k
    dst_curves[2].table_entries = 0;
2624
2625
3.49k
    skcms_Matrix3x3        from_xyz;
2626
2627
3.49k
    switch (srcFmt >> 1) {
2628
0
        default: return false;
2629
0
        case skcms_PixelFormat_A_8              >> 1: add_op(Op::load_a8);          break;
2630
0
        case skcms_PixelFormat_G_8              >> 1: add_op(Op::load_g8);          break;
2631
0
        case skcms_PixelFormat_GA_88            >> 1: add_op(Op::load_ga88);        break;
2632
0
        case skcms_PixelFormat_ABGR_4444        >> 1: add_op(Op::load_4444);        break;
2633
0
        case skcms_PixelFormat_RGB_565          >> 1: add_op(Op::load_565);         break;
2634
2.14k
        case skcms_PixelFormat_RGB_888          >> 1: add_op(Op::load_888);         break;
2635
0
        case skcms_PixelFormat_RGBA_8888        >> 1: add_op(Op::load_8888);        break;
2636
0
        case skcms_PixelFormat_RGBA_1010102     >> 1: add_op(Op::load_1010102);     break;
2637
0
        case skcms_PixelFormat_RGB_101010x_XR   >> 1: add_op(Op::load_101010x_XR);  break;
2638
0
        case skcms_PixelFormat_RGBA_10101010_XR >> 1: add_op(Op::load_10101010_XR); break;
2639
0
        case skcms_PixelFormat_RGB_161616LE     >> 1: add_op(Op::load_161616LE);    break;
2640
0
        case skcms_PixelFormat_RGBA_16161616LE  >> 1: add_op(Op::load_16161616LE);  break;
2641
0
        case skcms_PixelFormat_RGB_161616BE     >> 1: add_op(Op::load_161616BE);    break;
2642
0
        case skcms_PixelFormat_RGBA_16161616BE  >> 1: add_op(Op::load_16161616BE);  break;
2643
0
        case skcms_PixelFormat_RGB_hhh_Norm     >> 1: add_op(Op::load_hhh);         break;
2644
0
        case skcms_PixelFormat_RGBA_hhhh_Norm   >> 1: add_op(Op::load_hhhh);        break;
2645
0
        case skcms_PixelFormat_RGB_hhh          >> 1: add_op(Op::load_hhh);         break;
2646
0
        case skcms_PixelFormat_RGBA_hhhh        >> 1: add_op(Op::load_hhhh);        break;
2647
1.35k
        case skcms_PixelFormat_RGB_fff          >> 1: add_op(Op::load_fff);         break;
2648
0
        case skcms_PixelFormat_RGBA_ffff        >> 1: add_op(Op::load_ffff);        break;
2649
2650
0
        case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2651
0
            add_op(Op::load_8888);
2652
0
            add_op_ctx(Op::tf_rgb, skcms_sRGB_TransferFunction());
2653
0
            break;
2654
3.49k
    }
2655
3.49k
    if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2656
3.49k
        srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2657
0
        add_op(Op::clamp);
2658
0
    }
2659
3.49k
    if (srcFmt & 1) {
2660
0
        add_op(Op::swap_rb);
2661
0
    }
2662
3.49k
    skcms_ICCProfile gray_dst_profile;
2663
3.49k
    switch (dstFmt >> 1) {
2664
0
        case skcms_PixelFormat_G_8:
2665
0
        case skcms_PixelFormat_GA_88:
2666
            // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2667
            // luminance (Y) by the destination transfer function.
2668
0
            gray_dst_profile = *dstProfile;
2669
0
            skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2670
0
            dstProfile = &gray_dst_profile;
2671
0
            break;
2672
3.49k
        default:
2673
3.49k
            break;
2674
3.49k
    }
2675
2676
3.49k
    if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2677
        // Photoshop creates CMYK images as inverse CMYK.
2678
        // These happen to be the only ones we've _ever_ seen.
2679
0
        add_op(Op::invert);
2680
        // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2681
0
        srcAlpha = skcms_AlphaFormat_Unpremul;
2682
0
    }
2683
2684
3.49k
    if (srcAlpha == skcms_AlphaFormat_Opaque) {
2685
1.35k
        add_op(Op::force_opaque);
2686
2.14k
    } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2687
0
        add_op(Op::unpremul);
2688
0
    }
2689
2690
3.49k
    if (dstProfile != srcProfile) {
2691
2692
3.49k
        if (!prep_for_destination(dstProfile,
2693
3.49k
                                  &from_xyz,
2694
3.49k
                                  &dst_curves[0].parametric,
2695
3.49k
                                  &dst_curves[1].parametric,
2696
3.49k
                                  &dst_curves[2].parametric)) {
2697
0
            return false;
2698
0
        }
2699
2700
3.49k
        if (srcProfile->has_A2B) {
2701
117
            if (srcProfile->A2B.input_channels) {
2702
117
                add_curve_ops(srcProfile->A2B.input_curves,
2703
117
                              (int)srcProfile->A2B.input_channels);
2704
117
                add_op(Op::clamp);
2705
117
                add_op_ctx(Op::clut_A2B, &srcProfile->A2B);
2706
117
            }
2707
2708
117
            if (srcProfile->A2B.matrix_channels == 3) {
2709
117
                add_curve_ops(srcProfile->A2B.matrix_curves, /*numChannels=*/3);
2710
2711
117
                static const skcms_Matrix3x4 I = {{
2712
117
                    {1,0,0,0},
2713
117
                    {0,1,0,0},
2714
117
                    {0,0,1,0},
2715
117
                }};
2716
117
                if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2717
117
                    add_op_ctx(Op::matrix_3x4, &srcProfile->A2B.matrix);
2718
117
                }
2719
117
            }
2720
2721
117
            if (srcProfile->A2B.output_channels == 3) {
2722
117
                add_curve_ops(srcProfile->A2B.output_curves, /*numChannels=*/3);
2723
117
            }
2724
2725
117
            if (srcProfile->pcs == skcms_Signature_Lab) {
2726
0
                add_op(Op::lab_to_xyz);
2727
0
            }
2728
2729
3.37k
        } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2730
3.37k
            add_curve_ops(srcProfile->trc, /*numChannels=*/3);
2731
3.37k
        } else {
2732
0
            return false;
2733
0
        }
2734
2735
        // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2736
3.49k
        assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2737
2738
3.49k
        if (dstProfile->has_B2A) {
2739
            // B2A needs its input in XYZD50, so transform TRC sources now.
2740
0
            if (!srcProfile->has_A2B) {
2741
0
                add_op_ctx(Op::matrix_3x3, &srcProfile->toXYZD50);
2742
0
            }
2743
2744
0
            if (dstProfile->pcs == skcms_Signature_Lab) {
2745
0
                add_op(Op::xyz_to_lab);
2746
0
            }
2747
2748
0
            if (dstProfile->B2A.input_channels == 3) {
2749
0
                add_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3);
2750
0
            }
2751
2752
0
            if (dstProfile->B2A.matrix_channels == 3) {
2753
0
                static const skcms_Matrix3x4 I = {{
2754
0
                    {1,0,0,0},
2755
0
                    {0,1,0,0},
2756
0
                    {0,0,1,0},
2757
0
                }};
2758
0
                if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2759
0
                    add_op_ctx(Op::matrix_3x4, &dstProfile->B2A.matrix);
2760
0
                }
2761
2762
0
                add_curve_ops(dstProfile->B2A.matrix_curves, /*numChannels=*/3);
2763
0
            }
2764
2765
0
            if (dstProfile->B2A.output_channels) {
2766
0
                add_op(Op::clamp);
2767
0
                add_op_ctx(Op::clut_B2A, &dstProfile->B2A);
2768
2769
0
                add_curve_ops(dstProfile->B2A.output_curves,
2770
0
                              (int)dstProfile->B2A.output_channels);
2771
0
            }
2772
3.49k
        } else {
2773
            // This is a TRC destination.
2774
            // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2775
            // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2776
3.49k
            static const skcms_Matrix3x3 I = {{
2777
3.49k
                { 1.0f, 0.0f, 0.0f },
2778
3.49k
                { 0.0f, 1.0f, 0.0f },
2779
3.49k
                { 0.0f, 0.0f, 1.0f },
2780
3.49k
            }};
2781
3.49k
            const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2782
2783
            // There's a chance the source and destination gamuts are identical,
2784
            // in which case we can skip the gamut transform.
2785
3.49k
            if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2786
                // Concat the entire gamut transform into from_xyz,
2787
                // now slightly misnamed but it's a handy spot to stash the result.
2788
3.13k
                from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2789
3.13k
                add_op_ctx(Op::matrix_3x3, &from_xyz);
2790
3.13k
            }
2791
2792
            // Encode back to dst RGB using its parametric transfer functions.
2793
3.49k
            OpAndArg oa[3];
2794
3.49k
            int numOps = select_curve_ops(dst_curves, /*numChannels=*/3, oa);
2795
3.49k
            for (int index = 0; index < numOps; ++index) {
2796
0
                assert(oa[index].op != Op::table_r &&
2797
0
                       oa[index].op != Op::table_g &&
2798
0
                       oa[index].op != Op::table_b &&
2799
0
                       oa[index].op != Op::table_a);
2800
0
                add_op_ctx(oa[index].op, oa[index].arg);
2801
0
            }
2802
3.49k
        }
2803
3.49k
    }
2804
2805
    // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2806
    // not just to values that fit in [0,1].
2807
    //
2808
    // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2809
    // but would be carrying r > 1, which is really unexpected for downstream consumers.
2810
3.49k
    if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2811
2.14k
        add_op(Op::clamp);
2812
2.14k
    }
2813
2814
3.49k
    if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2815
        // Photoshop creates CMYK images as inverse CMYK.
2816
        // These happen to be the only ones we've _ever_ seen.
2817
0
        add_op(Op::invert);
2818
2819
        // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2820
0
        dstAlpha = skcms_AlphaFormat_Unpremul;
2821
0
    }
2822
2823
3.49k
    if (dstAlpha == skcms_AlphaFormat_Opaque) {
2824
1.35k
        add_op(Op::force_opaque);
2825
2.14k
    } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2826
0
        add_op(Op::premul);
2827
0
    }
2828
3.49k
    if (dstFmt & 1) {
2829
0
        add_op(Op::swap_rb);
2830
0
    }
2831
3.49k
    switch (dstFmt >> 1) {
2832
0
        default: return false;
2833
0
        case skcms_PixelFormat_A_8             >> 1: add_op(Op::store_a8);         break;
2834
0
        case skcms_PixelFormat_G_8             >> 1: add_op(Op::store_g8);         break;
2835
0
        case skcms_PixelFormat_GA_88           >> 1: add_op(Op::store_ga88);       break;
2836
0
        case skcms_PixelFormat_ABGR_4444       >> 1: add_op(Op::store_4444);       break;
2837
0
        case skcms_PixelFormat_RGB_565         >> 1: add_op(Op::store_565);        break;
2838
2.14k
        case skcms_PixelFormat_RGB_888         >> 1: add_op(Op::store_888);        break;
2839
0
        case skcms_PixelFormat_RGBA_8888       >> 1: add_op(Op::store_8888);       break;
2840
0
        case skcms_PixelFormat_RGBA_1010102    >> 1: add_op(Op::store_1010102);    break;
2841
0
        case skcms_PixelFormat_RGB_161616LE    >> 1: add_op(Op::store_161616LE);   break;
2842
0
        case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::store_16161616LE); break;
2843
0
        case skcms_PixelFormat_RGB_161616BE    >> 1: add_op(Op::store_161616BE);   break;
2844
0
        case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::store_16161616BE); break;
2845
0
        case skcms_PixelFormat_RGB_hhh_Norm    >> 1: add_op(Op::store_hhh);        break;
2846
0
        case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: add_op(Op::store_hhhh);       break;
2847
0
        case skcms_PixelFormat_RGB_101010x_XR  >> 1: add_op(Op::store_101010x_XR); break;
2848
0
        case skcms_PixelFormat_RGB_hhh         >> 1: add_op(Op::store_hhh);        break;
2849
0
        case skcms_PixelFormat_RGBA_hhhh       >> 1: add_op(Op::store_hhhh);       break;
2850
1.35k
        case skcms_PixelFormat_RGB_fff         >> 1: add_op(Op::store_fff);        break;
2851
0
        case skcms_PixelFormat_RGBA_ffff       >> 1: add_op(Op::store_ffff);       break;
2852
2853
0
        case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2854
0
            add_op_ctx(Op::tf_rgb, skcms_sRGB_Inverse_TransferFunction());
2855
0
            add_op(Op::store_8888);
2856
0
            break;
2857
3.49k
    }
2858
2859
3.49k
    assert(ops      <= program + ARRAY_COUNT(program));
2860
3.49k
    assert(contexts <= context + ARRAY_COUNT(context));
2861
2862
3.49k
    auto run = baseline::run_program;
2863
3.49k
    switch (cpu_type()) {
2864
0
        case CpuType::SKX:
2865
0
            #if !defined(SKCMS_DISABLE_SKX)
2866
0
                run = skx::run_program;
2867
0
                break;
2868
0
            #endif
2869
2870
3.49k
        case CpuType::HSW:
2871
3.49k
            #if !defined(SKCMS_DISABLE_HSW)
2872
3.49k
                run = hsw::run_program;
2873
3.49k
                break;
2874
0
            #endif
2875
2876
0
        case CpuType::Baseline:
2877
0
            break;
2878
3.49k
    }
2879
2880
3.49k
    run(program, context, ops - program, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2881
3.49k
    return true;
2882
3.49k
}
2883
2884
240
static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2885
240
#if defined(NDEBUG)
2886
240
    (void)profile;
2887
#else
2888
    skcms_Matrix3x3 fromXYZD50;
2889
    skcms_TransferFunction invR, invG, invB;
2890
    assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2891
#endif
2892
240
}
2893
2894
240
bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2895
240
    if (!profile->has_B2A) {
2896
240
        skcms_Matrix3x3 fromXYZD50;
2897
240
        if (!profile->has_trc || !profile->has_toXYZD50
2898
240
            || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2899
0
            return false;
2900
0
        }
2901
2902
240
        skcms_TransferFunction tf[3];
2903
960
        for (int i = 0; i < 3; i++) {
2904
720
            skcms_TransferFunction inv;
2905
720
            if (profile->trc[i].table_entries == 0
2906
720
                && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2907
720
                tf[i] = profile->trc[i].parametric;
2908
720
                continue;
2909
720
            }
2910
2911
0
            float max_error;
2912
            // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2913
0
            if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2914
0
                return false;
2915
0
            }
2916
0
        }
2917
2918
960
        for (int i = 0; i < 3; ++i) {
2919
720
            profile->trc[i].table_entries = 0;
2920
720
            profile->trc[i].parametric = tf[i];
2921
720
        }
2922
240
    }
2923
240
    assert_usable_as_destination(profile);
2924
240
    return true;
2925
240
}
2926
2927
0
bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2928
    // Call skcms_MakeUsableAsDestination() with B2A disabled;
2929
    // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
2930
0
    skcms_ICCProfile result = *profile;
2931
0
    result.has_B2A = false;
2932
0
    if (!skcms_MakeUsableAsDestination(&result)) {
2933
0
        return false;
2934
0
    }
2935
2936
    // Of the three, pick the transfer function that best fits the other two.
2937
0
    int best_tf = 0;
2938
0
    float min_max_error = INFINITY_;
2939
0
    for (int i = 0; i < 3; i++) {
2940
0
        skcms_TransferFunction inv;
2941
0
        if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2942
0
            return false;
2943
0
        }
2944
2945
0
        float err = 0;
2946
0
        for (int j = 0; j < 3; ++j) {
2947
0
            err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2948
0
        }
2949
0
        if (min_max_error > err) {
2950
0
            min_max_error = err;
2951
0
            best_tf = i;
2952
0
        }
2953
0
    }
2954
2955
0
    for (int i = 0; i < 3; i++) {
2956
0
        result.trc[i].parametric = result.trc[best_tf].parametric;
2957
0
    }
2958
2959
0
    *profile = result;
2960
0
    assert_usable_as_destination(profile);
2961
0
    return true;
2962
0
}