Coverage Report

Created: 2024-05-20 07:14

/src/skia/modules/skcms/skcms.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2018 Google Inc.
3
 *
4
 * Use of this source code is governed by a BSD-style license that can be
5
 * found in the LICENSE file.
6
 */
7
8
#include "src/skcms_public.h"  // NO_G3_REWRITE
9
#include "src/skcms_internals.h"  // NO_G3_REWRITE
10
#include "src/skcms_Transform.h"  // NO_G3_REWRITE
11
#include <assert.h>
12
#include <float.h>
13
#include <limits.h>
14
#include <stdlib.h>
15
#include <string.h>
16
17
#if defined(__ARM_NEON)
18
    #include <arm_neon.h>
19
#elif defined(__SSE__)
20
    #include <immintrin.h>
21
22
    #if defined(__clang__)
23
        // That #include <immintrin.h> is usually enough, but Clang's headers
24
        // "helpfully" skip including the whole kitchen sink when _MSC_VER is
25
        // defined, because lots of programs on Windows would include that and
26
        // it'd be a lot slower.  But we want all those headers included so we
27
        // can use their features after runtime checks later.
28
        #include <smmintrin.h>
29
        #include <avxintrin.h>
30
        #include <avx2intrin.h>
31
        #include <avx512fintrin.h>
32
        #include <avx512dqintrin.h>
33
    #endif
34
#endif
35
36
using namespace skcms_private;
37
38
static bool sAllowRuntimeCPUDetection = true;
39
40
0
void skcms_DisableRuntimeCPUDetection() {
41
0
    sAllowRuntimeCPUDetection = false;
42
0
}
43
44
1.28M
static float log2f_(float x) {
45
    // The first approximation of log2(x) is its exponent 'e', minus 127.
46
1.28M
    int32_t bits;
47
1.28M
    memcpy(&bits, &x, sizeof(bits));
48
49
1.28M
    float e = (float)bits * (1.0f / (1<<23));
50
51
    // If we use the mantissa too we can refine the error signficantly.
52
1.28M
    int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
53
1.28M
    float m;
54
1.28M
    memcpy(&m, &m_bits, sizeof(m));
55
56
1.28M
    return (e - 124.225514990f
57
1.28M
              -   1.498030302f*m
58
1.28M
              -   1.725879990f/(0.3520887068f + m));
59
1.28M
}
60
0
static float logf_(float x) {
61
0
    const float ln2 = 0.69314718f;
62
0
    return ln2*log2f_(x);
63
0
}
64
65
1.28M
static float exp2f_(float x) {
66
1.28M
    if (x > 128.0f) {
67
1.40k
        return INFINITY_;
68
1.28M
    } else if (x < -127.0f) {
69
3.51k
        return 0.0f;
70
3.51k
    }
71
1.28M
    float fract = x - floorf_(x);
72
73
1.28M
    float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
74
1.28M
                                        -   1.490129070f*fract
75
1.28M
                                        +  27.728023300f/(4.84252568f - fract));
76
77
    // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
78
    // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
79
    // Negative values are effectively underflow - we'll end up returning a (different) negative
80
    // value, which makes no sense. So clamp to zero.
81
1.28M
    if (fbits >= (float)INT_MAX) {
82
0
        return INFINITY_;
83
1.28M
    } else if (fbits < 0) {
84
1
        return 0;
85
1
    }
86
87
1.28M
    int32_t bits = (int32_t)fbits;
88
1.28M
    memcpy(&x, &bits, sizeof(x));
89
1.28M
    return x;
90
1.28M
}
91
92
// Not static, as it's used by some test tools.
93
1.84M
float powf_(float x, float y) {
94
1.84M
    if (x <= 0.f) {
95
45.5k
        return 0.f;
96
45.5k
    }
97
1.79M
    if (x == 1.f) {
98
509k
        return 1.f;
99
509k
    }
100
1.28M
    return exp2f_(log2f_(x) * y);
101
1.79M
}
102
103
0
static float expf_(float x) {
104
0
    const float log2_e = 1.4426950408889634074f;
105
0
    return exp2f_(log2_e * x);
106
0
}
107
108
515k
static float fmaxf_(float x, float y) { return x > y ? x : y; }
109
301k
static float fminf_(float x, float y) { return x < y ? x : y; }
110
111
4.27M
static bool isfinitef_(float x) { return 0 == x*0; }
112
113
251k
static float minus_1_ulp(float x) {
114
251k
    int32_t bits;
115
251k
    memcpy(&bits, &x, sizeof(bits));
116
251k
    bits = bits - 1;
117
251k
    memcpy(&x, &bits, sizeof(bits));
118
251k
    return x;
119
251k
}
120
121
// Most transfer functions we work with are sRGBish.
122
// For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
123
// and repurpose the other fields to hold the parameters of the HDR functions.
124
struct TF_PQish  { float A,B,C,D,E,F; };
125
struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
126
// We didn't originally support a scale factor K for HLG, and instead just stored 0 in
127
// the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
128
// By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
129
130
6
static float TFKind_marker(skcms_TFType kind) {
131
    // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
132
6
    return -(float)kind;
133
6
}
134
135
static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
136
2.44M
                                                             , TF_HLGish* hlg = nullptr) {
137
2.44M
    if (tf.g < 0) {
138
        // Negative "g" is mapped to enum values; large negative are for sure invalid.
139
134
        if (tf.g < -128) {
140
107
            return skcms_TFType_Invalid;
141
107
        }
142
27
        int enum_g = -static_cast<int>(tf.g);
143
        // Non-whole "g" values are invalid as well.
144
27
        if (static_cast<float>(-enum_g) != tf.g) {
145
6
            return skcms_TFType_Invalid;
146
6
        }
147
        // TODO: soundness checks for PQ/HLG like we do for sRGBish?
148
21
        switch (enum_g) {
149
5
            case skcms_TFType_PQish:
150
5
                if (pq) {
151
2
                    memcpy(pq , &tf.a, sizeof(*pq ));
152
2
                }
153
5
                return skcms_TFType_PQish;
154
4
            case skcms_TFType_HLGish:
155
4
                if (hlg) {
156
2
                    memcpy(hlg, &tf.a, sizeof(*hlg));
157
2
                }
158
4
                return skcms_TFType_HLGish;
159
8
            case skcms_TFType_HLGinvish:
160
8
                if (hlg) {
161
2
                    memcpy(hlg, &tf.a, sizeof(*hlg));
162
2
                }
163
8
                return skcms_TFType_HLGinvish;
164
21
        }
165
4
        return skcms_TFType_Invalid;
166
21
    }
167
168
    // Basic soundness checks for sRGBish transfer functions.
169
2.44M
    if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
170
            // a,c,d,g should be non-negative to make any sense.
171
2.44M
            && tf.a >= 0
172
2.44M
            && tf.c >= 0
173
2.44M
            && tf.d >= 0
174
2.44M
            && tf.g >= 0
175
            // Raising a negative value to a fractional tf->g produces complex numbers.
176
2.44M
            && tf.a * tf.d + tf.b >= 0) {
177
2.44M
        return skcms_TFType_sRGBish;
178
2.44M
    }
179
180
599
    return skcms_TFType_Invalid;
181
2.44M
}
182
183
51.2k
skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
184
51.2k
    return classify(*tf);
185
51.2k
}
186
4.44k
bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
187
4.44k
    return classify(*tf) == skcms_TFType_sRGBish;
188
4.44k
}
189
0
bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
190
0
    return classify(*tf) == skcms_TFType_PQish;
191
0
}
192
0
bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
193
0
    return classify(*tf) == skcms_TFType_HLGish;
194
0
}
195
196
bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
197
                                      float A, float B, float C,
198
0
                                      float D, float E, float F) {
199
0
    *tf = { TFKind_marker(skcms_TFType_PQish), A,B,C,D,E,F };
200
0
    assert(skcms_TransferFunction_isPQish(tf));
201
0
    return true;
202
0
}
203
204
bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
205
                                             float K, float R, float G,
206
0
                                             float a, float b, float c) {
207
0
    *tf = { TFKind_marker(skcms_TFType_HLGish), R,G, a,b,c, K-1.0f };
208
0
    assert(skcms_TransferFunction_isHLGish(tf));
209
0
    return true;
210
0
}
211
212
684k
float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
213
684k
    float sign = x < 0 ? -1.0f : 1.0f;
214
684k
    x *= sign;
215
216
684k
    TF_PQish  pq;
217
684k
    TF_HLGish hlg;
218
684k
    switch (classify(*tf, &pq, &hlg)) {
219
0
        case skcms_TFType_Invalid: break;
220
221
0
        case skcms_TFType_HLGish: {
222
0
            const float K = hlg.K_minus_1 + 1.0f;
223
0
            return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
224
0
                                            : expf_((x-hlg.c)*hlg.a) + hlg.b);
225
0
        }
226
227
        // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
228
0
        case skcms_TFType_HLGinvish: {
229
0
            const float K = hlg.K_minus_1 + 1.0f;
230
0
            x /= K;
231
0
            return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
232
0
                                  : hlg.a * logf_(x - hlg.b) + hlg.c);
233
0
        }
234
235
684k
        case skcms_TFType_sRGBish:
236
684k
            return sign * (x < tf->d ?       tf->c * x + tf->f
237
684k
                                     : powf_(tf->a * x + tf->b, tf->g) + tf->e);
238
239
0
        case skcms_TFType_PQish:
240
0
            return sign *
241
0
                   powf_((pq.A + pq.B * powf_(x, pq.C)) / (pq.D + pq.E * powf_(x, pq.C)), pq.F);
242
684k
    }
243
0
    return 0;
244
684k
}
245
246
247
266k
static float eval_curve(const skcms_Curve* curve, float x) {
248
266k
    if (curve->table_entries == 0) {
249
14.5k
        return skcms_TransferFunction_eval(&curve->parametric, x);
250
14.5k
    }
251
252
251k
    float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1);
253
251k
    int   lo = (int)                   ix        ,
254
251k
          hi = (int)(float)minus_1_ulp(ix + 1.0f);
255
251k
    float t = ix - (float)lo;
256
257
251k
    float l, h;
258
251k
    if (curve->table_8) {
259
44.9k
        l = curve->table_8[lo] * (1/255.0f);
260
44.9k
        h = curve->table_8[hi] * (1/255.0f);
261
206k
    } else {
262
206k
        uint16_t be_l, be_h;
263
206k
        memcpy(&be_l, curve->table_16 + 2*lo, 2);
264
206k
        memcpy(&be_h, curve->table_16 + 2*hi, 2);
265
206k
        uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
266
206k
        uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
267
206k
        l = le_l * (1/65535.0f);
268
206k
        h = le_h * (1/65535.0f);
269
206k
    }
270
251k
    return l + (h-l)*t;
271
266k
}
272
273
593
float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
274
593
    uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
275
593
    const float dx = 1.0f / static_cast<float>(N - 1);
276
593
    float err = 0;
277
214k
    for (uint32_t i = 0; i < N; i++) {
278
214k
        float x = static_cast<float>(i) * dx,
279
214k
              y = eval_curve(curve, x);
280
214k
        err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
281
214k
    }
282
593
    return err;
283
593
}
284
285
593
bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
286
593
    return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
287
593
}
288
289
// Additional ICC signature values that are only used internally
290
enum {
291
    // File signature
292
    skcms_Signature_acsp = 0x61637370,
293
294
    // Tag signatures
295
    skcms_Signature_rTRC = 0x72545243,
296
    skcms_Signature_gTRC = 0x67545243,
297
    skcms_Signature_bTRC = 0x62545243,
298
    skcms_Signature_kTRC = 0x6B545243,
299
300
    skcms_Signature_rXYZ = 0x7258595A,
301
    skcms_Signature_gXYZ = 0x6758595A,
302
    skcms_Signature_bXYZ = 0x6258595A,
303
304
    skcms_Signature_A2B0 = 0x41324230,
305
    skcms_Signature_B2A0 = 0x42324130,
306
307
    skcms_Signature_CHAD = 0x63686164,
308
    skcms_Signature_WTPT = 0x77747074,
309
310
    skcms_Signature_CICP = 0x63696370,
311
312
    // Type signatures
313
    skcms_Signature_curv = 0x63757276,
314
    skcms_Signature_mft1 = 0x6D667431,
315
    skcms_Signature_mft2 = 0x6D667432,
316
    skcms_Signature_mAB  = 0x6D414220,
317
    skcms_Signature_mBA  = 0x6D424120,
318
    skcms_Signature_para = 0x70617261,
319
    skcms_Signature_sf32 = 0x73663332,
320
    // XYZ is also a PCS signature, so it's defined in skcms.h
321
    // skcms_Signature_XYZ = 0x58595A20,
322
};
323
324
5.94k
static uint16_t read_big_u16(const uint8_t* ptr) {
325
5.94k
    uint16_t be;
326
5.94k
    memcpy(&be, ptr, sizeof(be));
327
#if defined(_MSC_VER)
328
    return _byteswap_ushort(be);
329
#else
330
5.94k
    return __builtin_bswap16(be);
331
5.94k
#endif
332
5.94k
}
333
334
633k
static uint32_t read_big_u32(const uint8_t* ptr) {
335
633k
    uint32_t be;
336
633k
    memcpy(&be, ptr, sizeof(be));
337
#if defined(_MSC_VER)
338
    return _byteswap_ulong(be);
339
#else
340
633k
    return __builtin_bswap32(be);
341
633k
#endif
342
633k
}
343
344
60.2k
static int32_t read_big_i32(const uint8_t* ptr) {
345
60.2k
    return (int32_t)read_big_u32(ptr);
346
60.2k
}
347
348
60.2k
static float read_big_fixed(const uint8_t* ptr) {
349
60.2k
    return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
350
60.2k
}
351
352
// Maps to an in-memory profile so that fields line up to the locations specified
353
// in ICC.1:2010, section 7.2
354
typedef struct {
355
    uint8_t size                [ 4];
356
    uint8_t cmm_type            [ 4];
357
    uint8_t version             [ 4];
358
    uint8_t profile_class       [ 4];
359
    uint8_t data_color_space    [ 4];
360
    uint8_t pcs                 [ 4];
361
    uint8_t creation_date_time  [12];
362
    uint8_t signature           [ 4];
363
    uint8_t platform            [ 4];
364
    uint8_t flags               [ 4];
365
    uint8_t device_manufacturer [ 4];
366
    uint8_t device_model        [ 4];
367
    uint8_t device_attributes   [ 8];
368
    uint8_t rendering_intent    [ 4];
369
    uint8_t illuminant_X        [ 4];
370
    uint8_t illuminant_Y        [ 4];
371
    uint8_t illuminant_Z        [ 4];
372
    uint8_t creator             [ 4];
373
    uint8_t profile_id          [16];
374
    uint8_t reserved            [28];
375
    uint8_t tag_count           [ 4]; // Technically not part of header, but required
376
} header_Layout;
377
378
typedef struct {
379
    uint8_t signature [4];
380
    uint8_t offset    [4];
381
    uint8_t size      [4];
382
} tag_Layout;
383
384
48.6k
static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
385
48.6k
    return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
386
48.6k
}
387
388
// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
389
// use of the type is for the CHAD tag that stores exactly nine values.
390
typedef struct {
391
    uint8_t type     [ 4];
392
    uint8_t reserved [ 4];
393
    uint8_t values   [36];
394
} sf32_Layout;
395
396
0
bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
397
0
    skcms_ICCTag tag;
398
0
    if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
399
0
        return false;
400
0
    }
401
402
0
    if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
403
0
        return false;
404
0
    }
405
406
0
    const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
407
0
    const uint8_t* values = sf32Tag->values;
408
0
    for (int r = 0; r < 3; ++r)
409
0
    for (int c = 0; c < 3; ++c, values += 4) {
410
0
        m->vals[r][c] = read_big_fixed(values);
411
0
    }
412
0
    return true;
413
0
}
414
415
// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
416
// the type are for tags/data that store exactly one triple.
417
typedef struct {
418
    uint8_t type     [4];
419
    uint8_t reserved [4];
420
    uint8_t X        [4];
421
    uint8_t Y        [4];
422
    uint8_t Z        [4];
423
} XYZ_Layout;
424
425
5.90k
static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
426
5.90k
    if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
427
1.14k
        return false;
428
1.14k
    }
429
430
4.76k
    const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
431
432
4.76k
    *x = read_big_fixed(xyzTag->X);
433
4.76k
    *y = read_big_fixed(xyzTag->Y);
434
4.76k
    *z = read_big_fixed(xyzTag->Z);
435
4.76k
    return true;
436
5.90k
}
437
438
0
bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
439
0
    skcms_ICCTag tag;
440
0
    return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
441
0
           read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
442
0
}
443
444
static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
445
2.41k
                           const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
446
2.41k
    return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
447
2.41k
           read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
448
2.41k
           read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
449
2.41k
}
450
451
typedef struct {
452
    uint8_t type          [4];
453
    uint8_t reserved_a    [4];
454
    uint8_t function_type [2];
455
    uint8_t reserved_b    [2];
456
    uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
457
} para_Layout;
458
459
static bool read_curve_para(const uint8_t* buf, uint32_t size,
460
4.81k
                            skcms_Curve* curve, uint32_t* curve_size) {
461
4.81k
    if (size < SAFE_FIXED_SIZE(para_Layout)) {
462
7
        return false;
463
7
    }
464
465
4.80k
    const para_Layout* paraTag = (const para_Layout*)buf;
466
467
4.80k
    enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
468
4.80k
    uint16_t function_type = read_big_u16(paraTag->function_type);
469
4.80k
    if (function_type > kGABCDEF) {
470
152
        return false;
471
152
    }
472
473
4.65k
    static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
474
4.65k
    if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
475
183
        return false;
476
183
    }
477
478
4.46k
    if (curve_size) {
479
1.70k
        *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
480
1.70k
    }
481
482
4.46k
    curve->table_entries = 0;
483
4.46k
    curve->parametric.a  = 1.0f;
484
4.46k
    curve->parametric.b  = 0.0f;
485
4.46k
    curve->parametric.c  = 0.0f;
486
4.46k
    curve->parametric.d  = 0.0f;
487
4.46k
    curve->parametric.e  = 0.0f;
488
4.46k
    curve->parametric.f  = 0.0f;
489
4.46k
    curve->parametric.g  = read_big_fixed(paraTag->variable);
490
491
4.46k
    switch (function_type) {
492
147
        case kGAB:
493
147
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
494
147
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
495
147
            if (curve->parametric.a == 0) {
496
21
                return false;
497
21
            }
498
126
            curve->parametric.d = -curve->parametric.b / curve->parametric.a;
499
126
            break;
500
115
        case kGABC:
501
115
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
502
115
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
503
115
            curve->parametric.e = read_big_fixed(paraTag->variable + 12);
504
115
            if (curve->parametric.a == 0) {
505
3
                return false;
506
3
            }
507
112
            curve->parametric.d = -curve->parametric.b / curve->parametric.a;
508
112
            curve->parametric.f = curve->parametric.e;
509
112
            break;
510
2.05k
        case kGABCD:
511
2.05k
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
512
2.05k
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
513
2.05k
            curve->parametric.c = read_big_fixed(paraTag->variable + 12);
514
2.05k
            curve->parametric.d = read_big_fixed(paraTag->variable + 16);
515
2.05k
            break;
516
722
        case kGABCDEF:
517
722
            curve->parametric.a = read_big_fixed(paraTag->variable + 4);
518
722
            curve->parametric.b = read_big_fixed(paraTag->variable + 8);
519
722
            curve->parametric.c = read_big_fixed(paraTag->variable + 12);
520
722
            curve->parametric.d = read_big_fixed(paraTag->variable + 16);
521
722
            curve->parametric.e = read_big_fixed(paraTag->variable + 20);
522
722
            curve->parametric.f = read_big_fixed(paraTag->variable + 24);
523
722
            break;
524
4.46k
    }
525
4.44k
    return skcms_TransferFunction_isSRGBish(&curve->parametric);
526
4.46k
}
527
528
typedef struct {
529
    uint8_t type          [4];
530
    uint8_t reserved      [4];
531
    uint8_t value_count   [4];
532
    uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
533
} curv_Layout;
534
535
static bool read_curve_curv(const uint8_t* buf, uint32_t size,
536
3.64k
                            skcms_Curve* curve, uint32_t* curve_size) {
537
3.64k
    if (size < SAFE_FIXED_SIZE(curv_Layout)) {
538
184
        return false;
539
184
    }
540
541
3.46k
    const curv_Layout* curvTag = (const curv_Layout*)buf;
542
543
3.46k
    uint32_t value_count = read_big_u32(curvTag->value_count);
544
3.46k
    if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
545
205
        return false;
546
205
    }
547
548
3.25k
    if (curve_size) {
549
29
        *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
550
29
    }
551
552
3.25k
    if (value_count < 2) {
553
2.40k
        curve->table_entries = 0;
554
2.40k
        curve->parametric.a  = 1.0f;
555
2.40k
        curve->parametric.b  = 0.0f;
556
2.40k
        curve->parametric.c  = 0.0f;
557
2.40k
        curve->parametric.d  = 0.0f;
558
2.40k
        curve->parametric.e  = 0.0f;
559
2.40k
        curve->parametric.f  = 0.0f;
560
2.40k
        if (value_count == 0) {
561
            // Empty tables are a shorthand for an identity curve
562
1.66k
            curve->parametric.g = 1.0f;
563
1.66k
        } else {
564
            // Single entry tables are a shorthand for simple gamma
565
736
            curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
566
736
        }
567
2.40k
    } else {
568
853
        curve->table_8       = nullptr;
569
853
        curve->table_16      = curvTag->variable;
570
853
        curve->table_entries = value_count;
571
853
    }
572
573
3.25k
    return true;
574
3.46k
}
575
576
// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
577
// If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
578
static bool read_curve(const uint8_t* buf, uint32_t size,
579
8.80k
                       skcms_Curve* curve, uint32_t* curve_size) {
580
8.80k
    if (!buf || size < 4 || !curve) {
581
21
        return false;
582
21
    }
583
584
8.78k
    uint32_t type = read_big_u32(buf);
585
8.78k
    if (type == skcms_Signature_para) {
586
4.81k
        return read_curve_para(buf, size, curve, curve_size);
587
4.81k
    } else if (type == skcms_Signature_curv) {
588
3.64k
        return read_curve_curv(buf, size, curve, curve_size);
589
3.64k
    }
590
591
326
    return false;
592
8.78k
}
593
594
// mft1 and mft2 share a large chunk of data
595
typedef struct {
596
    uint8_t type                 [ 4];
597
    uint8_t reserved_a           [ 4];
598
    uint8_t input_channels       [ 1];
599
    uint8_t output_channels      [ 1];
600
    uint8_t grid_points          [ 1];
601
    uint8_t reserved_b           [ 1];
602
    uint8_t matrix               [36];
603
} mft_CommonLayout;
604
605
typedef struct {
606
    mft_CommonLayout common      [1];
607
608
    uint8_t variable             [1/*variable*/];
609
} mft1_Layout;
610
611
typedef struct {
612
    mft_CommonLayout common      [1];
613
614
    uint8_t input_table_entries  [2];
615
    uint8_t output_table_entries [2];
616
    uint8_t variable             [1/*variable*/];
617
} mft2_Layout;
618
619
269
static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
620
    // MFT matrices are applied before the first set of curves, but must be identity unless the
621
    // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
622
    // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
623
    // field/flag.
624
269
    a2b->matrix_channels = 0;
625
269
    a2b-> input_channels = mftTag-> input_channels[0];
626
269
    a2b->output_channels = mftTag->output_channels[0];
627
628
    // We require exactly three (ie XYZ/Lab/RGB) output channels
629
269
    if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
630
6
        return false;
631
6
    }
632
    // We require at least one, and no more than four (ie CMYK) input channels
633
263
    if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
634
12
        return false;
635
12
    }
636
637
939
    for (uint32_t i = 0; i < a2b->input_channels; ++i) {
638
688
        a2b->grid_points[i] = mftTag->grid_points[0];
639
688
    }
640
    // The grid only makes sense with at least two points along each axis
641
251
    if (a2b->grid_points[0] < 2) {
642
5
        return false;
643
5
    }
644
246
    return true;
645
251
}
646
647
// All as the A2B version above, except where noted.
648
129
static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
649
    // Same as A2B.
650
129
    b2a->matrix_channels = 0;
651
129
    b2a-> input_channels = mftTag-> input_channels[0];
652
129
    b2a->output_channels = mftTag->output_channels[0];
653
654
655
    // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
656
129
    if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
657
6
        return false;
658
6
    }
659
123
    if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
660
9
        return false;
661
9
    }
662
663
    // Same as A2B.
664
456
    for (uint32_t i = 0; i < b2a->input_channels; ++i) {
665
342
        b2a->grid_points[i] = mftTag->grid_points[0];
666
342
    }
667
114
    if (b2a->grid_points[0] < 2) {
668
5
        return false;
669
5
    }
670
109
    return true;
671
114
}
672
673
template <typename A2B_or_B2A>
674
static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
675
                        uint32_t input_table_entries, uint32_t output_table_entries,
676
317
                        A2B_or_B2A* out) {
677
    // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
678
317
    uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
679
317
    uint32_t byte_len_per_output_table = output_table_entries * byte_width;
680
681
    // [input|output]_channels are <= 4, so still no overflow
682
317
    uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
683
317
    uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
684
685
317
    uint64_t grid_size = out->output_channels * byte_width;
686
1.21k
    for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
687
901
        grid_size *= out->grid_points[axis];
688
901
    }
689
690
317
    if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
691
89
        return false;
692
89
    }
693
694
859
    for (uint32_t i = 0; i < out->input_channels; ++i) {
695
631
        out->input_curves[i].table_entries = input_table_entries;
696
631
        if (byte_width == 1) {
697
314
            out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
698
314
            out->input_curves[i].table_16 = nullptr;
699
317
        } else {
700
317
            out->input_curves[i].table_8  = nullptr;
701
317
            out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
702
317
        }
703
631
    }
704
705
228
    if (byte_width == 1) {
706
110
        out->grid_8  = table_base + byte_len_all_input_tables;
707
110
        out->grid_16 = nullptr;
708
118
    } else {
709
118
        out->grid_8  = nullptr;
710
118
        out->grid_16 = table_base + byte_len_all_input_tables;
711
118
    }
712
713
228
    const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
714
939
    for (uint32_t i = 0; i < out->output_channels; ++i) {
715
711
        out->output_curves[i].table_entries = output_table_entries;
716
711
        if (byte_width == 1) {
717
333
            out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
718
333
            out->output_curves[i].table_16 = nullptr;
719
378
        } else {
720
378
            out->output_curves[i].table_8  = nullptr;
721
378
            out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
722
378
        }
723
711
    }
724
725
228
    return true;
726
317
}
skcms.cc:bool init_tables<skcms_A2B>(unsigned char const*, unsigned long, unsigned int, unsigned int, unsigned int, skcms_A2B*)
Line
Count
Source
676
221
                        A2B_or_B2A* out) {
677
    // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
678
221
    uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
679
221
    uint32_t byte_len_per_output_table = output_table_entries * byte_width;
680
681
    // [input|output]_channels are <= 4, so still no overflow
682
221
    uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
683
221
    uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
684
685
221
    uint64_t grid_size = out->output_channels * byte_width;
686
834
    for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
687
613
        grid_size *= out->grid_points[axis];
688
613
    }
689
690
221
    if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
691
64
        return false;
692
64
    }
693
694
575
    for (uint32_t i = 0; i < out->input_channels; ++i) {
695
418
        out->input_curves[i].table_entries = input_table_entries;
696
418
        if (byte_width == 1) {
697
212
            out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
698
212
            out->input_curves[i].table_16 = nullptr;
699
212
        } else {
700
206
            out->input_curves[i].table_8  = nullptr;
701
206
            out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
702
206
        }
703
418
    }
704
705
157
    if (byte_width == 1) {
706
76
        out->grid_8  = table_base + byte_len_all_input_tables;
707
76
        out->grid_16 = nullptr;
708
81
    } else {
709
81
        out->grid_8  = nullptr;
710
81
        out->grid_16 = table_base + byte_len_all_input_tables;
711
81
    }
712
713
157
    const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
714
628
    for (uint32_t i = 0; i < out->output_channels; ++i) {
715
471
        out->output_curves[i].table_entries = output_table_entries;
716
471
        if (byte_width == 1) {
717
228
            out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
718
228
            out->output_curves[i].table_16 = nullptr;
719
243
        } else {
720
243
            out->output_curves[i].table_8  = nullptr;
721
243
            out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
722
243
        }
723
471
    }
724
725
157
    return true;
726
221
}
skcms.cc:bool init_tables<skcms_B2A>(unsigned char const*, unsigned long, unsigned int, unsigned int, unsigned int, skcms_B2A*)
Line
Count
Source
676
96
                        A2B_or_B2A* out) {
677
    // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
678
96
    uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
679
96
    uint32_t byte_len_per_output_table = output_table_entries * byte_width;
680
681
    // [input|output]_channels are <= 4, so still no overflow
682
96
    uint32_t byte_len_all_input_tables  = out->input_channels * byte_len_per_input_table;
683
96
    uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
684
685
96
    uint64_t grid_size = out->output_channels * byte_width;
686
384
    for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
687
288
        grid_size *= out->grid_points[axis];
688
288
    }
689
690
96
    if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
691
25
        return false;
692
25
    }
693
694
284
    for (uint32_t i = 0; i < out->input_channels; ++i) {
695
213
        out->input_curves[i].table_entries = input_table_entries;
696
213
        if (byte_width == 1) {
697
102
            out->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
698
102
            out->input_curves[i].table_16 = nullptr;
699
111
        } else {
700
111
            out->input_curves[i].table_8  = nullptr;
701
111
            out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
702
111
        }
703
213
    }
704
705
71
    if (byte_width == 1) {
706
34
        out->grid_8  = table_base + byte_len_all_input_tables;
707
34
        out->grid_16 = nullptr;
708
37
    } else {
709
37
        out->grid_8  = nullptr;
710
37
        out->grid_16 = table_base + byte_len_all_input_tables;
711
37
    }
712
713
71
    const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
714
311
    for (uint32_t i = 0; i < out->output_channels; ++i) {
715
240
        out->output_curves[i].table_entries = output_table_entries;
716
240
        if (byte_width == 1) {
717
105
            out->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
718
105
            out->output_curves[i].table_16 = nullptr;
719
135
        } else {
720
135
            out->output_curves[i].table_8  = nullptr;
721
135
            out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
722
135
        }
723
240
    }
724
725
71
    return true;
726
96
}
727
728
template <typename A2B_or_B2A>
729
181
static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
730
181
    if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
731
11
        return false;
732
11
    }
733
734
170
    const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
735
170
    if (!read_mft_common(mftTag->common, out)) {
736
20
        return false;
737
20
    }
738
739
150
    uint32_t input_table_entries  = 256;
740
150
    uint32_t output_table_entries = 256;
741
742
150
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
743
150
                       input_table_entries, output_table_entries, out);
744
170
}
skcms.cc:bool read_tag_mft1<skcms_A2B>(skcms_ICCTag const*, skcms_A2B*)
Line
Count
Source
729
121
static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
730
121
    if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
731
6
        return false;
732
6
    }
733
734
115
    const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
735
115
    if (!read_mft_common(mftTag->common, out)) {
736
11
        return false;
737
11
    }
738
739
104
    uint32_t input_table_entries  = 256;
740
104
    uint32_t output_table_entries = 256;
741
742
104
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
743
104
                       input_table_entries, output_table_entries, out);
744
115
}
skcms.cc:bool read_tag_mft1<skcms_B2A>(skcms_ICCTag const*, skcms_B2A*)
Line
Count
Source
729
60
static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
730
60
    if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
731
5
        return false;
732
5
    }
733
734
55
    const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
735
55
    if (!read_mft_common(mftTag->common, out)) {
736
9
        return false;
737
9
    }
738
739
46
    uint32_t input_table_entries  = 256;
740
46
    uint32_t output_table_entries = 256;
741
742
46
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
743
46
                       input_table_entries, output_table_entries, out);
744
55
}
745
746
template <typename A2B_or_B2A>
747
238
static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
748
238
    if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
749
10
        return false;
750
10
    }
751
752
228
    const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
753
228
    if (!read_mft_common(mftTag->common, out)) {
754
23
        return false;
755
23
    }
756
757
205
    uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
758
205
    uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
759
760
    // ICC spec mandates that 2 <= table_entries <= 4096
761
205
    if (input_table_entries < 2 || input_table_entries > 4096 ||
762
205
        output_table_entries < 2 || output_table_entries > 4096) {
763
38
        return false;
764
38
    }
765
766
167
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
767
167
                       input_table_entries, output_table_entries, out);
768
205
}
skcms.cc:bool read_tag_mft2<skcms_A2B>(skcms_ICCTag const*, skcms_A2B*)
Line
Count
Source
747
159
static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
748
159
    if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
749
5
        return false;
750
5
    }
751
752
154
    const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
753
154
    if (!read_mft_common(mftTag->common, out)) {
754
12
        return false;
755
12
    }
756
757
142
    uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
758
142
    uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
759
760
    // ICC spec mandates that 2 <= table_entries <= 4096
761
142
    if (input_table_entries < 2 || input_table_entries > 4096 ||
762
142
        output_table_entries < 2 || output_table_entries > 4096) {
763
25
        return false;
764
25
    }
765
766
117
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
767
117
                       input_table_entries, output_table_entries, out);
768
142
}
skcms.cc:bool read_tag_mft2<skcms_B2A>(skcms_ICCTag const*, skcms_B2A*)
Line
Count
Source
747
79
static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
748
79
    if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
749
5
        return false;
750
5
    }
751
752
74
    const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
753
74
    if (!read_mft_common(mftTag->common, out)) {
754
11
        return false;
755
11
    }
756
757
63
    uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
758
63
    uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
759
760
    // ICC spec mandates that 2 <= table_entries <= 4096
761
63
    if (input_table_entries < 2 || input_table_entries > 4096 ||
762
63
        output_table_entries < 2 || output_table_entries > 4096) {
763
13
        return false;
764
13
    }
765
766
50
    return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
767
50
                       input_table_entries, output_table_entries, out);
768
63
}
769
770
static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
771
1.04k
                        uint32_t num_curves, skcms_Curve* curves) {
772
2.76k
    for (uint32_t i = 0; i < num_curves; ++i) {
773
2.10k
        if (curve_offset > size) {
774
284
            return false;
775
284
        }
776
777
1.82k
        uint32_t curve_bytes;
778
1.82k
        if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
779
98
            return false;
780
98
        }
781
782
1.72k
        if (curve_bytes > UINT32_MAX - 3) {
783
0
            return false;
784
0
        }
785
1.72k
        curve_bytes = (curve_bytes + 3) & ~3U;
786
787
1.72k
        uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
788
1.72k
        curve_offset = (uint32_t)new_offset_64;
789
1.72k
        if (new_offset_64 != curve_offset) {
790
0
            return false;
791
0
        }
792
1.72k
    }
793
794
659
    return true;
795
1.04k
}
796
797
// mAB and mBA tags use the same encoding, including color lookup tables.
798
typedef struct {
799
    uint8_t type                 [ 4];
800
    uint8_t reserved_a           [ 4];
801
    uint8_t input_channels       [ 1];
802
    uint8_t output_channels      [ 1];
803
    uint8_t reserved_b           [ 2];
804
    uint8_t b_curve_offset       [ 4];
805
    uint8_t matrix_offset        [ 4];
806
    uint8_t m_curve_offset       [ 4];
807
    uint8_t clut_offset          [ 4];
808
    uint8_t a_curve_offset       [ 4];
809
} mAB_or_mBA_Layout;
810
811
typedef struct {
812
    uint8_t grid_points          [16];
813
    uint8_t grid_byte_width      [ 1];
814
    uint8_t reserved             [ 3];
815
    uint8_t variable             [1/*variable*/];
816
} CLUT_Layout;
817
818
535
static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
819
535
    if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
820
6
        return false;
821
6
    }
822
823
529
    const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
824
825
529
    a2b->input_channels  = mABTag->input_channels[0];
826
529
    a2b->output_channels = mABTag->output_channels[0];
827
828
    // We require exactly three (ie XYZ/Lab/RGB) output channels
829
529
    if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
830
30
        return false;
831
30
    }
832
    // We require no more than four (ie CMYK) input channels
833
499
    if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
834
5
        return false;
835
5
    }
836
837
494
    uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
838
494
    uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
839
494
    uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
840
494
    uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
841
494
    uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
842
843
    // "B" curves must be present
844
494
    if (0 == b_curve_offset) {
845
9
        return false;
846
9
    }
847
848
485
    if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
849
485
                     a2b->output_curves)) {
850
165
        return false;
851
165
    }
852
853
    // "M" curves and Matrix must be used together
854
320
    if (0 != m_curve_offset) {
855
285
        if (0 == matrix_offset) {
856
8
            return false;
857
8
        }
858
277
        a2b->matrix_channels = a2b->output_channels;
859
277
        if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
860
277
                         a2b->matrix_curves)) {
861
94
            return false;
862
94
        }
863
864
        // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
865
183
        if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
866
8
            return false;
867
8
        }
868
175
        float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
869
175
        const uint8_t* mtx_buf = tag->buf + matrix_offset;
870
175
        a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
871
175
        a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
872
175
        a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
873
175
        a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
874
175
        a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
875
175
        a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
876
175
        a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
877
175
        a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
878
175
        a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
879
175
        a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
880
175
        a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
881
175
        a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
882
175
    } else {
883
35
        if (0 != matrix_offset) {
884
7
            return false;
885
7
        }
886
28
        a2b->matrix_channels = 0;
887
28
    }
888
889
    // "A" curves and CLUT must be used together
890
203
    if (0 != a_curve_offset) {
891
197
        if (0 == clut_offset) {
892
3
            return false;
893
3
        }
894
194
        if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
895
194
                         a2b->input_curves)) {
896
38
            return false;
897
38
        }
898
899
156
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
900
41
            return false;
901
41
        }
902
115
        const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
903
904
115
        if (clut->grid_byte_width[0] == 1) {
905
69
            a2b->grid_8  = clut->variable;
906
69
            a2b->grid_16 = nullptr;
907
69
        } else if (clut->grid_byte_width[0] == 2) {
908
42
            a2b->grid_8  = nullptr;
909
42
            a2b->grid_16 = clut->variable;
910
42
        } else {
911
4
            return false;
912
4
        }
913
914
111
        uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];  // the payload
915
260
        for (uint32_t i = 0; i < a2b->input_channels; ++i) {
916
155
            a2b->grid_points[i] = clut->grid_points[i];
917
            // The grid only makes sense with at least two points along each axis
918
155
            if (a2b->grid_points[i] < 2) {
919
6
                return false;
920
6
            }
921
149
            grid_size *= a2b->grid_points[i];
922
149
        }
923
105
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
924
13
            return false;
925
13
        }
926
105
    } else {
927
6
        if (0 != clut_offset) {
928
3
            return false;
929
3
        }
930
931
        // If there is no CLUT, the number of input and output channels must match
932
3
        if (a2b->input_channels != a2b->output_channels) {
933
2
            return false;
934
2
        }
935
936
        // Zero out the number of input channels to signal that we're skipping this stage
937
1
        a2b->input_channels = 0;
938
1
    }
939
940
93
    return true;
941
203
}
942
943
// Exactly the same as read_tag_mab(), except where there are comments.
944
// TODO: refactor the two to eliminate common code?
945
129
static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
946
129
    if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
947
7
        return false;
948
7
    }
949
950
122
    const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
951
952
122
    b2a->input_channels  = mBATag->input_channels[0];
953
122
    b2a->output_channels = mBATag->output_channels[0];
954
955
    // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
956
122
    if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
957
26
        return false;
958
26
    }
959
96
    if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
960
7
        return false;
961
7
    }
962
963
89
    uint32_t b_curve_offset = read_big_u32(mBATag->b_curve_offset);
964
89
    uint32_t matrix_offset  = read_big_u32(mBATag->matrix_offset);
965
89
    uint32_t m_curve_offset = read_big_u32(mBATag->m_curve_offset);
966
89
    uint32_t clut_offset    = read_big_u32(mBATag->clut_offset);
967
89
    uint32_t a_curve_offset = read_big_u32(mBATag->a_curve_offset);
968
969
89
    if (0 == b_curve_offset) {
970
4
        return false;
971
4
    }
972
973
    // "B" curves are our inputs, not outputs.
974
85
    if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
975
85
                     b2a->input_curves)) {
976
85
        return false;
977
85
    }
978
979
0
    if (0 != m_curve_offset) {
980
0
        if (0 == matrix_offset) {
981
0
            return false;
982
0
        }
983
        // Matrix channels is tied to input_channels (3), not output_channels.
984
0
        b2a->matrix_channels = b2a->input_channels;
985
986
0
        if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
987
0
                         b2a->matrix_curves)) {
988
0
            return false;
989
0
        }
990
991
0
        if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
992
0
            return false;
993
0
        }
994
0
        float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f;  // TODO: understand
995
0
        const uint8_t* mtx_buf = tag->buf + matrix_offset;
996
0
        b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf +  0);
997
0
        b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf +  4);
998
0
        b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf +  8);
999
0
        b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
1000
0
        b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
1001
0
        b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
1002
0
        b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
1003
0
        b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
1004
0
        b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
1005
0
        b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
1006
0
        b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
1007
0
        b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
1008
0
    } else {
1009
0
        if (0 != matrix_offset) {
1010
0
            return false;
1011
0
        }
1012
0
        b2a->matrix_channels = 0;
1013
0
    }
1014
1015
0
    if (0 != a_curve_offset) {
1016
0
        if (0 == clut_offset) {
1017
0
            return false;
1018
0
        }
1019
1020
        // "A" curves are our output, not input.
1021
0
        if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
1022
0
                         b2a->output_curves)) {
1023
0
            return false;
1024
0
        }
1025
1026
0
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1027
0
            return false;
1028
0
        }
1029
0
        const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1030
1031
0
        if (clut->grid_byte_width[0] == 1) {
1032
0
            b2a->grid_8  = clut->variable;
1033
0
            b2a->grid_16 = nullptr;
1034
0
        } else if (clut->grid_byte_width[0] == 2) {
1035
0
            b2a->grid_8  = nullptr;
1036
0
            b2a->grid_16 = clut->variable;
1037
0
        } else {
1038
0
            return false;
1039
0
        }
1040
1041
0
        uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1042
0
        for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1043
0
            b2a->grid_points[i] = clut->grid_points[i];
1044
0
            if (b2a->grid_points[i] < 2) {
1045
0
                return false;
1046
0
            }
1047
0
            grid_size *= b2a->grid_points[i];
1048
0
        }
1049
0
        if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1050
0
            return false;
1051
0
        }
1052
0
    } else {
1053
0
        if (0 != clut_offset) {
1054
0
            return false;
1055
0
        }
1056
1057
0
        if (b2a->input_channels != b2a->output_channels) {
1058
0
            return false;
1059
0
        }
1060
1061
        // Zero out *output* channels to skip this stage.
1062
0
        b2a->output_channels = 0;
1063
0
    }
1064
0
    return true;
1065
0
}
1066
1067
// If you pass f, we'll fit a possibly-non-zero value for *f.
1068
// If you pass nullptr, we'll assume you want *f to be treated as zero.
1069
static int fit_linear(const skcms_Curve* curve, int N, float tol,
1070
1.35k
                      float* c, float* d, float* f = nullptr) {
1071
1.35k
    assert(N > 1);
1072
    // We iteratively fit the first points to the TF's linear piece.
1073
    // We want the cx + f line to pass through the first and last points we fit exactly.
1074
    //
1075
    // As we walk along the points we find the minimum and maximum slope of the line before the
1076
    // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
1077
    // emtpy, when we definitely can't add any more points.
1078
    //
1079
    // Some points' error intervals may intersect the running interval but not lie fully
1080
    // within it.  So we keep track of the last point we saw that is a valid end point candidate,
1081
    // and once the search is done, back up to build the line through *that* point.
1082
1.35k
    const float dx = 1.0f / static_cast<float>(N - 1);
1083
1084
1.35k
    int lin_points = 1;
1085
1086
1.35k
    float f_zero = 0.0f;
1087
1.35k
    if (f) {
1088
1.35k
        *f = eval_curve(curve, 0);
1089
1.35k
    } else {
1090
0
        f = &f_zero;
1091
0
    }
1092
1093
1094
1.35k
    float slope_min = -INFINITY_;
1095
1.35k
    float slope_max = +INFINITY_;
1096
51.1k
    for (int i = 1; i < N; ++i) {
1097
50.7k
        float x = static_cast<float>(i) * dx;
1098
50.7k
        float y = eval_curve(curve, x);
1099
1100
50.7k
        float slope_max_i = (y + tol - *f) / x,
1101
50.7k
              slope_min_i = (y - tol - *f) / x;
1102
50.7k
        if (slope_max_i < slope_min || slope_max < slope_min_i) {
1103
            // Slope intervals would no longer overlap.
1104
1.03k
            break;
1105
1.03k
        }
1106
49.7k
        slope_max = fminf_(slope_max, slope_max_i);
1107
49.7k
        slope_min = fmaxf_(slope_min, slope_min_i);
1108
1109
49.7k
        float cur_slope = (y - *f) / x;
1110
49.7k
        if (slope_min <= cur_slope && cur_slope <= slope_max) {
1111
47.5k
            lin_points = i + 1;
1112
47.5k
            *c = cur_slope;
1113
47.5k
        }
1114
49.7k
    }
1115
1116
    // Set D to the last point that met our tolerance.
1117
1.35k
    *d = static_cast<float>(lin_points - 1) * dx;
1118
1.35k
    return lin_points;
1119
1.35k
}
1120
1121
// If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
1122
1.95k
static void canonicalize_identity(skcms_Curve* curve) {
1123
1.95k
    if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1124
1.35k
        int N = (int)curve->table_entries;
1125
1126
1.35k
        float c = 0.0f, d = 0.0f, f = 0.0f;
1127
1.35k
        if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
1128
1.35k
            && c == 1.0f
1129
1.35k
            && f == 0.0f) {
1130
39
            curve->table_entries = 0;
1131
39
            curve->table_8       = nullptr;
1132
39
            curve->table_16      = nullptr;
1133
39
            curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
1134
39
        }
1135
1.35k
    }
1136
1.95k
}
1137
1138
986
static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1139
986
    bool ok = false;
1140
986
    if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); }
1141
986
    if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); }
1142
986
    if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1143
986
    if (!ok) {
1144
736
        return false;
1145
736
    }
1146
1147
250
    if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); }
1148
250
    if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); }
1149
250
    if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); }
1150
250
    if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
1151
1152
250
    if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); }
1153
250
    if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); }
1154
250
    if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
1155
1156
250
    if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); }
1157
250
    if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); }
1158
250
    if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
1159
1160
250
    return true;
1161
986
}
1162
1163
404
static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1164
404
    bool ok = false;
1165
404
    if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); }
1166
404
    if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); }
1167
404
    if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1168
404
    if (!ok) {
1169
333
        return false;
1170
333
    }
1171
1172
71
    if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); }
1173
71
    if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); }
1174
71
    if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
1175
1176
71
    if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); }
1177
71
    if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); }
1178
71
    if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
1179
1180
71
    if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); }
1181
71
    if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); }
1182
71
    if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); }
1183
71
    if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
1184
1185
71
    return true;
1186
404
}
1187
1188
typedef struct {
1189
    uint8_t type                     [4];
1190
    uint8_t reserved                 [4];
1191
    uint8_t color_primaries          [1];
1192
    uint8_t transfer_characteristics [1];
1193
    uint8_t matrix_coefficients      [1];
1194
    uint8_t video_full_range_flag    [1];
1195
} CICP_Layout;
1196
1197
47
static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1198
47
    if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1199
44
        return false;
1200
44
    }
1201
1202
3
    const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1203
1204
3
    cicp->color_primaries          = cicpTag->color_primaries[0];
1205
3
    cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1206
3
    cicp->matrix_coefficients      = cicpTag->matrix_coefficients[0];
1207
3
    cicp->video_full_range_flag    = cicpTag->video_full_range_flag[0];
1208
3
    return true;
1209
47
}
1210
1211
0
void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1212
0
    if (!profile || !profile->buffer || !tag) { return; }
1213
0
    if (idx > profile->tag_count) { return; }
1214
0
    const tag_Layout* tags = get_tag_table(profile);
1215
0
    tag->signature = read_big_u32(tags[idx].signature);
1216
0
    tag->size      = read_big_u32(tags[idx].size);
1217
0
    tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
1218
0
    tag->type      = read_big_u32(tag->buf);
1219
0
}
1220
1221
41.3k
bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1222
41.3k
    if (!profile || !profile->buffer || !tag) { return false; }
1223
41.3k
    const tag_Layout* tags = get_tag_table(profile);
1224
331k
    for (uint32_t i = 0; i < profile->tag_count; ++i) {
1225
311k
        if (read_big_u32(tags[i].signature) == sig) {
1226
21.2k
            tag->signature = sig;
1227
21.2k
            tag->size      = read_big_u32(tags[i].size);
1228
21.2k
            tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1229
21.2k
            tag->type      = read_big_u32(tag->buf);
1230
21.2k
            return true;
1231
21.2k
        }
1232
311k
    }
1233
20.1k
    return false;
1234
41.3k
}
1235
1236
2.78k
static bool usable_as_src(const skcms_ICCProfile* profile) {
1237
2.78k
    return profile->has_A2B
1238
2.78k
       || (profile->has_trc && profile->has_toXYZD50);
1239
2.78k
}
1240
1241
bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1242
                                const int priority[], const int priorities,
1243
8.88k
                                skcms_ICCProfile* profile) {
1244
8.88k
    static_assert(SAFE_SIZEOF(header_Layout) == 132, "need to update header code");
1245
1246
8.88k
    if (!profile) {
1247
0
        return false;
1248
0
    }
1249
8.88k
    memset(profile, 0, SAFE_SIZEOF(*profile));
1250
1251
8.88k
    if (len < SAFE_SIZEOF(header_Layout)) {
1252
152
        return false;
1253
152
    }
1254
1255
    // Byte-swap all header fields
1256
8.72k
    const header_Layout* header  = (const header_Layout*)buf;
1257
8.72k
    profile->buffer              = (const uint8_t*)buf;
1258
8.72k
    profile->size                = read_big_u32(header->size);
1259
8.72k
    uint32_t version             = read_big_u32(header->version);
1260
8.72k
    profile->data_color_space    = read_big_u32(header->data_color_space);
1261
8.72k
    profile->pcs                 = read_big_u32(header->pcs);
1262
8.72k
    uint32_t signature           = read_big_u32(header->signature);
1263
8.72k
    float illuminant_X           = read_big_fixed(header->illuminant_X);
1264
8.72k
    float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1265
8.72k
    float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1266
8.72k
    profile->tag_count           = read_big_u32(header->tag_count);
1267
1268
    // Validate signature, size (smaller than buffer, large enough to hold tag table),
1269
    // and major version
1270
8.72k
    uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1271
8.72k
    if (signature != skcms_Signature_acsp ||
1272
8.72k
        profile->size > len ||
1273
8.72k
        profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1274
8.72k
        (version >> 24) > 4) {
1275
680
        return false;
1276
680
    }
1277
1278
    // Validate that illuminant is D50 white
1279
8.04k
    if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1280
8.04k
        fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1281
8.04k
        fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1282
768
        return false;
1283
768
    }
1284
1285
    // Validate that all tag entries have sane offset + size
1286
7.28k
    const tag_Layout* tags = get_tag_table(profile);
1287
72.1k
    for (uint32_t i = 0; i < profile->tag_count; ++i) {
1288
65.2k
        uint32_t tag_offset = read_big_u32(tags[i].offset);
1289
65.2k
        uint32_t tag_size   = read_big_u32(tags[i].size);
1290
65.2k
        uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1291
65.2k
        if (tag_size < 4 || tag_end > profile->size) {
1292
429
            return false;
1293
429
        }
1294
65.2k
    }
1295
1296
6.85k
    if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1297
138
        return false;
1298
138
    }
1299
1300
6.71k
    bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1301
1302
    // Pre-parse commonly used tags.
1303
6.71k
    skcms_ICCTag kTRC;
1304
6.71k
    if (profile->data_color_space == skcms_Signature_Gray &&
1305
6.71k
        skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1306
437
        if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1307
            // Malformed tag
1308
117
            return false;
1309
117
        }
1310
320
        profile->trc[1] = profile->trc[0];
1311
320
        profile->trc[2] = profile->trc[0];
1312
320
        profile->has_trc = true;
1313
1314
320
        if (pcs_is_xyz) {
1315
320
            profile->toXYZD50.vals[0][0] = illuminant_X;
1316
320
            profile->toXYZD50.vals[1][1] = illuminant_Y;
1317
320
            profile->toXYZD50.vals[2][2] = illuminant_Z;
1318
320
            profile->has_toXYZD50 = true;
1319
320
        }
1320
6.27k
    } else {
1321
6.27k
        skcms_ICCTag rTRC, gTRC, bTRC;
1322
6.27k
        if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1323
6.27k
            skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1324
6.27k
            skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1325
3.20k
            if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1326
3.20k
                !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1327
3.20k
                !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1328
                // Malformed TRC tags
1329
1.55k
                return false;
1330
1.55k
            }
1331
1.65k
            profile->has_trc = true;
1332
1.65k
        }
1333
1334
4.72k
        skcms_ICCTag rXYZ, gXYZ, bXYZ;
1335
4.72k
        if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1336
4.72k
            skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1337
4.72k
            skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1338
2.41k
            if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1339
                // Malformed XYZ tags
1340
1.14k
                return false;
1341
1.14k
            }
1342
1.27k
            profile->has_toXYZD50 = true;
1343
1.27k
        }
1344
4.72k
    }
1345
1346
9.92k
    for (int i = 0; i < priorities; i++) {
1347
        // enum { perceptual, relative_colormetric, saturation }
1348
7.01k
        if (priority[i] < 0 || priority[i] > 2) {
1349
0
            return false;
1350
0
        }
1351
7.01k
        uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1352
7.01k
        skcms_ICCTag tag;
1353
7.01k
        if (skcms_GetTagBySignature(profile, sig, &tag)) {
1354
986
            if (!read_a2b(&tag, &profile->A2B, pcs_is_xyz)) {
1355
                // Malformed A2B tag
1356
736
                return false;
1357
736
            }
1358
250
            profile->has_A2B = true;
1359
250
            break;
1360
986
        }
1361
7.01k
    }
1362
1363
8.88k
    for (int i = 0; i < priorities; i++) {
1364
        // enum { perceptual, relative_colormetric, saturation }
1365
6.12k
        if (priority[i] < 0 || priority[i] > 2) {
1366
0
            return false;
1367
0
        }
1368
6.12k
        uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1369
6.12k
        skcms_ICCTag tag;
1370
6.12k
        if (skcms_GetTagBySignature(profile, sig, &tag)) {
1371
404
            if (!read_b2a(&tag, &profile->B2A, pcs_is_xyz)) {
1372
                // Malformed B2A tag
1373
333
                return false;
1374
333
            }
1375
71
            profile->has_B2A = true;
1376
71
            break;
1377
404
        }
1378
6.12k
    }
1379
1380
2.82k
    skcms_ICCTag cicp_tag;
1381
2.82k
    if (skcms_GetTagBySignature(profile, skcms_Signature_CICP, &cicp_tag)) {
1382
47
        if (!read_cicp(&cicp_tag, &profile->CICP)) {
1383
            // Malformed CICP tag
1384
44
            return false;
1385
44
        }
1386
3
        profile->has_CICP = true;
1387
3
    }
1388
1389
2.78k
    return usable_as_src(profile);
1390
2.82k
}
1391
1392
1393
1.15M
const skcms_ICCProfile* skcms_sRGB_profile() {
1394
1.15M
    static const skcms_ICCProfile sRGB_profile = {
1395
1.15M
        nullptr,               // buffer, moot here
1396
1397
1.15M
        0,                     // size, moot here
1398
1.15M
        skcms_Signature_RGB,   // data_color_space
1399
1.15M
        skcms_Signature_XYZ,   // pcs
1400
1.15M
        0,                     // tag count, moot here
1401
1402
        // We choose to represent sRGB with its canonical transfer function,
1403
        // and with its canonical XYZD50 gamut matrix.
1404
1.15M
        true,  // has_trc, followed by the 3 trc curves
1405
1.15M
        {
1406
1.15M
            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1407
1.15M
            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1408
1.15M
            {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1409
1.15M
        },
1410
1411
1.15M
        true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1412
1.15M
        {{
1413
1.15M
            { 0.436065674f, 0.385147095f, 0.143066406f },
1414
1.15M
            { 0.222488403f, 0.716873169f, 0.060607910f },
1415
1.15M
            { 0.013916016f, 0.097076416f, 0.714096069f },
1416
1.15M
        }},
1417
1418
1.15M
        false, // has_A2B, followed by A2B itself, which we don't care about.
1419
1.15M
        {
1420
1.15M
            0,
1421
1.15M
            {
1422
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1423
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1424
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1425
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1426
1.15M
            },
1427
1.15M
            {0,0,0,0},
1428
1.15M
            nullptr,
1429
1.15M
            nullptr,
1430
1431
1.15M
            0,
1432
1.15M
            {
1433
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1434
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1435
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1436
1.15M
            },
1437
1.15M
            {{
1438
1.15M
                { 0,0,0,0 },
1439
1.15M
                { 0,0,0,0 },
1440
1.15M
                { 0,0,0,0 },
1441
1.15M
            }},
1442
1443
1.15M
            0,
1444
1.15M
            {
1445
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1446
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1447
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1448
1.15M
            },
1449
1.15M
        },
1450
1451
1.15M
        false, // has_B2A, followed by B2A itself, which we also don't care about.
1452
1.15M
        {
1453
1.15M
            0,
1454
1.15M
            {
1455
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1456
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1457
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1458
1.15M
            },
1459
1460
1.15M
            0,
1461
1.15M
            {{
1462
1.15M
                { 0,0,0,0 },
1463
1.15M
                { 0,0,0,0 },
1464
1.15M
                { 0,0,0,0 },
1465
1.15M
            }},
1466
1.15M
            {
1467
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1468
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1469
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1470
1.15M
            },
1471
1472
1.15M
            0,
1473
1.15M
            {0,0,0,0},
1474
1.15M
            nullptr,
1475
1.15M
            nullptr,
1476
1.15M
            {
1477
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1478
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1479
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1480
1.15M
                {{0, {0,0, 0,0,0,0,0}}},
1481
1.15M
            },
1482
1.15M
        },
1483
1484
1.15M
        false, // has_CICP, followed by cicp itself which we don't care about.
1485
1.15M
        { 0, 0, 0, 0 },
1486
1.15M
    };
1487
1.15M
    return &sRGB_profile;
1488
1.15M
}
1489
1490
108k
const skcms_ICCProfile* skcms_XYZD50_profile() {
1491
    // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1492
108k
    static const skcms_ICCProfile XYZD50_profile = {
1493
108k
        nullptr,               // buffer, moot here
1494
1495
108k
        0,                     // size, moot here
1496
108k
        skcms_Signature_RGB,   // data_color_space
1497
108k
        skcms_Signature_XYZ,   // pcs
1498
108k
        0,                     // tag count, moot here
1499
1500
108k
        true,  // has_trc, followed by the 3 trc curves
1501
108k
        {
1502
108k
            {{0, {1,1, 0,0,0,0,0}}},
1503
108k
            {{0, {1,1, 0,0,0,0,0}}},
1504
108k
            {{0, {1,1, 0,0,0,0,0}}},
1505
108k
        },
1506
1507
108k
        true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1508
108k
        {{
1509
108k
            { 1,0,0 },
1510
108k
            { 0,1,0 },
1511
108k
            { 0,0,1 },
1512
108k
        }},
1513
1514
108k
        false, // has_A2B, followed by A2B itself, which we don't care about.
1515
108k
        {
1516
108k
            0,
1517
108k
            {
1518
108k
                {{0, {0,0, 0,0,0,0,0}}},
1519
108k
                {{0, {0,0, 0,0,0,0,0}}},
1520
108k
                {{0, {0,0, 0,0,0,0,0}}},
1521
108k
                {{0, {0,0, 0,0,0,0,0}}},
1522
108k
            },
1523
108k
            {0,0,0,0},
1524
108k
            nullptr,
1525
108k
            nullptr,
1526
1527
108k
            0,
1528
108k
            {
1529
108k
                {{0, {0,0, 0,0,0,0,0}}},
1530
108k
                {{0, {0,0, 0,0,0,0,0}}},
1531
108k
                {{0, {0,0, 0,0,0,0,0}}},
1532
108k
            },
1533
108k
            {{
1534
108k
                { 0,0,0,0 },
1535
108k
                { 0,0,0,0 },
1536
108k
                { 0,0,0,0 },
1537
108k
            }},
1538
1539
108k
            0,
1540
108k
            {
1541
108k
                {{0, {0,0, 0,0,0,0,0}}},
1542
108k
                {{0, {0,0, 0,0,0,0,0}}},
1543
108k
                {{0, {0,0, 0,0,0,0,0}}},
1544
108k
            },
1545
108k
        },
1546
1547
108k
        false, // has_B2A, followed by B2A itself, which we also don't care about.
1548
108k
        {
1549
108k
            0,
1550
108k
            {
1551
108k
                {{0, {0,0, 0,0,0,0,0}}},
1552
108k
                {{0, {0,0, 0,0,0,0,0}}},
1553
108k
                {{0, {0,0, 0,0,0,0,0}}},
1554
108k
            },
1555
1556
108k
            0,
1557
108k
            {{
1558
108k
                { 0,0,0,0 },
1559
108k
                { 0,0,0,0 },
1560
108k
                { 0,0,0,0 },
1561
108k
            }},
1562
108k
            {
1563
108k
                {{0, {0,0, 0,0,0,0,0}}},
1564
108k
                {{0, {0,0, 0,0,0,0,0}}},
1565
108k
                {{0, {0,0, 0,0,0,0,0}}},
1566
108k
            },
1567
1568
108k
            0,
1569
108k
            {0,0,0,0},
1570
108k
            nullptr,
1571
108k
            nullptr,
1572
108k
            {
1573
108k
                {{0, {0,0, 0,0,0,0,0}}},
1574
108k
                {{0, {0,0, 0,0,0,0,0}}},
1575
108k
                {{0, {0,0, 0,0,0,0,0}}},
1576
108k
                {{0, {0,0, 0,0,0,0,0}}},
1577
108k
            },
1578
108k
        },
1579
1580
108k
        false, // has_CICP, followed by cicp itself which we don't care about.
1581
108k
        { 0, 0, 0, 0 },
1582
108k
    };
1583
1584
108k
    return &XYZD50_profile;
1585
108k
}
1586
1587
13.6k
const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1588
13.6k
    return &skcms_sRGB_profile()->trc[0].parametric;
1589
13.6k
}
1590
1591
664
const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1592
664
    static const skcms_TransferFunction sRGB_inv =
1593
664
        {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1594
664
    return &sRGB_inv;
1595
664
}
1596
1597
0
const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1598
0
    static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1599
0
    return &identity;
1600
0
}
1601
1602
const uint8_t skcms_252_random_bytes[] = {
1603
    8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1604
    119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1605
    154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1606
    194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1607
    108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1608
    70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1609
    137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1610
    9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1611
    129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1612
    140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1613
    219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1614
    123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1615
    189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1616
    174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1617
    2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1618
    112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1619
};
1620
1621
94.7k
bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1622
    // Test for exactly equal profiles first.
1623
94.7k
    if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1624
90.7k
        return true;
1625
90.7k
    }
1626
1627
    // For now this is the essentially the same strategy we use in test_only.c
1628
    // for our skcms_Transform() smoke tests:
1629
    //    1) transform A to XYZD50
1630
    //    2) transform B to XYZD50
1631
    //    3) return true if they're similar enough
1632
    // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1633
1634
    // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1635
    // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1636
1637
    // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1638
    // to be treated as equal.  But CMYK profiles are a totally different ballgame.
1639
3.96k
    const auto CMYK = skcms_Signature_CMYK;
1640
3.96k
    if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1641
0
        return false;
1642
0
    }
1643
1644
    // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1645
    // TODO: working with RGBA_8888 either way is probably fastest.
1646
3.96k
    skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1647
3.96k
    size_t npixels = 84;
1648
3.96k
    if (A->data_color_space == skcms_Signature_CMYK) {
1649
0
        fmt = skcms_PixelFormat_RGBA_8888;
1650
0
        npixels = 63;
1651
0
    }
1652
1653
    // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1654
    // use pre-canned results and skip that skcms_Transform() call?
1655
3.96k
    uint8_t dstA[252],
1656
3.96k
            dstB[252];
1657
3.96k
    if (!skcms_Transform(
1658
3.96k
                skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1659
3.96k
                dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1660
3.96k
                npixels)) {
1661
0
        return false;
1662
0
    }
1663
3.96k
    if (!skcms_Transform(
1664
3.96k
                skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1665
3.96k
                dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1666
3.96k
                npixels)) {
1667
0
        return false;
1668
0
    }
1669
1670
    // TODO: make sure this final check has reasonable codegen.
1671
207k
    for (size_t i = 0; i < 252; i++) {
1672
206k
        if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1673
3.19k
            return false;
1674
3.19k
        }
1675
206k
    }
1676
770
    return true;
1677
3.96k
}
1678
1679
bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1680
573
                                      const skcms_TransferFunction* inv_tf) {
1681
573
    if (!profile || !profile->has_trc) {
1682
0
        return false;
1683
0
    }
1684
1685
573
    return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1686
573
           skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1687
573
           skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1688
573
}
1689
1690
3.88k
static bool is_zero_to_one(float x) {
1691
3.88k
    return 0 <= x && x <= 1;
1692
3.88k
}
1693
1694
typedef struct { float vals[3]; } skcms_Vector3;
1695
1696
1.16k
static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1697
1.16k
    skcms_Vector3 dst = {{0,0,0}};
1698
4.65k
    for (int row = 0; row < 3; ++row) {
1699
3.49k
        dst.vals[row] = m->vals[row][0] * v->vals[0]
1700
3.49k
                      + m->vals[row][1] * v->vals[1]
1701
3.49k
                      + m->vals[row][2] * v->vals[2];
1702
3.49k
    }
1703
1.16k
    return dst;
1704
1.16k
}
1705
1706
bool skcms_AdaptToXYZD50(float wx, float wy,
1707
388
                         skcms_Matrix3x3* toXYZD50) {
1708
388
    if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1709
388
        !toXYZD50) {
1710
0
        return false;
1711
0
    }
1712
1713
    // Assumes that Y is 1.0f.
1714
388
    skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1715
1716
    // Now convert toXYZ matrix to toXYZD50.
1717
388
    skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1718
1719
    // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1720
    // the matrices below.  The Bradford method is used by Adobe and is widely considered
1721
    // to be the best.
1722
388
    skcms_Matrix3x3 xyz_to_lms = {{
1723
388
        {  0.8951f,  0.2664f, -0.1614f },
1724
388
        { -0.7502f,  1.7135f,  0.0367f },
1725
388
        {  0.0389f, -0.0685f,  1.0296f },
1726
388
    }};
1727
388
    skcms_Matrix3x3 lms_to_xyz = {{
1728
388
        {  0.9869929f, -0.1470543f, 0.1599627f },
1729
388
        {  0.4323053f,  0.5183603f, 0.0492912f },
1730
388
        { -0.0085287f,  0.0400428f, 0.9684867f },
1731
388
    }};
1732
1733
388
    skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1734
388
    skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1735
1736
388
    *toXYZD50 = {{
1737
388
        { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1738
388
        { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1739
388
        { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1740
388
    }};
1741
388
    *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1742
388
    *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1743
1744
388
    return true;
1745
388
}
1746
1747
bool skcms_PrimariesToXYZD50(float rx, float ry,
1748
                             float gx, float gy,
1749
                             float bx, float by,
1750
                             float wx, float wy,
1751
388
                             skcms_Matrix3x3* toXYZD50) {
1752
388
    if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1753
388
        !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1754
388
        !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1755
388
        !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1756
388
        !toXYZD50) {
1757
0
        return false;
1758
0
    }
1759
1760
    // First, we need to convert xy values (primaries) to XYZ.
1761
388
    skcms_Matrix3x3 primaries = {{
1762
388
        { rx, gx, bx },
1763
388
        { ry, gy, by },
1764
388
        { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1765
388
    }};
1766
388
    skcms_Matrix3x3 primaries_inv;
1767
388
    if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1768
0
        return false;
1769
0
    }
1770
1771
    // Assumes that Y is 1.0f.
1772
388
    skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1773
388
    skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1774
1775
388
    skcms_Matrix3x3 toXYZ = {{
1776
388
        { XYZ.vals[0],           0,           0 },
1777
388
        {           0, XYZ.vals[1],           0 },
1778
388
        {           0,           0, XYZ.vals[2] },
1779
388
    }};
1780
388
    toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1781
1782
388
    skcms_Matrix3x3 DXtoD50;
1783
388
    if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1784
0
        return false;
1785
0
    }
1786
1787
388
    *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1788
388
    return true;
1789
388
}
1790
1791
1792
141k
bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1793
141k
    double a00 = src->vals[0][0],
1794
141k
           a01 = src->vals[1][0],
1795
141k
           a02 = src->vals[2][0],
1796
141k
           a10 = src->vals[0][1],
1797
141k
           a11 = src->vals[1][1],
1798
141k
           a12 = src->vals[2][1],
1799
141k
           a20 = src->vals[0][2],
1800
141k
           a21 = src->vals[1][2],
1801
141k
           a22 = src->vals[2][2];
1802
1803
141k
    double b0 = a00*a11 - a01*a10,
1804
141k
           b1 = a00*a12 - a02*a10,
1805
141k
           b2 = a01*a12 - a02*a11,
1806
141k
           b3 = a20,
1807
141k
           b4 = a21,
1808
141k
           b5 = a22;
1809
1810
141k
    double determinant = b0*b5
1811
141k
                       - b1*b4
1812
141k
                       + b2*b3;
1813
1814
141k
    if (determinant == 0) {
1815
166
        return false;
1816
166
    }
1817
1818
141k
    double invdet = 1.0 / determinant;
1819
141k
    if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1820
31
        return false;
1821
31
    }
1822
1823
141k
    b0 *= invdet;
1824
141k
    b1 *= invdet;
1825
141k
    b2 *= invdet;
1826
141k
    b3 *= invdet;
1827
141k
    b4 *= invdet;
1828
141k
    b5 *= invdet;
1829
1830
141k
    dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1831
141k
    dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1832
141k
    dst->vals[2][0] = (float)(        +     b2 );
1833
141k
    dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1834
141k
    dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1835
141k
    dst->vals[2][1] = (float)(        -     b1 );
1836
141k
    dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1837
141k
    dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1838
141k
    dst->vals[2][2] = (float)(        +     b0 );
1839
1840
565k
    for (int r = 0; r < 3; ++r)
1841
1.69M
    for (int c = 0; c < 3; ++c) {
1842
1.27M
        if (!isfinitef_(dst->vals[r][c])) {
1843
13
            return false;
1844
13
        }
1845
1.27M
    }
1846
141k
    return true;
1847
141k
}
1848
1849
67.2k
skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1850
67.2k
    skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1851
268k
    for (int r = 0; r < 3; r++)
1852
806k
        for (int c = 0; c < 3; c++) {
1853
604k
            m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1854
604k
                         + A->vals[r][1] * B->vals[1][c]
1855
604k
                         + A->vals[r][2] * B->vals[2][c];
1856
604k
        }
1857
67.2k
    return m;
1858
67.2k
}
1859
1860
#if defined(__clang__)
1861
    [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1862
#endif
1863
415k
bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1864
415k
    TF_PQish  pq;
1865
415k
    TF_HLGish hlg;
1866
415k
    switch (classify(*src, &pq, &hlg)) {
1867
0
        case skcms_TFType_Invalid: return false;
1868
415k
        case skcms_TFType_sRGBish: break;  // handled below
1869
1870
2
        case skcms_TFType_PQish:
1871
2
            *dst = { TFKind_marker(skcms_TFType_PQish), -pq.A,  pq.D, 1.0f/pq.F
1872
2
                                                      ,  pq.B, -pq.E, 1.0f/pq.C};
1873
2
            return true;
1874
1875
2
        case skcms_TFType_HLGish:
1876
2
            *dst = { TFKind_marker(skcms_TFType_HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1877
2
                                                          , 1.0f/hlg.a, hlg.b, hlg.c
1878
2
                                                          , hlg.K_minus_1 };
1879
2
            return true;
1880
1881
2
        case skcms_TFType_HLGinvish:
1882
2
            *dst = { TFKind_marker(skcms_TFType_HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1883
2
                                                       , 1.0f/hlg.a, hlg.b, hlg.c
1884
2
                                                       , hlg.K_minus_1 };
1885
2
            return true;
1886
415k
    }
1887
1888
415k
    assert (classify(*src) == skcms_TFType_sRGBish);
1889
1890
    // We're inverting this function, solving for x in terms of y.
1891
    //   y = (cx + f)         x < d
1892
    //       (ax + b)^g + e   x ≥ d
1893
    // The inverse of this function can be expressed in the same piecewise form.
1894
415k
    skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1895
1896
    // We'll start by finding the new threshold inv.d.
1897
    // In principle we should be able to find that by solving for y at x=d from either side.
1898
    // (If those two d values aren't the same, it's a discontinuous transfer function.)
1899
415k
    float d_l =       src->c * src->d + src->f,
1900
415k
          d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1901
415k
    if (fabsf_(d_l - d_r) > 1/512.0f) {
1902
55
        return false;
1903
55
    }
1904
415k
    inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1905
1906
    // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1907
415k
    if (inv.d > 0) {
1908
        // Inverting the linear section is pretty straightfoward:
1909
        //        y       = cx + f
1910
        //        y - f   = cx
1911
        //   (1/c)y - f/c = x
1912
370k
        inv.c =    1.0f/src->c;
1913
370k
        inv.f = -src->f/src->c;
1914
370k
    }
1915
1916
    // The interesting part is inverting the nonlinear section:
1917
    //         y                = (ax + b)^g + e.
1918
    //         y - e            = (ax + b)^g
1919
    //        (y - e)^1/g       =  ax + b
1920
    //        (y - e)^1/g - b   =  ax
1921
    //   (1/a)(y - e)^1/g - b/a =   x
1922
    //
1923
    // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1924
    //   let k = (1/a)^g
1925
    //   (1/a)( y -  e)^1/g - b/a = x
1926
    //        (ky - ke)^1/g - b/a = x
1927
1928
415k
    float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1929
415k
    inv.g = 1.0f / src->g;
1930
415k
    inv.a = k;
1931
415k
    inv.b = -k * src->e;
1932
415k
    inv.e = -src->b / src->a;
1933
1934
    // We need to enforce the same constraints here that we do when fitting a curve,
1935
    // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
1936
    // of the source function if we're here.
1937
1938
    // Just like when fitting the curve, there's really no way to rescue a < 0.
1939
415k
    if (inv.a < 0) {
1940
0
        return false;
1941
0
    }
1942
    // On the other hand we can rescue an ad+b that's gone slightly negative here.
1943
415k
    if (inv.a * inv.d + inv.b < 0) {
1944
6
        inv.b = -inv.a * inv.d;
1945
6
    }
1946
1947
    // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1948
    // where we might still fail here, like non-finite parameter values.
1949
415k
    if (classify(inv) != skcms_TFType_sRGBish) {
1950
20
        return false;
1951
20
    }
1952
1953
415k
    assert (inv.a >= 0);
1954
415k
    assert (inv.a * inv.d + inv.b >= 0);
1955
1956
    // Now in principle we're done.
1957
    // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1958
    // e or f of the inverse, depending on which segment contains src(1.0f).
1959
415k
    float s = skcms_TransferFunction_eval(src, 1.0f);
1960
415k
    if (!isfinitef_(s)) {
1961
3
        return false;
1962
3
    }
1963
1964
415k
    float sign = s < 0 ? -1.0f : 1.0f;
1965
415k
    s *= sign;
1966
415k
    if (s < inv.d) {
1967
2
        inv.f = 1.0f - sign * inv.c * s;
1968
415k
    } else {
1969
415k
        inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1970
415k
    }
1971
1972
415k
    *dst = inv;
1973
415k
    return classify(*dst) == skcms_TFType_sRGBish;
1974
415k
}
1975
1976
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1977
1978
// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1979
//
1980
//   tf(x) =  cx + f          x < d
1981
//   tf(x) = (ax + b)^g + e   x ≥ d
1982
//
1983
// When fitting, we add the additional constraint that both pieces meet at d:
1984
//
1985
//   cd + f = (ad + b)^g + e
1986
//
1987
// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1988
//
1989
//   tf(x) =                           cx + f   x < d
1990
//   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1991
//
1992
// Our overall strategy is then:
1993
//    For a couple tolerances,
1994
//       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1995
//       - invert c,d,f
1996
//       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1997
//                          (and by constraint, inverted e) to the inverse of the table.
1998
//    Return the parameters with least maximum error.
1999
//
2000
// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2001
// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2002
//
2003
//    let y = Table(x)
2004
//    r(x) = x - f_inv(y)
2005
//
2006
//    ∂r/∂g = ln(ay + b)*(ay + b)^g
2007
//          - ln(ad + b)*(ad + b)^g
2008
//    ∂r/∂a = yg(ay + b)^(g-1)
2009
//          - dg(ad + b)^(g-1)
2010
//    ∂r/∂b =  g(ay + b)^(g-1)
2011
//          -  g(ad + b)^(g-1)
2012
2013
// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2014
// and fill out the gradient of the residual into dfdP.
2015
static float rg_nonlinear(float x,
2016
                          const skcms_Curve* curve,
2017
                          const skcms_TransferFunction* tf,
2018
0
                          float dfdP[3]) {
2019
0
    const float y = eval_curve(curve, x);
2020
2021
0
    const float g = tf->g, a = tf->a, b = tf->b,
2022
0
                c = tf->c, d = tf->d, f = tf->f;
2023
2024
0
    const float Y = fmaxf_(a*y + b, 0.0f),
2025
0
                D =        a*d + b;
2026
0
    assert (D >= 0);
2027
2028
    // The gradient.
2029
0
    dfdP[0] = logf_(Y)*powf_(Y, g)
2030
0
            - logf_(D)*powf_(D, g);
2031
0
    dfdP[1] = y*g*powf_(Y, g-1)
2032
0
            - d*g*powf_(D, g-1);
2033
0
    dfdP[2] =   g*powf_(Y, g-1)
2034
0
            -   g*powf_(D, g-1);
2035
2036
    // The residual.
2037
0
    const float f_inv = powf_(Y, g)
2038
0
                      - powf_(D, g)
2039
0
                      + c*d + f;
2040
0
    return x - f_inv;
2041
0
}
2042
2043
static bool gauss_newton_step(const skcms_Curve* curve,
2044
                                    skcms_TransferFunction* tf,
2045
0
                              float x0, float dx, int N) {
2046
    // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2047
    //
2048
    // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2049
    //
2050
    // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2051
    //   where r(P) is the residual vector
2052
    //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
2053
    //
2054
    // Let's review the shape of each of these expressions:
2055
    //   r(P)   is [N x 1], a column vector with one entry per value of x tested
2056
    //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
2057
    //   Jf^T   is [3 x N], the transpose of Jf
2058
    //
2059
    //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2060
    //                                              and so is its inverse (Jf^T Jf)^-1
2061
    //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2062
    //
2063
    // Our implementation strategy to get to the final ∆P is
2064
    //   1) evaluate Jf^T Jf,   call that lhs
2065
    //   2) evaluate Jf^T r(P), call that rhs
2066
    //   3) invert lhs
2067
    //   4) multiply inverse lhs by rhs
2068
    //
2069
    // This is a friendly implementation strategy because we don't have to have any
2070
    // buffers that scale with N, and equally nice don't have to perform any matrix
2071
    // operations that are variable size.
2072
    //
2073
    // Other implementation strategies could trade this off, e.g. evaluating the
2074
    // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2075
    // the residuals.  That would probably require implementing singular value
2076
    // decomposition, and would create a [3 x N] matrix to be multiplied by the
2077
    // [N x 1] residual vector, but on the upside I think that'd eliminate the
2078
    // possibility of this gauss_newton_step() function ever failing.
2079
2080
    // 0) start off with lhs and rhs safely zeroed.
2081
0
    skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
2082
0
    skcms_Vector3   rhs = {  {0,0,0} };
2083
2084
    // 1,2) evaluate lhs and evaluate rhs
2085
    //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2086
    //   so we'll have to update lhs and rhs at the same time.
2087
0
    for (int i = 0; i < N; i++) {
2088
0
        float x = x0 + static_cast<float>(i)*dx;
2089
2090
0
        float dfdP[3] = {0,0,0};
2091
0
        float resid = rg_nonlinear(x,curve,tf, dfdP);
2092
2093
0
        for (int r = 0; r < 3; r++) {
2094
0
            for (int c = 0; c < 3; c++) {
2095
0
                lhs.vals[r][c] += dfdP[r] * dfdP[c];
2096
0
            }
2097
0
            rhs.vals[r] += dfdP[r] * resid;
2098
0
        }
2099
0
    }
2100
2101
    // If any of the 3 P parameters are unused, this matrix will be singular.
2102
    // Detect those cases and fix them up to indentity instead, so we can invert.
2103
0
    for (int k = 0; k < 3; k++) {
2104
0
        if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2105
0
            lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2106
0
            lhs.vals[k][k] = 1;
2107
0
        }
2108
0
    }
2109
2110
    // 3) invert lhs
2111
0
    skcms_Matrix3x3 lhs_inv;
2112
0
    if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
2113
0
        return false;
2114
0
    }
2115
2116
    // 4) multiply inverse lhs by rhs
2117
0
    skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
2118
0
    tf->g += dP.vals[0];
2119
0
    tf->a += dP.vals[1];
2120
0
    tf->b += dP.vals[2];
2121
0
    return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
2122
0
}
2123
2124
static float max_roundtrip_error_checked(const skcms_Curve* curve,
2125
0
                                         const skcms_TransferFunction* tf_inv) {
2126
0
    skcms_TransferFunction tf;
2127
0
    if (!skcms_TransferFunction_invert(tf_inv, &tf) || skcms_TFType_sRGBish != classify(tf)) {
2128
0
        return INFINITY_;
2129
0
    }
2130
2131
0
    skcms_TransferFunction tf_inv_again;
2132
0
    if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
2133
0
        return INFINITY_;
2134
0
    }
2135
2136
0
    return skcms_MaxRoundtripError(curve, &tf_inv_again);
2137
0
}
2138
2139
// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
2140
0
static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2141
    // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2142
0
    auto fixup_tf = [tf]() {
2143
        // a must be non-negative. That ensures the function is monotonically increasing.
2144
        // We don't really know how to fix up a if it goes negative.
2145
0
        if (tf->a < 0) {
2146
0
            return false;
2147
0
        }
2148
        // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2149
        // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2150
0
        if (tf->a * tf->d + tf->b < 0) {
2151
0
            tf->b = -tf->a * tf->d;
2152
0
        }
2153
0
        assert (tf->a >= 0 &&
2154
0
                tf->a * tf->d + tf->b >= 0);
2155
2156
        // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2157
        // parameter so we can guarantee this.
2158
0
        tf->e =   tf->c*tf->d + tf->f
2159
0
          - powf_(tf->a*tf->d + tf->b, tf->g);
2160
2161
0
        return isfinitef_(tf->e);
2162
0
    };
2163
2164
0
    if (!fixup_tf()) {
2165
0
        return false;
2166
0
    }
2167
2168
    // No matter where we start, dx should always represent N even steps from 0 to 1.
2169
0
    const float dx = 1.0f / static_cast<float>(N-1);
2170
2171
0
    skcms_TransferFunction best_tf = *tf;
2172
0
    float best_max_error = INFINITY_;
2173
2174
    // Need this or several curves get worse... *sigh*
2175
0
    float init_error = max_roundtrip_error_checked(curve, tf);
2176
0
    if (init_error < best_max_error) {
2177
0
        best_max_error = init_error;
2178
0
        best_tf = *tf;
2179
0
    }
2180
2181
    // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2182
0
    for (int j = 0; j < 8; j++) {
2183
0
        if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
2184
0
            *tf = best_tf;
2185
0
            return isfinitef_(best_max_error);
2186
0
        }
2187
2188
0
        float max_error = max_roundtrip_error_checked(curve, tf);
2189
0
        if (max_error < best_max_error) {
2190
0
            best_max_error = max_error;
2191
0
            best_tf = *tf;
2192
0
        }
2193
0
    }
2194
2195
0
    *tf = best_tf;
2196
0
    return isfinitef_(best_max_error);
2197
0
}
2198
2199
bool skcms_ApproximateCurve(const skcms_Curve* curve,
2200
                            skcms_TransferFunction* approx,
2201
0
                            float* max_error) {
2202
0
    if (!curve || !approx || !max_error) {
2203
0
        return false;
2204
0
    }
2205
2206
0
    if (curve->table_entries == 0) {
2207
        // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2208
0
        return false;
2209
0
    }
2210
2211
0
    if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2212
        // We need at least two points, and must put some reasonable cap on the maximum number.
2213
0
        return false;
2214
0
    }
2215
2216
0
    int N = (int)curve->table_entries;
2217
0
    const float dx = 1.0f / static_cast<float>(N - 1);
2218
2219
0
    *max_error = INFINITY_;
2220
0
    const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2221
0
    for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2222
0
        skcms_TransferFunction tf,
2223
0
                               tf_inv;
2224
2225
        // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2226
0
        tf.f = 0.0f;
2227
0
        int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
2228
2229
0
        if (L == N) {
2230
            // If the entire data set was linear, move the coefficients to the nonlinear portion
2231
            // with G == 1.  This lets use a canonical representation with d == 0.
2232
0
            tf.g = 1;
2233
0
            tf.a = tf.c;
2234
0
            tf.b = tf.f;
2235
0
            tf.c = tf.d = tf.e = tf.f = 0;
2236
0
        } else if (L == N - 1) {
2237
            // Degenerate case with only two points in the nonlinear segment. Solve directly.
2238
0
            tf.g = 1;
2239
0
            tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
2240
0
                    eval_curve(curve, static_cast<float>(N-2)*dx))
2241
0
                 / dx;
2242
0
            tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
2243
0
                 - tf.a * static_cast<float>(N-2)*dx;
2244
0
            tf.e = 0;
2245
0
        } else {
2246
            // Start by guessing a gamma-only curve through the midpoint.
2247
0
            int mid = (L + N) / 2;
2248
0
            float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2249
0
            float mid_y = eval_curve(curve, mid_x);
2250
0
            tf.g = log2f_(mid_y) / log2f_(mid_x);
2251
0
            tf.a = 1;
2252
0
            tf.b = 0;
2253
0
            tf.e =    tf.c*tf.d + tf.f
2254
0
              - powf_(tf.a*tf.d + tf.b, tf.g);
2255
2256
2257
0
            if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
2258
0
                !fit_nonlinear(curve, L,N, &tf_inv)) {
2259
0
                continue;
2260
0
            }
2261
2262
            // We fit tf_inv, so calculate tf to keep in sync.
2263
            // fit_nonlinear() should guarantee invertibility.
2264
0
            if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
2265
0
                assert(false);
2266
0
                continue;
2267
0
            }
2268
0
        }
2269
2270
        // We'd better have a sane, sRGB-ish TF by now.
2271
        // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2272
        // anything else is just some accident of math and the way we pun tf.g as a type flag.
2273
        // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2274
0
        if (skcms_TFType_sRGBish != classify(tf)) {
2275
0
            continue;
2276
0
        }
2277
2278
        // We find our error by roundtripping the table through tf_inv.
2279
        //
2280
        // (The most likely use case for this approximation is to be inverted and
2281
        // used as the transfer function for a destination color space.)
2282
        //
2283
        // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2284
        // invertible, so re-verify that here (and use the new inverse for testing).
2285
        // fit_nonlinear() should guarantee this, but the special cases that don't use
2286
        // it may fail this test.
2287
0
        if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
2288
0
            continue;
2289
0
        }
2290
2291
0
        float err = skcms_MaxRoundtripError(curve, &tf_inv);
2292
0
        if (*max_error > err) {
2293
0
            *max_error = err;
2294
0
            *approx    = tf;
2295
0
        }
2296
0
    }
2297
0
    return isfinitef_(*max_error);
2298
0
}
2299
2300
enum class CpuType { Baseline, HSW, SKX };
2301
2302
660k
static CpuType cpu_type() {
2303
    #if defined(SKCMS_PORTABLE) || !defined(__x86_64__) || defined(SKCMS_FORCE_BASELINE)
2304
        return CpuType::Baseline;
2305
    #elif defined(SKCMS_FORCE_HSW)
2306
        return CpuType::HSW;
2307
    #elif defined(SKCMS_FORCE_SKX)
2308
        return CpuType::SKX;
2309
    #else
2310
660k
        static const CpuType type = []{
2311
6
            if (!sAllowRuntimeCPUDetection) {
2312
0
                return CpuType::Baseline;
2313
0
            }
2314
            // See http://www.sandpile.org/x86/cpuid.htm
2315
2316
            // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2317
6
            uint32_t eax, ebx, ecx, edx;
2318
6
            __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2319
6
                                         : "0"(1), "2"(0));
2320
6
            if ((edx & (1u<<25)) &&  // SSE
2321
6
                (edx & (1u<<26)) &&  // SSE2
2322
6
                (ecx & (1u<< 0)) &&  // SSE3
2323
6
                (ecx & (1u<< 9)) &&  // SSSE3
2324
6
                (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2325
6
                (ecx & (1u<<19)) &&  // SSE4.1
2326
6
                (ecx & (1u<<20)) &&  // SSE4.2
2327
6
                (ecx & (1u<<26)) &&  // XSAVE
2328
6
                (ecx & (1u<<27)) &&  // OSXSAVE
2329
6
                (ecx & (1u<<28)) &&  // AVX
2330
6
                (ecx & (1u<<29))) {  // F16C
2331
2332
                // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2333
6
                __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2334
6
                                             : "0"(7), "2"(0));
2335
                // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2336
6
                uint32_t xcr0, dont_need_edx;
2337
6
                __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2338
2339
6
                if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2340
6
                    (xcr0 & (1u<<2)) &&  // YMM register state saved?
2341
6
                    (ebx  & (1u<<5))) {  // AVX2
2342
                    // At this point we're at least HSW.  Continue checking for SKX.
2343
6
                    if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2344
6
                        (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2345
6
                        (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2346
6
                        (ebx  & (1u<<16)) && // AVX512F
2347
6
                        (ebx  & (1u<<17)) && // AVX512DQ
2348
6
                        (ebx  & (1u<<28)) && // AVX512CD
2349
6
                        (ebx  & (1u<<30)) && // AVX512BW
2350
6
                        (ebx  & (1u<<31))) { // AVX512VL
2351
0
                        return CpuType::SKX;
2352
0
                    }
2353
6
                    return CpuType::HSW;
2354
6
                }
2355
6
            }
2356
0
            return CpuType::Baseline;
2357
6
        }();
2358
660k
        return type;
2359
660k
    #endif
2360
660k
}
2361
2362
574k
static bool tf_is_gamma(const skcms_TransferFunction& tf) {
2363
574k
    return tf.g > 0 && tf.a == 1 &&
2364
574k
           tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0;
2365
574k
}
2366
2367
struct OpAndArg {
2368
    Op          op;
2369
    const void* arg;
2370
};
2371
2372
1.09M
static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2373
1.09M
    struct OpType {
2374
1.09M
        Op sGamma, sRGBish, PQish, HLGish, HLGinvish, table;
2375
1.09M
    };
2376
1.09M
    static constexpr OpType kOps[] = {
2377
1.09M
        { Op::gamma_r, Op::tf_r, Op::pq_r, Op::hlg_r, Op::hlginv_r, Op::table_r },
2378
1.09M
        { Op::gamma_g, Op::tf_g, Op::pq_g, Op::hlg_g, Op::hlginv_g, Op::table_g },
2379
1.09M
        { Op::gamma_b, Op::tf_b, Op::pq_b, Op::hlg_b, Op::hlginv_b, Op::table_b },
2380
1.09M
        { Op::gamma_a, Op::tf_a, Op::pq_a, Op::hlg_a, Op::hlginv_a, Op::table_a },
2381
1.09M
    };
2382
1.09M
    const auto& op = kOps[channel];
2383
2384
1.09M
    if (curve->table_entries == 0) {
2385
574k
        const OpAndArg noop = { Op::load_a8/*doesn't matter*/, nullptr };
2386
2387
574k
        const skcms_TransferFunction& tf = curve->parametric;
2388
2389
574k
        if (tf_is_gamma(tf)) {
2390
117k
            return tf.g != 1 ? OpAndArg{op.sGamma, &tf}
2391
117k
                             : noop;
2392
117k
        }
2393
2394
457k
        switch (classify(tf)) {
2395
0
            case skcms_TFType_Invalid:    return noop;
2396
457k
            case skcms_TFType_sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2397
0
            case skcms_TFType_PQish:      return OpAndArg{op.PQish,     &tf};
2398
0
            case skcms_TFType_HLGish:     return OpAndArg{op.HLGish,    &tf};
2399
0
            case skcms_TFType_HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2400
457k
        }
2401
457k
    }
2402
519k
    return OpAndArg{op.table, curve};
2403
1.09M
}
2404
2405
357k
static int select_curve_ops(const skcms_Curve* curves, int numChannels, OpAndArg* ops) {
2406
    // We process the channels in reverse order, yielding ops in ABGR order.
2407
    // (Working backwards allows us to fuse trailing B+G+R ops into a single RGB op.)
2408
357k
    int cursor = 0;
2409
1.45M
    for (int index = numChannels; index-- > 0; ) {
2410
1.09M
        ops[cursor] = select_curve_op(&curves[index], index);
2411
1.09M
        if (ops[cursor].arg) {
2412
995k
            ++cursor;
2413
995k
        }
2414
1.09M
    }
2415
2416
    // Identify separate B+G+R ops and fuse them into a single RGB op.
2417
357k
    if (cursor >= 3) {
2418
310k
        struct FusableOps {
2419
310k
            Op r, g, b, rgb;
2420
310k
        };
2421
310k
        static constexpr FusableOps kFusableOps[] = {
2422
310k
            {Op::gamma_r,  Op::gamma_g,  Op::gamma_b,  Op::gamma_rgb},
2423
310k
            {Op::tf_r,     Op::tf_g,     Op::tf_b,     Op::tf_rgb},
2424
310k
            {Op::pq_r,     Op::pq_g,     Op::pq_b,     Op::pq_rgb},
2425
310k
            {Op::hlg_r,    Op::hlg_g,    Op::hlg_b,    Op::hlg_rgb},
2426
310k
            {Op::hlginv_r, Op::hlginv_g, Op::hlginv_b, Op::hlginv_rgb},
2427
310k
        };
2428
2429
310k
        int posR = cursor - 1;
2430
310k
        int posG = cursor - 2;
2431
310k
        int posB = cursor - 3;
2432
1.09M
        for (const FusableOps& fusableOp : kFusableOps) {
2433
1.09M
            if (ops[posR].op == fusableOp.r &&
2434
1.09M
                ops[posG].op == fusableOp.g &&
2435
1.09M
                ops[posB].op == fusableOp.b &&
2436
1.09M
                (0 == memcmp(ops[posR].arg, ops[posG].arg, sizeof(skcms_TransferFunction))) &&
2437
1.09M
                (0 == memcmp(ops[posR].arg, ops[posB].arg, sizeof(skcms_TransferFunction)))) {
2438
                // Fuse the three matching ops into one.
2439
152k
                ops[posB].op = fusableOp.rgb;
2440
152k
                cursor -= 2;
2441
152k
                break;
2442
152k
            }
2443
1.09M
        }
2444
310k
    }
2445
2446
357k
    return cursor;
2447
357k
}
2448
2449
1.32M
static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2450
1.32M
    switch (fmt >> 1) {   // ignore rgb/bgr
2451
0
        case skcms_PixelFormat_A_8             >> 1: return  1;
2452
115k
        case skcms_PixelFormat_G_8             >> 1: return  1;
2453
0
        case skcms_PixelFormat_ABGR_4444       >> 1: return  2;
2454
0
        case skcms_PixelFormat_RGB_565         >> 1: return  2;
2455
15.8k
        case skcms_PixelFormat_RGB_888         >> 1: return  3;
2456
1.18M
        case skcms_PixelFormat_RGBA_8888       >> 1: return  4;
2457
0
        case skcms_PixelFormat_RGBA_8888_sRGB  >> 1: return  4;
2458
0
        case skcms_PixelFormat_RGBA_1010102    >> 1: return  4;
2459
0
        case skcms_PixelFormat_RGB_101010x_XR  >> 1: return  4;
2460
0
        case skcms_PixelFormat_RGB_161616LE    >> 1: return  6;
2461
0
        case skcms_PixelFormat_RGBA_16161616LE >> 1: return  8;
2462
733
        case skcms_PixelFormat_RGB_161616BE    >> 1: return  6;
2463
5.08k
        case skcms_PixelFormat_RGBA_16161616BE >> 1: return  8;
2464
0
        case skcms_PixelFormat_RGB_hhh_Norm    >> 1: return  6;
2465
0
        case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: return  8;
2466
0
        case skcms_PixelFormat_RGB_hhh         >> 1: return  6;
2467
0
        case skcms_PixelFormat_RGBA_hhhh       >> 1: return  8;
2468
0
        case skcms_PixelFormat_RGB_fff         >> 1: return 12;
2469
0
        case skcms_PixelFormat_RGBA_ffff       >> 1: return 16;
2470
1.32M
    }
2471
0
    assert(false);
2472
0
    return 0;
2473
1.32M
}
2474
2475
static bool prep_for_destination(const skcms_ICCProfile* profile,
2476
                                 skcms_Matrix3x3* fromXYZD50,
2477
                                 skcms_TransferFunction* invR,
2478
                                 skcms_TransferFunction* invG,
2479
138k
                                 skcms_TransferFunction* invB) {
2480
    // skcms_Transform() supports B2A destinations...
2481
138k
    if (profile->has_B2A) { return true; }
2482
    // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2483
138k
    return profile->has_trc
2484
138k
        && profile->has_toXYZD50
2485
138k
        && profile->trc[0].table_entries == 0
2486
138k
        && profile->trc[1].table_entries == 0
2487
138k
        && profile->trc[2].table_entries == 0
2488
138k
        && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2489
138k
        && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2490
138k
        && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2491
138k
        && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2492
138k
}
2493
2494
bool skcms_Transform(const void*             src,
2495
                     skcms_PixelFormat       srcFmt,
2496
                     skcms_AlphaFormat       srcAlpha,
2497
                     const skcms_ICCProfile* srcProfile,
2498
                     void*                   dst,
2499
                     skcms_PixelFormat       dstFmt,
2500
                     skcms_AlphaFormat       dstAlpha,
2501
                     const skcms_ICCProfile* dstProfile,
2502
660k
                     size_t                  nz) {
2503
660k
    const size_t dst_bpp = bytes_per_pixel(dstFmt),
2504
660k
                 src_bpp = bytes_per_pixel(srcFmt);
2505
    // Let's just refuse if the request is absurdly big.
2506
660k
    if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2507
0
        return false;
2508
0
    }
2509
660k
    int n = (int)nz;
2510
2511
    // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2512
660k
    if (!srcProfile) {
2513
522k
        srcProfile = skcms_sRGB_profile();
2514
522k
    }
2515
660k
    if (!dstProfile) {
2516
522k
        dstProfile = skcms_sRGB_profile();
2517
522k
    }
2518
2519
    // We can't transform in place unless the PixelFormats are the same size.
2520
660k
    if (dst == src && dst_bpp != src_bpp) {
2521
0
        return false;
2522
0
    }
2523
    // TODO: more careful alias rejection (like, dst == src + 1)?
2524
2525
660k
    Op          program[32];
2526
660k
    const void* context[32];
2527
2528
660k
    Op*          ops      = program;
2529
660k
    const void** contexts = context;
2530
2531
3.14M
    auto add_op = [&](Op o) {
2532
3.14M
        *ops++ = o;
2533
3.14M
        *contexts++ = nullptr;
2534
3.14M
    };
2535
2536
837k
    auto add_op_ctx = [&](Op o, const void* c) {
2537
837k
        *ops++ = o;
2538
837k
        *contexts++ = c;
2539
837k
    };
2540
2541
660k
    auto add_curve_ops = [&](const skcms_Curve* curves, int numChannels) {
2542
219k
        OpAndArg oa[4];
2543
219k
        assert(numChannels <= ARRAY_COUNT(oa));
2544
2545
219k
        int numOps = select_curve_ops(curves, numChannels, oa);
2546
2547
786k
        for (int i = 0; i < numOps; ++i) {
2548
567k
            add_op_ctx(oa[i].op, oa[i].arg);
2549
567k
        }
2550
219k
    };
2551
2552
    // These are always parametric curves of some sort.
2553
660k
    skcms_Curve dst_curves[3];
2554
660k
    dst_curves[0].table_entries =
2555
660k
    dst_curves[1].table_entries =
2556
660k
    dst_curves[2].table_entries = 0;
2557
2558
660k
    skcms_Matrix3x3        from_xyz;
2559
2560
660k
    switch (srcFmt >> 1) {
2561
0
        default: return false;
2562
0
        case skcms_PixelFormat_A_8             >> 1: add_op(Op::load_a8);         break;
2563
14.7k
        case skcms_PixelFormat_G_8             >> 1: add_op(Op::load_g8);         break;
2564
0
        case skcms_PixelFormat_ABGR_4444       >> 1: add_op(Op::load_4444);       break;
2565
0
        case skcms_PixelFormat_RGB_565         >> 1: add_op(Op::load_565);        break;
2566
7.93k
        case skcms_PixelFormat_RGB_888         >> 1: add_op(Op::load_888);        break;
2567
632k
        case skcms_PixelFormat_RGBA_8888       >> 1: add_op(Op::load_8888);       break;
2568
0
        case skcms_PixelFormat_RGBA_1010102    >> 1: add_op(Op::load_1010102);    break;
2569
0
        case skcms_PixelFormat_RGB_101010x_XR  >> 1: add_op(Op::load_101010x_XR); break;
2570
0
        case skcms_PixelFormat_RGB_161616LE    >> 1: add_op(Op::load_161616LE);   break;
2571
0
        case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::load_16161616LE); break;
2572
733
        case skcms_PixelFormat_RGB_161616BE    >> 1: add_op(Op::load_161616BE);   break;
2573
5.08k
        case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::load_16161616BE); break;
2574
0
        case skcms_PixelFormat_RGB_hhh_Norm    >> 1: add_op(Op::load_hhh);        break;
2575
0
        case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: add_op(Op::load_hhhh);       break;
2576
0
        case skcms_PixelFormat_RGB_hhh         >> 1: add_op(Op::load_hhh);        break;
2577
0
        case skcms_PixelFormat_RGBA_hhhh       >> 1: add_op(Op::load_hhhh);       break;
2578
0
        case skcms_PixelFormat_RGB_fff         >> 1: add_op(Op::load_fff);        break;
2579
0
        case skcms_PixelFormat_RGBA_ffff       >> 1: add_op(Op::load_ffff);       break;
2580
2581
0
        case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2582
0
            add_op(Op::load_8888);
2583
0
            add_op_ctx(Op::tf_rgb, skcms_sRGB_TransferFunction());
2584
0
            break;
2585
660k
    }
2586
660k
    if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2587
660k
        srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2588
0
        add_op(Op::clamp);
2589
0
    }
2590
660k
    if (srcFmt & 1) {
2591
522k
        add_op(Op::swap_rb);
2592
522k
    }
2593
660k
    skcms_ICCProfile gray_dst_profile;
2594
660k
    if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2595
        // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2596
        // luminance (Y) by the destination transfer function.
2597
100k
        gray_dst_profile = *dstProfile;
2598
100k
        skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2599
100k
        dstProfile = &gray_dst_profile;
2600
100k
    }
2601
2602
660k
    if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2603
        // Photoshop creates CMYK images as inverse CMYK.
2604
        // These happen to be the only ones we've _ever_ seen.
2605
0
        add_op(Op::invert);
2606
        // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2607
0
        srcAlpha = skcms_AlphaFormat_Unpremul;
2608
0
    }
2609
2610
660k
    if (srcAlpha == skcms_AlphaFormat_Opaque) {
2611
0
        add_op(Op::force_opaque);
2612
660k
    } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2613
522k
        add_op(Op::unpremul);
2614
522k
    }
2615
2616
660k
    if (dstProfile != srcProfile) {
2617
2618
138k
        if (!prep_for_destination(dstProfile,
2619
138k
                                  &from_xyz,
2620
138k
                                  &dst_curves[0].parametric,
2621
138k
                                  &dst_curves[1].parametric,
2622
138k
                                  &dst_curves[2].parametric)) {
2623
0
            return false;
2624
0
        }
2625
2626
138k
        if (srcProfile->has_A2B) {
2627
80.1k
            if (srcProfile->A2B.input_channels) {
2628
79.4k
                add_curve_ops(srcProfile->A2B.input_curves,
2629
79.4k
                              (int)srcProfile->A2B.input_channels);
2630
79.4k
                add_op(Op::clamp);
2631
79.4k
                add_op_ctx(Op::clut_A2B, &srcProfile->A2B);
2632
79.4k
            }
2633
2634
80.1k
            if (srcProfile->A2B.matrix_channels == 3) {
2635
1.43k
                add_curve_ops(srcProfile->A2B.matrix_curves, /*numChannels=*/3);
2636
2637
1.43k
                static const skcms_Matrix3x4 I = {{
2638
1.43k
                    {1,0,0,0},
2639
1.43k
                    {0,1,0,0},
2640
1.43k
                    {0,0,1,0},
2641
1.43k
                }};
2642
1.43k
                if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2643
1.43k
                    add_op_ctx(Op::matrix_3x4, &srcProfile->A2B.matrix);
2644
1.43k
                }
2645
1.43k
            }
2646
2647
80.1k
            if (srcProfile->A2B.output_channels == 3) {
2648
80.1k
                add_curve_ops(srcProfile->A2B.output_curves, /*numChannels=*/3);
2649
80.1k
            }
2650
2651
80.1k
            if (srcProfile->pcs == skcms_Signature_Lab) {
2652
0
                add_op(Op::lab_to_xyz);
2653
0
            }
2654
2655
80.1k
        } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2656
58.2k
            add_curve_ops(srcProfile->trc, /*numChannels=*/3);
2657
58.2k
        } else {
2658
0
            return false;
2659
0
        }
2660
2661
        // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2662
138k
        assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2663
2664
138k
        if (dstProfile->has_B2A) {
2665
            // B2A needs its input in XYZD50, so transform TRC sources now.
2666
0
            if (!srcProfile->has_A2B) {
2667
0
                add_op_ctx(Op::matrix_3x3, &srcProfile->toXYZD50);
2668
0
            }
2669
2670
0
            if (dstProfile->pcs == skcms_Signature_Lab) {
2671
0
                add_op(Op::xyz_to_lab);
2672
0
            }
2673
2674
0
            if (dstProfile->B2A.input_channels == 3) {
2675
0
                add_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3);
2676
0
            }
2677
2678
0
            if (dstProfile->B2A.matrix_channels == 3) {
2679
0
                static const skcms_Matrix3x4 I = {{
2680
0
                    {1,0,0,0},
2681
0
                    {0,1,0,0},
2682
0
                    {0,0,1,0},
2683
0
                }};
2684
0
                if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
2685
0
                    add_op_ctx(Op::matrix_3x4, &dstProfile->B2A.matrix);
2686
0
                }
2687
2688
0
                add_curve_ops(dstProfile->B2A.matrix_curves, /*numChannels=*/3);
2689
0
            }
2690
2691
0
            if (dstProfile->B2A.output_channels) {
2692
0
                add_op(Op::clamp);
2693
0
                add_op_ctx(Op::clut_B2A, &dstProfile->B2A);
2694
2695
0
                add_curve_ops(dstProfile->B2A.output_curves,
2696
0
                              (int)dstProfile->B2A.output_channels);
2697
0
            }
2698
138k
        } else {
2699
            // This is a TRC destination.
2700
            // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2701
            // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2702
138k
            static const skcms_Matrix3x3 I = {{
2703
138k
                { 1.0f, 0.0f, 0.0f },
2704
138k
                { 0.0f, 1.0f, 0.0f },
2705
138k
                { 0.0f, 0.0f, 1.0f },
2706
138k
            }};
2707
138k
            const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2708
2709
            // There's a chance the source and destination gamuts are identical,
2710
            // in which case we can skip the gamut transform.
2711
138k
            if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2712
                // Concat the entire gamut transform into from_xyz,
2713
                // now slightly misnamed but it's a handy spot to stash the result.
2714
65.4k
                from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2715
65.4k
                add_op_ctx(Op::matrix_3x3, &from_xyz);
2716
65.4k
            }
2717
2718
            // Encode back to dst RGB using its parametric transfer functions.
2719
138k
            OpAndArg oa[3];
2720
138k
            int numOps = select_curve_ops(dst_curves, /*numChannels=*/3, oa);
2721
261k
            for (int index = 0; index < numOps; ++index) {
2722
123k
                assert(oa[index].op != Op::table_r &&
2723
123k
                       oa[index].op != Op::table_g &&
2724
123k
                       oa[index].op != Op::table_b &&
2725
123k
                       oa[index].op != Op::table_a);
2726
123k
                add_op_ctx(oa[index].op, oa[index].arg);
2727
123k
            }
2728
138k
        }
2729
138k
    }
2730
2731
    // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2732
    // not just to values that fit in [0,1].
2733
    //
2734
    // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2735
    // but would be carrying r > 1, which is really unexpected for downstream consumers.
2736
660k
    if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2737
660k
        add_op(Op::clamp);
2738
660k
    }
2739
2740
660k
    if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2741
        // Photoshop creates CMYK images as inverse CMYK.
2742
        // These happen to be the only ones we've _ever_ seen.
2743
0
        add_op(Op::invert);
2744
2745
        // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2746
0
        dstAlpha = skcms_AlphaFormat_Unpremul;
2747
0
    }
2748
2749
660k
    if (dstAlpha == skcms_AlphaFormat_Opaque) {
2750
0
        add_op(Op::force_opaque);
2751
660k
    } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2752
4.16k
        add_op(Op::premul);
2753
4.16k
    }
2754
660k
    if (dstFmt & 1) {
2755
29.8k
        add_op(Op::swap_rb);
2756
29.8k
    }
2757
660k
    switch (dstFmt >> 1) {
2758
0
        default: return false;
2759
0
        case skcms_PixelFormat_A_8             >> 1: add_op(Op::store_a8);         break;
2760
100k
        case skcms_PixelFormat_G_8             >> 1: add_op(Op::store_g8);         break;
2761
0
        case skcms_PixelFormat_ABGR_4444       >> 1: add_op(Op::store_4444);       break;
2762
0
        case skcms_PixelFormat_RGB_565         >> 1: add_op(Op::store_565);        break;
2763
7.93k
        case skcms_PixelFormat_RGB_888         >> 1: add_op(Op::store_888);        break;
2764
552k
        case skcms_PixelFormat_RGBA_8888       >> 1: add_op(Op::store_8888);       break;
2765
0
        case skcms_PixelFormat_RGBA_1010102    >> 1: add_op(Op::store_1010102);    break;
2766
0
        case skcms_PixelFormat_RGB_161616LE    >> 1: add_op(Op::store_161616LE);   break;
2767
0
        case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::store_16161616LE); break;
2768
0
        case skcms_PixelFormat_RGB_161616BE    >> 1: add_op(Op::store_161616BE);   break;
2769
0
        case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::store_16161616BE); break;
2770
0
        case skcms_PixelFormat_RGB_hhh_Norm    >> 1: add_op(Op::store_hhh);        break;
2771
0
        case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: add_op(Op::store_hhhh);       break;
2772
0
        case skcms_PixelFormat_RGB_101010x_XR  >> 1: add_op(Op::store_101010x_XR); break;
2773
0
        case skcms_PixelFormat_RGB_hhh         >> 1: add_op(Op::store_hhh);        break;
2774
0
        case skcms_PixelFormat_RGBA_hhhh       >> 1: add_op(Op::store_hhhh);       break;
2775
0
        case skcms_PixelFormat_RGB_fff         >> 1: add_op(Op::store_fff);        break;
2776
0
        case skcms_PixelFormat_RGBA_ffff       >> 1: add_op(Op::store_ffff);       break;
2777
2778
0
        case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2779
0
            add_op_ctx(Op::tf_rgb, skcms_sRGB_Inverse_TransferFunction());
2780
0
            add_op(Op::store_8888);
2781
0
            break;
2782
660k
    }
2783
2784
660k
    assert(ops      <= program + ARRAY_COUNT(program));
2785
660k
    assert(contexts <= context + ARRAY_COUNT(context));
2786
2787
660k
    auto run = baseline::run_program;
2788
660k
    switch (cpu_type()) {
2789
0
        case CpuType::SKX:
2790
0
            #if !defined(SKCMS_DISABLE_SKX)
2791
0
                run = skx::run_program;
2792
0
                break;
2793
0
            #endif
2794
2795
660k
        case CpuType::HSW:
2796
660k
            #if !defined(SKCMS_DISABLE_HSW)
2797
660k
                run = hsw::run_program;
2798
660k
                break;
2799
0
            #endif
2800
2801
0
        case CpuType::Baseline:
2802
0
            break;
2803
660k
    }
2804
2805
660k
    run(program, context, ops - program, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2806
660k
    return true;
2807
660k
}
2808
2809
0
static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2810
0
#if defined(NDEBUG)
2811
0
    (void)profile;
2812
#else
2813
    skcms_Matrix3x3 fromXYZD50;
2814
    skcms_TransferFunction invR, invG, invB;
2815
    assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2816
#endif
2817
0
}
2818
2819
0
bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2820
0
    if (!profile->has_B2A) {
2821
0
        skcms_Matrix3x3 fromXYZD50;
2822
0
        if (!profile->has_trc || !profile->has_toXYZD50
2823
0
            || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2824
0
            return false;
2825
0
        }
2826
2827
0
        skcms_TransferFunction tf[3];
2828
0
        for (int i = 0; i < 3; i++) {
2829
0
            skcms_TransferFunction inv;
2830
0
            if (profile->trc[i].table_entries == 0
2831
0
                && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2832
0
                tf[i] = profile->trc[i].parametric;
2833
0
                continue;
2834
0
            }
2835
2836
0
            float max_error;
2837
            // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2838
0
            if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2839
0
                return false;
2840
0
            }
2841
0
        }
2842
2843
0
        for (int i = 0; i < 3; ++i) {
2844
0
            profile->trc[i].table_entries = 0;
2845
0
            profile->trc[i].parametric = tf[i];
2846
0
        }
2847
0
    }
2848
0
    assert_usable_as_destination(profile);
2849
0
    return true;
2850
0
}
2851
2852
0
bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2853
    // Call skcms_MakeUsableAsDestination() with B2A disabled;
2854
    // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
2855
0
    skcms_ICCProfile result = *profile;
2856
0
    result.has_B2A = false;
2857
0
    if (!skcms_MakeUsableAsDestination(&result)) {
2858
0
        return false;
2859
0
    }
2860
2861
    // Of the three, pick the transfer function that best fits the other two.
2862
0
    int best_tf = 0;
2863
0
    float min_max_error = INFINITY_;
2864
0
    for (int i = 0; i < 3; i++) {
2865
0
        skcms_TransferFunction inv;
2866
0
        if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2867
0
            return false;
2868
0
        }
2869
2870
0
        float err = 0;
2871
0
        for (int j = 0; j < 3; ++j) {
2872
0
            err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2873
0
        }
2874
0
        if (min_max_error > err) {
2875
0
            min_max_error = err;
2876
0
            best_tf = i;
2877
0
        }
2878
0
    }
2879
2880
0
    for (int i = 0; i < 3; i++) {
2881
0
        result.trc[i].parametric = result.trc[best_tf].parametric;
2882
0
    }
2883
2884
0
    *profile = result;
2885
0
    assert_usable_as_destination(profile);
2886
0
    return true;
2887
0
}