Coverage Report

Created: 2024-05-21 06:24

/src/libjxl/lib/jxl/enc_icc_codec.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_icc_codec.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <cstdint>
11
#include <limits>
12
#include <map>
13
#include <vector>
14
15
#include "lib/jxl/color_encoding_internal.h"
16
#include "lib/jxl/enc_ans.h"
17
#include "lib/jxl/enc_aux_out.h"
18
#include "lib/jxl/fields.h"
19
#include "lib/jxl/icc_codec_common.h"
20
#include "lib/jxl/padded_bytes.h"
21
22
namespace jxl {
23
namespace {
24
25
// Unshuffles or de-interleaves bytes, for example with width 2, turns
26
// "AaBbCcDc" into "ABCDabcd", this for example de-interleaves UTF-16 bytes into
27
// first all the high order bytes, then all the low order bytes.
28
// Transposes a matrix of width columns and ceil(size / width) rows. There are
29
// size elements, size may be < width * height, if so the
30
// last elements of the bottom row are missing, the missing spots are
31
// transposed along with the filled spots, and the result has the missing
32
// elements at the bottom of the rightmost column. The input is the input matrix
33
// in scanline order, the output is the result matrix in scanline order, with
34
// missing elements skipped over (this may occur at multiple positions).
35
void Unshuffle(JxlMemoryManager* memory_manager, uint8_t* data, size_t size,
36
34.3k
               size_t width) {
37
34.3k
  size_t height = (size + width - 1) / width;  // amount of rows of input
38
34.3k
  PaddedBytes result(memory_manager, size);
39
  // i = input index, j output index
40
34.3k
  size_t s = 0;
41
34.3k
  size_t j = 0;
42
25.2M
  for (size_t i = 0; i < size; i++) {
43
25.2M
    result[j] = data[i];
44
25.2M
    j += height;
45
25.2M
    if (j >= size) j = ++s;
46
25.2M
  }
47
48
25.2M
  for (size_t i = 0; i < size; i++) {
49
25.2M
    data[i] = result[i];
50
25.2M
  }
51
34.3k
}
52
53
// This is performed by the encoder, the encoder must be able to encode any
54
// random byte stream (not just byte streams that are a valid ICC profile), so
55
// an error returned by this function is an implementation error.
56
Status PredictAndShuffle(size_t stride, size_t width, int order, size_t num,
57
                         const uint8_t* data, size_t size, size_t* pos,
58
34.2k
                         PaddedBytes* result) {
59
34.2k
  JXL_RETURN_IF_ERROR(CheckOutOfBounds(*pos, num, size));
60
34.2k
  JxlMemoryManager* memory_manager = result->memory_manager();
61
  // Required by the specification, see decoder. stride * 4 must be < *pos.
62
34.2k
  if (!*pos || ((*pos - 1u) >> 2u) < stride) {
63
0
    return JXL_FAILURE("Invalid stride");
64
0
  }
65
34.2k
  if (*pos < stride * 4) return JXL_FAILURE("Too large stride");
66
34.2k
  size_t start = result->size();
67
21.4M
  for (size_t i = 0; i < num; i++) {
68
21.4M
    uint8_t predicted =
69
21.4M
        LinearPredictICCValue(data, *pos, i, stride, width, order);
70
21.4M
    result->push_back(data[*pos + i] - predicted);
71
21.4M
  }
72
34.2k
  *pos += num;
73
34.2k
  if (width > 1) Unshuffle(memory_manager, result->data() + start, num, width);
74
34.2k
  return true;
75
34.2k
}
76
77
13.6M
inline void EncodeVarInt(uint64_t value, PaddedBytes* data) {
78
13.6M
  size_t pos = data->size();
79
13.6M
  data->resize(data->size() + 9);
80
13.6M
  size_t output_size = data->size();
81
13.6M
  uint8_t* output = data->data();
82
83
  // While more than 7 bits of data are left,
84
  // store 7 bits and set the next byte flag
85
61.9M
  while (value > 127) {
86
    // TODO(eustas): should it be `<` ?
87
48.2M
    JXL_CHECK(pos <= output_size);
88
    // |128: Set the next byte flag
89
48.2M
    output[pos++] = (static_cast<uint8_t>(value & 127)) | 128;
90
    // Remove the seven bits we just wrote
91
48.2M
    value >>= 7;
92
48.2M
  }
93
  // TODO(eustas): should it be `<` ?
94
13.6M
  JXL_CHECK(pos <= output_size);
95
13.6M
  output[pos++] = static_cast<uint8_t>(value & 127);
96
97
13.6M
  data->resize(pos);
98
13.6M
}
99
100
// Largest byte count this encoder works with: UINT32_MAX >> 2 == 2^30 - 1,
// i.e. just under 1 GiB.
constexpr size_t kSizeLimit = std::numeric_limits<uint32_t>::max() >> 2;
101
102
}  // namespace
103
104
// Outputs a transformed form of the given icc profile. The result itself is
105
// not particularly smaller than the input data in bytes, but it will be in a
106
// form that is easier to compress (more zeroes, ...) and will compress better
107
// with brotli.
108
2.95k
Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
109
2.95k
  JxlMemoryManager* memory_manager = result->memory_manager();
110
2.95k
  PaddedBytes commands{memory_manager};
111
2.95k
  PaddedBytes data{memory_manager};
112
113
2.95k
  static_assert(sizeof(size_t) >= 4, "size_t is too short");
114
  // Fuzzer expects that PredictICC can accept any input,
115
  // but 1GB should be enough for any purpose.
116
2.95k
  if (size > kSizeLimit) {
117
0
    return JXL_FAILURE("ICC profile is too large");
118
0
  }
119
120
2.95k
  EncodeVarInt(size, result);
121
122
  // Header
123
2.95k
  PaddedBytes header{memory_manager};
124
2.95k
  header.append(ICCInitialHeaderPrediction());
125
2.95k
  EncodeUint32(0, size, &header);
126
371k
  for (size_t i = 0; i < kICCHeaderSize && i < size; i++) {
127
368k
    ICCPredictHeader(icc, size, header.data(), i);
128
368k
    data.push_back(icc[i] - header[i]);
129
368k
  }
130
2.95k
  if (size <= kICCHeaderSize) {
131
117
    EncodeVarInt(0, result);  // 0 commands
132
5.17k
    for (uint8_t b : data) {
133
5.17k
      result->push_back(b);
134
5.17k
    }
135
117
    return true;
136
117
  }
137
138
2.83k
  std::vector<Tag> tags;
139
2.83k
  std::vector<size_t> tagstarts;
140
2.83k
  std::vector<size_t> tagsizes;
141
2.83k
  std::map<size_t, size_t> tagmap;
142
143
  // Tag list
144
2.83k
  size_t pos = kICCHeaderSize;
145
2.83k
  if (pos + 4 <= size) {
146
2.83k
    uint64_t numtags = DecodeUint32(icc, size, pos);
147
2.83k
    pos += 4;
148
2.83k
    EncodeVarInt(numtags + 1, &commands);
149
2.83k
    uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
150
2.83k
    uint32_t prevtagsize = 0;
151
7.30M
    for (size_t i = 0; i < numtags; i++) {
152
7.30M
      if (pos + 12 > size) break;
153
154
7.30M
      Tag tag = DecodeKeyword(icc, size, pos + 0);
155
7.30M
      uint32_t tagstart = DecodeUint32(icc, size, pos + 4);
156
7.30M
      uint32_t tagsize = DecodeUint32(icc, size, pos + 8);
157
7.30M
      pos += 12;
158
159
7.30M
      tags.push_back(tag);
160
7.30M
      tagstarts.push_back(tagstart);
161
7.30M
      tagsizes.push_back(tagsize);
162
7.30M
      tagmap[tagstart] = tags.size() - 1;
163
164
7.30M
      uint8_t tagcode = kCommandTagUnknown;
165
131M
      for (size_t j = 0; j < kNumTagStrings; j++) {
166
124M
        if (tag == *kTagStrings[j]) {
167
21.2k
          tagcode = j + kCommandTagStringFirst;
168
21.2k
          break;
169
21.2k
        }
170
124M
      }
171
172
7.30M
      if (tag == kRtrcTag && pos + 24 < size) {
173
6.28k
        bool ok = true;
174
6.28k
        ok &= DecodeKeyword(icc, size, pos + 0) == kGtrcTag;
175
6.28k
        ok &= DecodeKeyword(icc, size, pos + 12) == kBtrcTag;
176
6.28k
        if (ok) {
177
18.6k
          for (size_t kk = 0; kk < 8; kk++) {
178
16.6k
            if (icc[pos - 8 + kk] != icc[pos + 4 + kk]) ok = false;
179
16.6k
            if (icc[pos - 8 + kk] != icc[pos + 16 + kk]) ok = false;
180
16.6k
          }
181
2.07k
        }
182
6.28k
        if (ok) {
183
703
          tagcode = kCommandTagTRC;
184
703
          pos += 24;
185
703
          i += 2;
186
703
        }
187
6.28k
      }
188
189
7.30M
      if (tag == kRxyzTag && pos + 24 < size) {
190
2.76k
        bool ok = true;
191
2.76k
        ok &= DecodeKeyword(icc, size, pos + 0) == kGxyzTag;
192
2.76k
        ok &= DecodeKeyword(icc, size, pos + 12) == kBxyzTag;
193
2.76k
        uint32_t offsetr = tagstart;
194
2.76k
        uint32_t offsetg = DecodeUint32(icc, size, pos + 4);
195
2.76k
        uint32_t offsetb = DecodeUint32(icc, size, pos + 16);
196
2.76k
        uint32_t sizer = tagsize;
197
2.76k
        uint32_t sizeg = DecodeUint32(icc, size, pos + 8);
198
2.76k
        uint32_t sizeb = DecodeUint32(icc, size, pos + 20);
199
2.76k
        ok &= sizer == 20;
200
2.76k
        ok &= sizeg == 20;
201
2.76k
        ok &= sizeb == 20;
202
2.76k
        ok &= (offsetg == offsetr + 20);
203
2.76k
        ok &= (offsetb == offsetr + 40);
204
2.76k
        if (ok) {
205
405
          tagcode = kCommandTagXYZ;
206
405
          pos += 24;
207
405
          i += 2;
208
405
        }
209
2.76k
      }
210
211
7.30M
      uint8_t command = tagcode;
212
7.30M
      uint64_t predicted_tagstart = prevtagstart + prevtagsize;
213
7.30M
      if (predicted_tagstart != tagstart) command |= kFlagBitOffset;
214
7.30M
      size_t predicted_tagsize = prevtagsize;
215
7.30M
      if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
216
7.30M
          tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
217
7.30M
          tag == kLumiTag) {
218
7.46k
        predicted_tagsize = 20;
219
7.46k
      }
220
7.30M
      if (predicted_tagsize != tagsize) command |= kFlagBitSize;
221
7.30M
      commands.push_back(command);
222
7.30M
      if (tagcode == 1) {
223
7.28M
        AppendKeyword(tag, &data);
224
7.28M
      }
225
7.30M
      if (command & kFlagBitOffset) EncodeVarInt(tagstart, &commands);
226
7.30M
      if (command & kFlagBitSize) EncodeVarInt(tagsize, &commands);
227
228
7.30M
      prevtagstart = tagstart;
229
7.30M
      prevtagsize = tagsize;
230
7.30M
    }
231
2.83k
  }
232
  // Indicate end of tag list or varint indicating there's none
233
2.83k
  commands.push_back(0);
234
235
  // Main content
236
  // The main content in a valid ICC profile contains tagged elements, with the
237
  // tag types (4 letter names) given by the tag list above, and the tag list
238
  // pointing to the start and indicating the size of each tagged element. It is
239
  // allowed for tagged elements to overlap, e.g. the curve for R, G and B could
240
  // all point to the same one.
241
2.83k
  Tag tag;
242
2.83k
  size_t tagstart = 0;
243
2.83k
  size_t tagsize = 0;
244
2.83k
  size_t clutstart = 0;
245
246
  // Should always check tag_sane before doing math with tagsize.
247
953k
  const auto tag_sane = [&tagsize]() {
248
953k
    return (tagsize > 8) && (tagsize < kSizeLimit);
249
953k
  };
250
251
2.83k
  size_t last0 = pos;
252
  // This loop appends commands to the output, processing some sub-section of a
253
  // current tagged element each time. We need to keep track of the tagtype of
254
  // the current element, and update it when we encounter the boundary of a
255
  // next one.
256
  // It is not required that the input data is a valid ICC profile, if the
257
  // encoder does not recognize the data it will still be able to output bytes
258
  // but will not predict as well.
259
34.3M
  while (pos <= size) {
260
34.3M
    size_t last1 = pos;
261
34.3M
    PaddedBytes commands_add{memory_manager};
262
34.3M
    PaddedBytes data_add{memory_manager};
263
264
    // This means the loop brought the position beyond the tag end.
265
    // If tagsize is nonsensical, any pos looks "ok-ish".
266
34.3M
    if ((pos > tagstart + tagsize) && (tagsize < kSizeLimit)) {
267
15.6M
      tag = {{0, 0, 0, 0}};  // nonsensical value
268
15.6M
    }
269
270
34.3M
    if (commands_add.empty() && data_add.empty() && tagmap.count(pos) &&
271
34.3M
        pos + 4 <= size) {
272
12.8k
      size_t index = tagmap[pos];
273
12.8k
      tag = DecodeKeyword(icc, size, pos);
274
12.8k
      tagstart = tagstarts[index];
275
12.8k
      tagsize = tagsizes[index];
276
277
12.8k
      if (tag == kMlucTag && tag_sane() && pos + tagsize <= size &&
278
12.8k
          icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
279
12.8k
          icc[pos + 7] == 0) {
280
126
        size_t num = tagsize - 8;
281
126
        commands_add.push_back(kCommandTypeStartFirst + 3);
282
126
        pos += 8;
283
126
        commands_add.push_back(kCommandShuffle2);
284
126
        EncodeVarInt(num, &commands_add);
285
126
        size_t start = data_add.size();
286
3.78M
        for (size_t i = 0; i < num; i++) {
287
3.78M
          data_add.push_back(icc[pos]);
288
3.78M
          pos++;
289
3.78M
        }
290
126
        Unshuffle(memory_manager, data_add.data() + start, num, 2);
291
126
      }
292
293
12.8k
      if (tag == kCurvTag && tag_sane() && pos + tagsize <= size &&
294
12.8k
          icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
295
12.8k
          icc[pos + 7] == 0) {
296
97
        size_t num = tagsize - 8;
297
97
        if (num > 16 && num < (1 << 28) && pos + num <= size && pos > 0) {
298
81
          commands_add.push_back(kCommandTypeStartFirst + 5);
299
81
          pos += 8;
300
81
          commands_add.push_back(kCommandPredict);
301
81
          int order = 1;
302
81
          int width = 2;
303
81
          int stride = width;
304
81
          commands_add.push_back((order << 2) | (width - 1));
305
81
          EncodeVarInt(num, &commands_add);
306
81
          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
307
81
                                                size, &pos, &data_add));
308
81
        }
309
97
      }
310
12.8k
    }
311
312
34.3M
    if (tag == kMab_Tag || tag == kMba_Tag) {
313
9.21M
      Tag subTag = DecodeKeyword(icc, size, pos);
314
9.21M
      if (pos + 12 < size && (subTag == kCurvTag || subTag == kVcgtTag) &&
315
9.21M
          DecodeUint32(icc, size, pos + 4) == 0) {
316
64.2k
        uint32_t num = DecodeUint32(icc, size, pos + 8) * 2;
317
64.2k
        if (num > 16 && num < (1 << 28) && pos + 12 + num <= size) {
318
33.9k
          pos += 12;
319
33.9k
          last1 = pos;
320
33.9k
          commands_add.push_back(kCommandPredict);
321
33.9k
          int order = 1;
322
33.9k
          int width = 2;
323
33.9k
          int stride = width;
324
33.9k
          commands_add.push_back((order << 2) | (width - 1));
325
33.9k
          EncodeVarInt(num, &commands_add);
326
33.9k
          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
327
33.9k
                                                size, &pos, &data_add));
328
33.9k
        }
329
64.2k
      }
330
331
9.21M
      if (pos == tagstart + 24 && pos + 4 < size) {
332
        // Note that this value can be remembered for next iterations of the
333
        // loop, so the "pos == clutstart" if below can trigger during a later
334
        // iteration.
335
764
        clutstart = tagstart + DecodeUint32(icc, size, pos);
336
764
      }
337
338
9.21M
      if (pos == clutstart && clutstart + 16 < size) {
339
386
        size_t numi = icc[tagstart + 8];
340
386
        size_t numo = icc[tagstart + 9];
341
386
        size_t width = icc[clutstart + 16];
342
386
        size_t stride = width * numo;
343
386
        size_t num = width * numo;
344
8.41k
        for (size_t i = 0; i < numi && clutstart + i < size; i++) {
345
8.03k
          num *= icc[clutstart + i];
346
8.03k
        }
347
386
        if ((width == 1 || width == 2) && num > 64 && num < (1 << 28) &&
348
386
            pos + num <= size && pos > stride * 4) {
349
109
          commands_add.push_back(kCommandPredict);
350
109
          int order = 1;
351
109
          uint8_t flags =
352
109
              (order << 2) | (width - 1) | (stride == width ? 0 : 16);
353
109
          commands_add.push_back(flags);
354
109
          if (flags & 16) EncodeVarInt(stride, &commands_add);
355
109
          EncodeVarInt(num, &commands_add);
356
109
          JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
357
109
                                                size, &pos, &data_add));
358
109
        }
359
386
      }
360
9.21M
    }
361
362
34.3M
    if (commands_add.empty() && data_add.empty() && tag == kGbd_Tag &&
363
34.3M
        tag_sane() && pos == tagstart + 8 && pos + tagsize - 8 <= size &&
364
34.3M
        pos > 16) {
365
96
      size_t width = 4;
366
96
      size_t order = 0;
367
96
      size_t stride = width;
368
96
      size_t num = tagsize - 8;
369
96
      uint8_t flags = (order << 2) | (width - 1) | (stride == width ? 0 : 16);
370
96
      commands_add.push_back(kCommandPredict);
371
96
      commands_add.push_back(flags);
372
96
      if (flags & 16) EncodeVarInt(stride, &commands_add);
373
96
      EncodeVarInt(num, &commands_add);
374
96
      JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
375
96
                                            size, &pos, &data_add));
376
96
    }
377
378
34.3M
    if (commands_add.empty() && data_add.empty() && pos + 20 <= size) {
379
34.2M
      Tag subTag = DecodeKeyword(icc, size, pos);
380
34.2M
      if (subTag == kXyz_Tag && DecodeUint32(icc, size, pos + 4) == 0) {
381
11.7k
        commands_add.push_back(kCommandXYZ);
382
11.7k
        pos += 8;
383
152k
        for (size_t j = 0; j < 12; j++) data_add.push_back(icc[pos++]);
384
11.7k
      }
385
34.2M
    }
386
387
34.3M
    if (commands_add.empty() && data_add.empty() && pos + 8 <= size) {
388
34.2M
      if (DecodeUint32(icc, size, pos + 4) == 0) {
389
6.68M
        Tag subTag = DecodeKeyword(icc, size, pos);
390
57.9M
        for (size_t i = 0; i < kNumTypeStrings; i++) {
391
51.9M
          if (subTag == *kTypeStrings[i]) {
392
621k
            commands_add.push_back(kCommandTypeStartFirst + i);
393
621k
            pos += 8;
394
621k
            break;
395
621k
          }
396
51.9M
        }
397
6.68M
      }
398
34.2M
    }
399
400
34.3M
    if (!(commands_add.empty() && data_add.empty()) || pos == size) {
401
670k
      if (last0 < last1) {
402
248k
        commands.push_back(kCommandInsert);
403
248k
        EncodeVarInt(last1 - last0, &commands);
404
34.3M
        while (last0 < last1) {
405
34.0M
          data.push_back(icc[last0++]);
406
34.0M
        }
407
248k
      }
408
737k
      for (uint8_t b : commands_add) {
409
737k
        commands.push_back(b);
410
737k
      }
411
25.3M
      for (uint8_t b : data_add) {
412
25.3M
        data.push_back(b);
413
25.3M
      }
414
670k
      last0 = pos;
415
670k
    }
416
34.3M
    if (commands_add.empty() && data_add.empty()) {
417
33.6M
      pos++;
418
33.6M
    }
419
34.3M
  }
420
421
2.83k
  EncodeVarInt(commands.size(), result);
422
70.1M
  for (uint8_t b : commands) {
423
70.1M
    result->push_back(b);
424
70.1M
  }
425
88.9M
  for (uint8_t b : data) {
426
88.9M
    result->push_back(b);
427
88.9M
  }
428
429
2.83k
  return true;
430
2.83k
}
431
432
// Writes the ICC profile `icc` to `writer`: the profile is first transformed
// with PredictICC(), the transformed length is written with U64Coder, and the
// transformed bytes are then written as entropy-coded tokens, one per byte.
// The context of each token is ICCANSContext(index, previous byte, byte
// before that), with 0 standing in for bytes that do not exist yet.
//
// `layer` and `aux_out` are forwarded to the bit-accounting and entropy-coding
// helpers. Returns an error if `icc` is empty, or if PredictICC() or
// U64Coder::Write() fails.
Status WriteICC(const IccBytes& icc, BitWriter* JXL_RESTRICT writer,
                size_t layer, AuxOut* JXL_RESTRICT aux_out) {
  if (icc.empty()) return JXL_FAILURE("ICC must be non-empty");
  JxlMemoryManager* memory_manager = writer->memory_manager();
  PaddedBytes enc{memory_manager};
  JXL_RETURN_IF_ERROR(PredictICC(icc.data(), icc.size(), &enc));
  std::vector<std::vector<Token>> tokens(1);
  BitWriter::Allotment allotment(writer, 128);
  JXL_RETURN_IF_ERROR(U64Coder::Write(enc.size(), writer));
  allotment.ReclaimAndCharge(writer, layer, aux_out);

  // Exactly one token per transformed byte, so allocate them all at once.
  tokens[0].reserve(enc.size());
  for (size_t i = 0; i < enc.size(); i++) {
    tokens[0].emplace_back(
        ICCANSContext(i, i > 0 ? enc[i - 1] : 0, i > 1 ? enc[i - 2] : 0),
        enc[i]);
  }

  HistogramParams params;
  // Inputs shorter than 4096 bytes use the optimal LZ77 method, longer ones
  // the plain LZ77 method.
  params.lz77_method = enc.size() < 4096 ? HistogramParams::LZ77Method::kOptimal
                                         : HistogramParams::LZ77Method::kLZ77;
  params.force_huffman = true;

  EntropyEncodingData code;
  std::vector<uint8_t> context_map;
  BuildAndEncodeHistograms(memory_manager, params, kNumICCContexts, tokens,
                           &code, &context_map, writer, layer, aux_out);
  WriteTokens(tokens[0], code, context_map, 0, writer, layer, aux_out);
  return true;
}
459
460
}  // namespace jxl