Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/lib/jxl/enc_patch_dictionary.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_patch_dictionary.h"
7
8
#include <jxl/cms_interface.h>
9
#include <jxl/memory_manager.h>
10
#include <jxl/types.h>
11
12
#include <algorithm>
13
#include <atomic>
14
#include <cmath>
15
#include <cstdint>
16
#include <cstdlib>
17
#include <utility>
18
#include <vector>
19
20
#include "lib/jxl/base/common.h"
21
#include "lib/jxl/base/compiler_specific.h"
22
#include "lib/jxl/base/data_parallel.h"
23
#include "lib/jxl/base/override.h"
24
#include "lib/jxl/base/printf_macros.h"
25
#include "lib/jxl/base/random.h"
26
#include "lib/jxl/base/rect.h"
27
#include "lib/jxl/base/span.h"
28
#include "lib/jxl/base/status.h"
29
#include "lib/jxl/common.h"
30
#include "lib/jxl/dec_cache.h"
31
#include "lib/jxl/dec_frame.h"
32
#include "lib/jxl/dec_patch_dictionary.h"
33
#include "lib/jxl/enc_ans.h"
34
#include "lib/jxl/enc_ans_params.h"
35
#include "lib/jxl/enc_aux_out.h"
36
#include "lib/jxl/enc_bit_writer.h"
37
#include "lib/jxl/enc_cache.h"
38
#include "lib/jxl/enc_debug_image.h"
39
#include "lib/jxl/enc_dot_dictionary.h"
40
#include "lib/jxl/enc_frame.h"
41
#include "lib/jxl/enc_params.h"
42
#include "lib/jxl/frame_header.h"
43
#include "lib/jxl/image.h"
44
#include "lib/jxl/image_bundle.h"
45
#include "lib/jxl/image_ops.h"
46
#include "lib/jxl/modular/options.h"
47
#include "lib/jxl/pack_signed.h"
48
#include "lib/jxl/patch_dictionary_internal.h"
49
50
namespace jxl {
51
52
static constexpr size_t kPatchFrameReferenceId = 3;
53
54
// static
55
Status PatchDictionaryEncoder::Encode(const PatchDictionary& pdic,
56
                                      BitWriter* writer, LayerType layer,
57
97
                                      AuxOut* aux_out) {
58
97
  JXL_ENSURE(pdic.HasAny());
59
97
  JxlMemoryManager* memory_manager = writer->memory_manager();
60
97
  std::vector<std::vector<Token>> tokens(1);
61
62
2.34M
  auto add_num = [&](int context, size_t num) {
63
2.34M
    tokens[0].emplace_back(context, num);
64
2.34M
  };
65
97
  size_t num_ref_patch = 0;
66
16.4k
  for (size_t i = 0; i < pdic.positions_.size();) {
67
16.3k
    size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx;
68
764k
    while (i < pdic.positions_.size() &&
69
764k
           pdic.positions_[i].ref_pos_idx == ref_pos_idx) {
70
747k
      i++;
71
747k
    }
72
16.3k
    num_ref_patch++;
73
16.3k
  }
74
97
  add_num(kNumRefPatchContext, num_ref_patch);
75
97
  size_t blend_pos = 0;
76
97
  size_t blending_stride = pdic.blendings_stride_;
77
  // blending_stride == num_ec + 1; num_ec > 1 =>
78
97
  bool choose_alpha = (blending_stride > 1 + 1);
79
16.4k
  for (size_t i = 0; i < pdic.positions_.size();) {
80
16.3k
    size_t i_start = i;
81
16.3k
    size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx;
82
16.3k
    const auto& ref_pos = pdic.ref_positions_[ref_pos_idx];
83
764k
    while (i < pdic.positions_.size() &&
84
764k
           pdic.positions_[i].ref_pos_idx == ref_pos_idx) {
85
747k
      i++;
86
747k
    }
87
16.3k
    size_t num = i - i_start;
88
16.3k
    JXL_ENSURE(num > 0);
89
16.3k
    add_num(kReferenceFrameContext, ref_pos.ref);
90
16.3k
    add_num(kPatchReferencePositionContext, ref_pos.x0);
91
16.3k
    add_num(kPatchReferencePositionContext, ref_pos.y0);
92
16.3k
    add_num(kPatchSizeContext, ref_pos.xsize - 1);
93
16.3k
    add_num(kPatchSizeContext, ref_pos.ysize - 1);
94
16.3k
    add_num(kPatchCountContext, num - 1);
95
764k
    for (size_t j = i_start; j < i; j++) {
96
747k
      const PatchPosition& pos = pdic.positions_[j];
97
747k
      if (j == i_start) {
98
16.3k
        add_num(kPatchPositionContext, pos.x);
99
16.3k
        add_num(kPatchPositionContext, pos.y);
100
731k
      } else {
101
731k
        add_num(kPatchOffsetContext,
102
731k
                PackSigned(pos.x - pdic.positions_[j - 1].x));
103
731k
        add_num(kPatchOffsetContext,
104
731k
                PackSigned(pos.y - pdic.positions_[j - 1].y));
105
731k
      }
106
1.49M
      for (size_t k = 0; k < blending_stride; ++k, ++blend_pos) {
107
747k
        const PatchBlending& info = pdic.blendings_[blend_pos];
108
747k
        add_num(kPatchBlendModeContext, static_cast<uint32_t>(info.mode));
109
747k
        if (UsesAlpha(info.mode) && choose_alpha) {
110
0
          add_num(kPatchAlphaChannelContext, info.alpha_channel);
111
0
        }
112
747k
        if (UsesClamp(info.mode)) {
113
0
          add_num(kPatchClampContext, TO_JXL_BOOL(info.clamp));
114
0
        }
115
747k
      }
116
747k
    }
117
16.3k
  }
118
119
97
  EntropyEncodingData codes;
120
97
  JXL_ASSIGN_OR_RETURN(
121
97
      size_t cost, BuildAndEncodeHistograms(memory_manager, HistogramParams(),
122
97
                                            kNumPatchDictionaryContexts, tokens,
123
97
                                            &codes, writer, layer, aux_out));
124
97
  (void)cost;
125
97
  JXL_RETURN_IF_ERROR(WriteTokens(tokens[0], codes, 0, writer, layer, aux_out));
126
97
  return true;
127
97
}
128
129
// static
130
Status PatchDictionaryEncoder::SubtractFrom(const PatchDictionary& pdic,
131
283
                                            Image3F* opsin) {
132
  // TODO(veluca): this can likely be optimized knowing it runs on full images.
133
65.3k
  for (size_t y = 0; y < opsin->ysize(); y++) {
134
65.1k
    float* JXL_RESTRICT rows[3] = {
135
65.1k
        opsin->PlaneRow(0, y),
136
65.1k
        opsin->PlaneRow(1, y),
137
65.1k
        opsin->PlaneRow(2, y),
138
65.1k
    };
139
65.1k
    size_t blending_stride = pdic.blendings_stride_;
140
1.31M
    for (size_t pos_idx : pdic.GetPatchesForRow(y)) {
141
1.31M
      const size_t blending_idx = pos_idx * blending_stride;
142
1.31M
      const PatchPosition& pos = pdic.positions_[pos_idx];
143
1.31M
      const PatchReferencePosition& ref_pos =
144
1.31M
          pdic.ref_positions_[pos.ref_pos_idx];
145
1.31M
      const PatchBlendMode mode = pdic.blendings_[blending_idx].mode;
146
1.31M
      size_t by = pos.y;
147
1.31M
      size_t bx = pos.x;
148
1.31M
      size_t xsize = ref_pos.xsize;
149
1.31M
      JXL_ENSURE(y >= by);
150
1.31M
      JXL_ENSURE(y < by + ref_pos.ysize);
151
1.31M
      size_t iy = y - by;
152
1.31M
      size_t ref = ref_pos.ref;
153
1.31M
      const float* JXL_RESTRICT ref_rows[3] = {
154
1.31M
          pdic.reference_frames_->at(ref).frame->color()->ConstPlaneRow(
155
1.31M
              0, ref_pos.y0 + iy) +
156
1.31M
              ref_pos.x0,
157
1.31M
          pdic.reference_frames_->at(ref).frame->color()->ConstPlaneRow(
158
1.31M
              1, ref_pos.y0 + iy) +
159
1.31M
              ref_pos.x0,
160
1.31M
          pdic.reference_frames_->at(ref).frame->color()->ConstPlaneRow(
161
1.31M
              2, ref_pos.y0 + iy) +
162
1.31M
              ref_pos.x0,
163
1.31M
      };
164
5.01M
      for (size_t ix = 0; ix < xsize; ix++) {
165
14.7M
        for (size_t c = 0; c < 3; c++) {
166
11.0M
          if (mode == PatchBlendMode::kAdd) {
167
11.0M
            rows[c][bx + ix] -= ref_rows[c][ix];
168
11.0M
          } else if (mode == PatchBlendMode::kReplace) {
169
0
            rows[c][bx + ix] = 0;
170
0
          } else if (mode == PatchBlendMode::kNone) {
171
            // Nothing to do.
172
0
          } else {
173
0
            return JXL_UNREACHABLE("blending mode %u not yet implemented",
174
0
                                   static_cast<uint32_t>(mode));
175
0
          }
176
11.0M
        }
177
3.69M
      }
178
1.31M
    }
179
65.1k
  }
180
283
  return true;
181
283
}
182
183
namespace {
184
185
struct PatchColorspaceInfo {
186
  float kChannelDequant[3];
187
  float kChannelWeights[3];
188
189
186
  explicit PatchColorspaceInfo(bool is_xyb) {
190
186
    if (is_xyb) {
191
186
      kChannelDequant[0] = 0.01615;
192
186
      kChannelDequant[1] = 0.08875;
193
186
      kChannelDequant[2] = 0.1922;
194
186
      kChannelWeights[0] = 30.0;
195
186
      kChannelWeights[1] = 3.0;
196
186
      kChannelWeights[2] = 1.0;
197
186
    } else {
198
0
      kChannelDequant[0] = 20.0f / 255;
199
0
      kChannelDequant[1] = 22.0f / 255;
200
0
      kChannelDequant[2] = 20.0f / 255;
201
0
      kChannelWeights[0] = 0.017 * 255;
202
0
      kChannelWeights[1] = 0.02 * 255;
203
0
      kChannelWeights[2] = 0.017 * 255;
204
0
    }
205
186
  }
206
207
12.1M
  float ScaleForQuantization(float val, size_t c) {
208
12.1M
    return val / kChannelDequant[c];
209
12.1M
  }
210
211
12.1M
  int Quantize(float val, size_t c) {
212
12.1M
    return std::trunc(ScaleForQuantization(val, c));
213
12.1M
  }
214
215
151M
  bool is_similar_v(const float v1[3], const float v2[3], float threshold) {
216
151M
    float distance = 0;
217
605M
    for (size_t c = 0; c < 3; c++) {
218
453M
      distance += std::abs(v1[c] - v2[c]) * kChannelWeights[c];
219
453M
    }
220
151M
    return distance <= threshold;
221
151M
  }
222
};
223
224
StatusOr<std::vector<PatchInfo>> FindTextLikePatches(
225
    const CompressParams& cparams, const Image3F& opsin,
226
    const PassesEncoderState* JXL_RESTRICT state, ThreadPool* pool,
227
283
    AuxOut* aux_out, bool is_xyb) {
228
283
  std::vector<PatchInfo> info;
229
283
  if (state->cparams.patches == Override::kOff) return info;
230
186
  const auto& frame_dim = state->shared.frame_dim;
231
186
  JxlMemoryManager* memory_manager = opsin.memory_manager();
232
233
186
  PatchColorspaceInfo pci(is_xyb);
234
186
  float kSimilarThreshold = 0.8f;
235
236
186
  auto is_similar_impl = [&pci](std::pair<uint32_t, uint32_t> p1,
237
186
                                std::pair<uint32_t, uint32_t> p2,
238
186
                                const float* JXL_RESTRICT rows[3],
239
124M
                                size_t stride, float threshold) {
240
124M
    float v1[3];
241
124M
    float v2[3];
242
497M
    for (size_t c = 0; c < 3; c++) {
243
373M
      v1[c] = rows[c][p1.second * stride + p1.first];
244
373M
      v2[c] = rows[c][p2.second * stride + p2.first];
245
373M
    }
246
124M
    return pci.is_similar_v(v1, v2, threshold);
247
124M
  };
248
249
186
  std::atomic<uint32_t> has_screenshot_areas{0};
250
186
  const size_t opsin_stride = opsin.PixelsPerRow();
251
186
  const float* JXL_RESTRICT opsin_rows[3] = {opsin.ConstPlaneRow(0, 0),
252
186
                                             opsin.ConstPlaneRow(1, 0),
253
186
                                             opsin.ConstPlaneRow(2, 0)};
254
255
186
  auto is_same = [&opsin_rows, opsin_stride](std::pair<uint32_t, uint32_t> p1,
256
87.1M
                                             std::pair<uint32_t, uint32_t> p2) {
257
252M
    for (auto& opsin_row : opsin_rows) {
258
252M
      float v1 = opsin_row[p1.second * opsin_stride + p1.first];
259
252M
      float v2 = opsin_row[p2.second * opsin_stride + p2.first];
260
252M
      if (std::fabs(v1 - v2) > 1e-4) {
261
5.99M
        return false;
262
5.99M
      }
263
252M
    }
264
81.1M
    return true;
265
87.1M
  };
266
267
186
  auto is_similar = [&](std::pair<uint32_t, uint32_t> p1,
268
108M
                        std::pair<uint32_t, uint32_t> p2) {
269
108M
    return is_similar_impl(p1, p2, opsin_rows, opsin_stride, kSimilarThreshold);
270
108M
  };
271
272
186
  constexpr int64_t kPatchSide = 4;
273
186
  constexpr int64_t kExtraSide = 4;
274
275
  // Look for kPatchSide size squares, naturally aligned, that all have the same
276
  // pixel values.
277
186
  JXL_ASSIGN_OR_RETURN(
278
186
      ImageB is_screenshot_like,
279
186
      ImageB::Create(memory_manager, DivCeil(frame_dim.xsize, kPatchSide),
280
186
                     DivCeil(frame_dim.ysize, kPatchSide)));
281
186
  ZeroFillImage(&is_screenshot_like);
282
186
  uint8_t* JXL_RESTRICT screenshot_row = is_screenshot_like.Row(0);
283
186
  const size_t screenshot_stride = is_screenshot_like.PixelsPerRow();
284
186
  const auto process_row = [&](const uint32_t y,
285
14.4k
                               size_t /* thread */) -> Status {
286
1.57M
    for (uint64_t x = 0; x < frame_dim.xsize / kPatchSide; x++) {
287
1.56M
      bool all_same = true;
288
7.80M
      for (size_t iy = 0; iy < static_cast<size_t>(kPatchSide); iy++) {
289
18.2M
        for (size_t ix = 0; ix < static_cast<size_t>(kPatchSide); ix++) {
290
15.9M
          size_t cx = x * kPatchSide + ix;
291
15.9M
          size_t cy = y * kPatchSide + iy;
292
15.9M
          if (!is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) {
293
3.94M
            all_same = false;
294
3.94M
            break;
295
3.94M
          }
296
15.9M
        }
297
6.24M
      }
298
1.56M
      if (!all_same) continue;
299
282k
      size_t num = 0;
300
282k
      size_t num_same = 0;
301
3.67M
      for (int64_t iy = -kExtraSide; iy < kExtraSide + kPatchSide; iy++) {
302
44.0M
        for (int64_t ix = -kExtraSide; ix < kExtraSide + kPatchSide; ix++) {
303
40.7M
          int64_t cx = x * kPatchSide + ix;
304
40.7M
          int64_t cy = y * kPatchSide + iy;
305
40.7M
          if (cx < 0 || static_cast<uint64_t>(cx) >= frame_dim.xsize ||  //
306
40.7M
              cy < 0 || static_cast<uint64_t>(cy) >= frame_dim.ysize) {
307
778k
            continue;
308
778k
          }
309
39.9M
          num++;
310
39.9M
          if (is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) num_same++;
311
39.9M
        }
312
3.39M
      }
313
      // Too few equal pixels nearby.
314
282k
      if (num_same * 8 < num * 7) continue;
315
247k
      screenshot_row[y * screenshot_stride + x] = 1;
316
247k
      has_screenshot_areas = 1;
317
247k
    }
318
14.4k
    return true;
319
14.4k
  };
320
186
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, frame_dim.ysize / kPatchSide,
321
186
                                ThreadPool::NoInit, process_row,
322
186
                                "IsScreenshotLike"));
323
324
  // TODO(veluca): also parallelize the rest of this function.
325
186
  if (WantDebugOutput(cparams)) {
326
0
    JXL_RETURN_IF_ERROR(
327
0
        DumpPlaneNormalized(cparams, "screenshot_like", is_screenshot_like));
328
0
  }
329
330
186
  constexpr int kSearchRadius = 1;
331
332
186
  if (!ApplyOverride(state->cparams.patches, has_screenshot_areas)) {
333
57
    return info;
334
57
  }
335
336
  // Search for "similar enough" pixels near the screenshot-like areas.
337
258
  JXL_ASSIGN_OR_RETURN(
338
258
      ImageB is_background,
339
258
      ImageB::Create(memory_manager, frame_dim.xsize, frame_dim.ysize));
340
258
  ZeroFillImage(&is_background);
341
258
  JXL_ASSIGN_OR_RETURN(
342
129
      Image3F background,
343
129
      Image3F::Create(memory_manager, frame_dim.xsize, frame_dim.ysize));
344
129
  ZeroFillImage(&background);
345
129
  constexpr size_t kDistanceLimit = 50;
346
129
  float* JXL_RESTRICT background_rows[3] = {
347
129
      background.PlaneRow(0, 0),
348
129
      background.PlaneRow(1, 0),
349
129
      background.PlaneRow(2, 0),
350
129
  };
351
129
  const size_t background_stride = background.PixelsPerRow();
352
129
  uint8_t* JXL_RESTRICT is_background_row = is_background.Row(0);
353
129
  const size_t is_background_stride = is_background.PixelsPerRow();
354
129
  std::vector<
355
129
      std::pair<std::pair<uint32_t, uint32_t>, std::pair<uint32_t, uint32_t>>>
356
129
      queue;
357
129
  size_t queue_front = 0;
358
44.1k
  for (size_t y = 0; y < frame_dim.ysize; y++) {
359
19.2M
    for (size_t x = 0; x < frame_dim.xsize; x++) {
360
19.2M
      if (!screenshot_row[screenshot_stride * (y / kPatchSide) +
361
19.2M
                          (x / kPatchSide)])
362
15.2M
        continue;
363
3.95M
      queue.push_back({{x, y}, {x, y}});
364
3.95M
    }
365
44.0k
  }
366
49.7M
  while (queue.size() != queue_front) {
367
49.7M
    std::pair<uint32_t, uint32_t> cur = queue[queue_front].first;
368
49.7M
    std::pair<uint32_t, uint32_t> src = queue[queue_front].second;
369
49.7M
    queue_front++;
370
49.7M
    if (is_background_row[cur.second * is_background_stride + cur.first])
371
36.0M
      continue;
372
13.6M
    is_background_row[cur.second * is_background_stride + cur.first] = 1;
373
54.7M
    for (size_t c = 0; c < 3; c++) {
374
41.0M
      background_rows[c][cur.second * background_stride + cur.first] =
375
41.0M
          opsin_rows[c][src.second * opsin_stride + src.first];
376
41.0M
    }
377
54.7M
    for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
378
164M
      for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
379
123M
        if (dx == 0 && dy == 0) continue;
380
109M
        int next_first = cur.first + dx;
381
109M
        int next_second = cur.second + dy;
382
109M
        if (next_first < 0 || next_second < 0 ||
383
109M
            static_cast<uint32_t>(next_first) >= frame_dim.xsize ||
384
109M
            static_cast<uint32_t>(next_second) >= frame_dim.ysize) {
385
385k
          continue;
386
385k
        }
387
109M
        if (static_cast<uint32_t>(
388
109M
                std::abs(next_first - static_cast<int>(src.first)) +
389
109M
                std::abs(next_second - static_cast<int>(src.second))) >
390
109M
            kDistanceLimit) {
391
176k
          continue;
392
176k
        }
393
108M
        std::pair<uint32_t, uint32_t> next{next_first, next_second};
394
108M
        if (is_similar(src, next)) {
395
91.6M
          if (!screenshot_row[next.second / kPatchSide * screenshot_stride +
396
91.6M
                              next.first / kPatchSide] ||
397
91.6M
              is_same(src, next)) {
398
91.6M
            if (!is_background_row[next.second * is_background_stride +
399
91.6M
                                   next.first])
400
45.8M
              queue.emplace_back(next, src);
401
91.6M
          }
402
91.6M
        }
403
108M
      }
404
41.0M
    }
405
13.6M
  }
406
129
  queue.clear();
407
408
129
  ImageF ccs;
409
129
  Rng rng(0);
410
129
  bool paint_ccs = false;
411
129
  if (WantDebugOutput(cparams)) {
412
0
    JXL_RETURN_IF_ERROR(
413
0
        DumpPlaneNormalized(cparams, "is_background", is_background));
414
0
    if (is_xyb) {
415
0
      JXL_RETURN_IF_ERROR(DumpXybImage(cparams, "background", background));
416
0
    } else {
417
0
      JXL_RETURN_IF_ERROR(DumpImage(cparams, "background", background));
418
0
    }
419
0
    JXL_ASSIGN_OR_RETURN(
420
0
        ccs, ImageF::Create(memory_manager, frame_dim.xsize, frame_dim.ysize));
421
0
    ZeroFillImage(&ccs);
422
0
    paint_ccs = true;
423
0
  }
424
425
129
  constexpr float kVerySimilarThreshold = 0.03f;
426
129
  constexpr float kHasSimilarThreshold = 0.03f;
427
428
129
  const float* JXL_RESTRICT const_background_rows[3] = {
429
129
      background_rows[0], background_rows[1], background_rows[2]};
430
15.5M
  auto is_similar_b = [&](std::pair<int, int> p1, std::pair<int, int> p2) {
431
15.5M
    return is_similar_impl(p1, p2, const_background_rows, background_stride,
432
15.5M
                           kVerySimilarThreshold);
433
15.5M
  };
434
435
129
  constexpr int kMinPeak = 2;
436
129
  constexpr int kHasSimilarRadius = 2;
437
438
  // Find small CC outside the "similar enough" areas, compute bounding boxes,
439
  // and run heuristics to exclude some patches.
440
129
  JXL_ASSIGN_OR_RETURN(
441
129
      ImageB visited,
442
129
      ImageB::Create(memory_manager, frame_dim.xsize, frame_dim.ysize));
443
129
  ZeroFillImage(&visited);
444
129
  uint8_t* JXL_RESTRICT visited_row = visited.Row(0);
445
129
  const size_t visited_stride = visited.PixelsPerRow();
446
129
  std::vector<std::pair<uint32_t, uint32_t>> cc;
447
129
  std::vector<std::pair<uint32_t, uint32_t>> stack;
448
44.1k
  for (size_t y = 0; y < frame_dim.ysize; y++) {
449
19.2M
    for (size_t x = 0; x < frame_dim.xsize; x++) {
450
19.2M
      if (is_background_row[y * is_background_stride + x]) continue;
451
5.54M
      cc.clear();
452
5.54M
      stack.clear();
453
5.54M
      stack.emplace_back(x, y);
454
5.54M
      size_t min_x = x;
455
5.54M
      size_t max_x = x;
456
5.54M
      size_t min_y = y;
457
5.54M
      size_t max_y = y;
458
5.54M
      std::pair<uint32_t, uint32_t> reference;
459
5.54M
      bool found_border = false;
460
5.54M
      bool all_similar = true;
461
38.9M
      while (!stack.empty()) {
462
33.4M
        std::pair<uint32_t, uint32_t> cur = stack.back();
463
33.4M
        stack.pop_back();
464
33.4M
        if (visited_row[cur.second * visited_stride + cur.first]) continue;
465
5.54M
        visited_row[cur.second * visited_stride + cur.first] = 1;
466
5.54M
        if (cur.first < min_x) min_x = cur.first;
467
5.54M
        if (cur.first > max_x) max_x = cur.first;
468
5.54M
        if (cur.second < min_y) min_y = cur.second;
469
5.54M
        if (cur.second > max_y) max_y = cur.second;
470
5.54M
        if (paint_ccs) {
471
0
          cc.push_back(cur);
472
0
        }
473
22.1M
        for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
474
66.5M
          for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
475
49.9M
            if (dx == 0 && dy == 0) continue;
476
44.3M
            int next_first = static_cast<int32_t>(cur.first) + dx;
477
44.3M
            int next_second = static_cast<int32_t>(cur.second) + dy;
478
44.3M
            if (next_first < 0 || next_second < 0 ||
479
44.3M
                static_cast<uint32_t>(next_first) >= frame_dim.xsize ||
480
44.3M
                static_cast<uint32_t>(next_second) >= frame_dim.ysize) {
481
172k
              continue;
482
172k
            }
483
44.1M
            std::pair<uint32_t, uint32_t> next{next_first, next_second};
484
44.1M
            if (!is_background_row[next.second * is_background_stride +
485
44.1M
                                   next.first]) {
486
27.8M
              stack.push_back(next);
487
27.8M
            } else {
488
16.3M
              if (!found_border) {
489
760k
                reference = next;
490
760k
                found_border = true;
491
15.5M
              } else {
492
15.5M
                if (!is_similar_b(next, reference)) all_similar = false;
493
15.5M
              }
494
16.3M
            }
495
44.1M
          }
496
16.6M
        }
497
5.54M
      }
498
5.54M
      if (!found_border || !all_similar || max_x - min_x >= kMaxPatchSize ||
499
5.54M
          max_y - min_y >= kMaxPatchSize) {
500
4.78M
        continue;
501
4.78M
      }
502
756k
      size_t bpos = background_stride * reference.second + reference.first;
503
756k
      float ref[3] = {background_rows[0][bpos], background_rows[1][bpos],
504
756k
                      background_rows[2][bpos]};
505
756k
      bool has_similar = false;
506
756k
      for (size_t iy = std::max<int>(
507
756k
               static_cast<int32_t>(min_y) - kHasSimilarRadius, 0);
508
5.11M
           iy < std::min(max_y + kHasSimilarRadius + 1, frame_dim.ysize);
509
4.36M
           iy++) {
510
4.36M
        for (size_t ix = std::max<int>(
511
4.36M
                 static_cast<int32_t>(min_x) - kHasSimilarRadius, 0);
512
31.2M
             ix < std::min(max_x + kHasSimilarRadius + 1, frame_dim.xsize);
513
26.8M
             ix++) {
514
26.8M
          size_t opos = opsin_stride * iy + ix;
515
26.8M
          float px[3] = {opsin_rows[0][opos], opsin_rows[1][opos],
516
26.8M
                         opsin_rows[2][opos]};
517
26.8M
          if (pci.is_similar_v(ref, px, kHasSimilarThreshold)) {
518
20.7M
            has_similar = true;
519
20.7M
          }
520
26.8M
        }
521
4.36M
      }
522
756k
      if (!has_similar) continue;
523
756k
      info.emplace_back();
524
756k
      info.back().second.emplace_back(min_x, min_y);
525
756k
      QuantizedPatch& patch = info.back().first;
526
756k
      patch.xsize = max_x - min_x + 1;
527
756k
      patch.ysize = max_y - min_y + 1;
528
756k
      int max_value = 0;
529
2.26M
      for (size_t c : {1, 0, 2}) {
530
6.32M
        for (size_t iy = min_y; iy <= max_y; iy++) {
531
16.2M
          for (size_t ix = min_x; ix <= max_x; ix++) {
532
12.1M
            size_t offset = (iy - min_y) * patch.xsize + ix - min_x;
533
12.1M
            patch.fpixels[c][offset] =
534
12.1M
                opsin_rows[c][iy * opsin_stride + ix] - ref[c];
535
12.1M
            int val = pci.Quantize(patch.fpixels[c][offset], c);
536
12.1M
            patch.pixels[c][offset] = val;
537
12.1M
            if (std::abs(val) > max_value) max_value = std::abs(val);
538
12.1M
          }
539
4.06M
        }
540
2.26M
      }
541
756k
      if (max_value < kMinPeak) {
542
1.22k
        info.pop_back();
543
1.22k
        continue;
544
1.22k
      }
545
755k
      if (paint_ccs) {
546
0
        float cc_color = rng.UniformF(0.5, 1.0);
547
0
        for (std::pair<uint32_t, uint32_t> p : cc) {
548
0
          ccs.Row(p.second)[p.first] = cc_color;
549
0
        }
550
0
      }
551
755k
    }
552
44.0k
  }
553
554
129
  if (paint_ccs) {
555
0
    JXL_ENSURE(WantDebugOutput(cparams));
556
0
    JXL_RETURN_IF_ERROR(DumpPlaneNormalized(cparams, "ccs", ccs));
557
0
  }
558
129
  if (info.empty()) {
559
26
    return info;
560
26
  }
561
562
  // Remove duplicates.
563
103
  constexpr size_t kMinPatchOccurrences = 2;
564
103
  std::sort(info.begin(), info.end());
565
103
  size_t unique = 0;
566
755k
  for (size_t i = 1; i < info.size(); i++) {
567
754k
    if (info[i].first == info[unique].first) {
568
732k
      info[unique].second.insert(info[unique].second.end(),
569
732k
                                 info[i].second.begin(), info[i].second.end());
570
732k
    } else {
571
22.4k
      if (info[unique].second.size() >= kMinPatchOccurrences) {
572
16.3k
        unique++;
573
16.3k
      }
574
22.4k
      info[unique] = info[i];
575
22.4k
    }
576
754k
  }
577
103
  if (info[unique].second.size() >= kMinPatchOccurrences) {
578
34
    unique++;
579
34
  }
580
103
  info.resize(unique);
581
582
103
  size_t max_patch_size = 0;
583
584
16.4k
  for (const auto& patch : info) {
585
16.4k
    size_t pixels = patch.first.xsize * patch.first.ysize;
586
16.4k
    if (pixels > max_patch_size) max_patch_size = pixels;
587
16.4k
  }
588
589
  // don't use patches if all patches are smaller than this
590
103
  constexpr size_t kMinMaxPatchSize = 20;
591
103
  if (max_patch_size < kMinMaxPatchSize) {
592
6
    info.clear();
593
6
  }
594
595
103
  return info;
596
129
}
597
598
}  // namespace
599
600
Status FindBestPatchDictionary(const Image3F& opsin,
601
                               PassesEncoderState* JXL_RESTRICT state,
602
                               const JxlCmsInterface& cms, ThreadPool* pool,
603
283
                               AuxOut* aux_out, bool is_xyb) {
604
283
  JXL_ASSIGN_OR_RETURN(
605
283
      std::vector<PatchInfo> info,
606
283
      FindTextLikePatches(state->cparams, opsin, state, pool, aux_out, is_xyb));
607
283
  JxlMemoryManager* memory_manager = opsin.memory_manager();
608
609
  // TODO(veluca): this doesn't work if both dots and patches are enabled.
610
  // For now, since dots and patches are not likely to occur in the same kind of
611
  // images, disable dots if some patches were found.
612
283
  if (info.empty() &&
613
283
      ApplyOverride(
614
186
          state->cparams.dots,
615
186
          state->cparams.speed_tier <= SpeedTier::kSquirrel &&
616
186
              state->cparams.butteraugli_distance >= kMinButteraugliForDots &&
617
186
              !state->cparams.disable_perceptual_optimizations)) {
618
0
    Rect rect(0, 0, state->shared.frame_dim.xsize,
619
0
              state->shared.frame_dim.ysize);
620
0
    JXL_ASSIGN_OR_RETURN(info,
621
0
                         FindDotDictionary(state->cparams, opsin, rect,
622
0
                                           state->shared.cmap.base(), pool));
623
0
  }
624
625
283
  if (info.empty()) return true;
626
627
97
  std::sort(
628
82.6k
      info.begin(), info.end(), [&](const PatchInfo& a, const PatchInfo& b) {
629
82.6k
        return a.first.xsize * a.first.ysize > b.first.xsize * b.first.ysize;
630
82.6k
      });
631
632
97
  size_t max_x_size = 0;
633
97
  size_t max_y_size = 0;
634
97
  size_t total_pixels = 0;
635
636
16.3k
  for (const auto& patch : info) {
637
16.3k
    size_t pixels = patch.first.xsize * patch.first.ysize;
638
16.3k
    if (max_x_size < patch.first.xsize) max_x_size = patch.first.xsize;
639
16.3k
    if (max_y_size < patch.first.ysize) max_y_size = patch.first.ysize;
640
16.3k
    total_pixels += pixels;
641
16.3k
  }
642
643
  // Bin-packing & conversion of patches.
644
97
  constexpr float kBinPackingSlackness = 1.05f;
645
97
  size_t ref_xsize = std::max<float>(max_x_size, std::sqrt(total_pixels));
646
97
  size_t ref_ysize = std::max<float>(max_y_size, std::sqrt(total_pixels));
647
97
  std::vector<std::pair<size_t, size_t>> ref_positions(info.size());
648
  // TODO(veluca): allow partial overlaps of patches that have the same pixels.
649
97
  size_t max_y = 0;
650
108
  do {
651
108
    max_y = 0;
652
    // Increase packed image size.
653
108
    ref_xsize = ref_xsize * kBinPackingSlackness + 1;
654
108
    ref_ysize = ref_ysize * kBinPackingSlackness + 1;
655
656
108
    JXL_ASSIGN_OR_RETURN(ImageB occupied,
657
108
                         ImageB::Create(memory_manager, ref_xsize, ref_ysize));
658
108
    ZeroFillImage(&occupied);
659
108
    uint8_t* JXL_RESTRICT occupied_rows = occupied.Row(0);
660
108
    size_t occupied_stride = occupied.PixelsPerRow();
661
662
108
    bool success = true;
663
    // For every patch...
664
16.5k
    for (size_t patch = 0; patch < info.size(); patch++) {
665
16.4k
      size_t x0 = 0;
666
16.4k
      size_t y0 = 0;
667
16.4k
      size_t xsize = info[patch].first.xsize;
668
16.4k
      size_t ysize = info[patch].first.ysize;
669
16.4k
      bool found = false;
670
      // For every possible start position ...
671
941k
      for (; y0 + ysize <= ref_ysize; y0++) {
672
941k
        x0 = 0;
673
80.8M
        for (; x0 + xsize <= ref_xsize; x0++) {
674
79.9M
          bool has_occupied_pixel = false;
675
79.9M
          size_t x = x0;
676
          // Check if it is possible to place the patch in this position in the
677
          // reference frame.
678
334M
          for (size_t y = y0; y < y0 + ysize; y++) {
679
254M
            x = x0;
680
273M
            for (; x < x0 + xsize; x++) {
681
271M
              if (occupied_rows[y * occupied_stride + x]) {
682
252M
                has_occupied_pixel = true;
683
252M
                break;
684
252M
              }
685
271M
            }
686
254M
          }  // end of positioning check
687
79.9M
          if (!has_occupied_pixel) {
688
16.4k
            found = true;
689
16.4k
            break;
690
16.4k
          }
691
79.9M
          x0 = x;  // Jump to next pixel after the occupied one.
692
79.9M
        }
693
941k
        if (found) break;
694
941k
      }  // end of start position checking
695
696
      // We didn't find a possible position: repeat from the beginning with a
697
      // larger reference frame size.
698
16.4k
      if (!found) {
699
11
        success = false;
700
11
        break;
701
11
      }
702
703
      // We found a position: mark the corresponding positions in the reference
704
      // image as used.
705
16.4k
      ref_positions[patch] = {x0, y0};
706
81.6k
      for (size_t y = y0; y < y0 + ysize; y++) {
707
567k
        for (size_t x = x0; x < x0 + xsize; x++) {
708
502k
          occupied_rows[y * occupied_stride + x] = JXL_TRUE;
709
502k
        }
710
65.1k
      }
711
16.4k
      max_y = std::max(max_y, y0 + ysize);
712
16.4k
    }
713
714
108
    if (success) break;
715
108
  } while (true);
716
717
97
  JXL_ENSURE(ref_ysize >= max_y);
718
719
97
  ref_ysize = max_y;
720
721
97
  JXL_ASSIGN_OR_RETURN(Image3F reference_frame,
722
97
                       Image3F::Create(memory_manager, ref_xsize, ref_ysize));
723
  // TODO(veluca): figure out a better way to fill the image.
724
97
  ZeroFillImage(&reference_frame);
725
97
  std::vector<PatchPosition> positions;
726
97
  std::vector<PatchReferencePosition> pref_positions;
727
97
  std::vector<PatchBlending> blendings;
728
97
  float* JXL_RESTRICT ref_rows[3] = {
729
97
      reference_frame.PlaneRow(0, 0),
730
97
      reference_frame.PlaneRow(1, 0),
731
97
      reference_frame.PlaneRow(2, 0),
732
97
  };
733
97
  size_t ref_stride = reference_frame.PixelsPerRow();
734
97
  size_t num_ec = state->shared.metadata->m.num_extra_channels;
735
736
16.4k
  for (size_t i = 0; i < info.size(); i++) {
737
16.3k
    PatchReferencePosition ref_pos;
738
16.3k
    ref_pos.xsize = info[i].first.xsize;
739
16.3k
    ref_pos.ysize = info[i].first.ysize;
740
16.3k
    ref_pos.x0 = ref_positions[i].first;
741
16.3k
    ref_pos.y0 = ref_positions[i].second;
742
16.3k
    ref_pos.ref = kPatchFrameReferenceId;
743
80.9k
    for (size_t y = 0; y < ref_pos.ysize; y++) {
744
556k
      for (size_t x = 0; x < ref_pos.xsize; x++) {
745
1.96M
        for (size_t c = 0; c < 3; c++) {
746
1.47M
          ref_rows[c][(y + ref_pos.y0) * ref_stride + x + ref_pos.x0] =
747
1.47M
              info[i].first.fpixels[c][y * ref_pos.xsize + x];
748
1.47M
        }
749
491k
      }
750
64.5k
    }
751
747k
    for (const auto& pos : info[i].second) {
752
747k
      JXL_DEBUG_V(4, "Patch %" PRIuS "x%" PRIuS " at position %u,%u",
753
747k
                  ref_pos.xsize, ref_pos.ysize, pos.first, pos.second);
754
747k
      positions.emplace_back(
755
747k
          PatchPosition{pos.first, pos.second, pref_positions.size()});
756
      // Add blending for color channels, ignore other channels.
757
747k
      blendings.push_back({PatchBlendMode::kAdd, 0, false});
758
747k
      for (size_t j = 0; j < num_ec; ++j) {
759
0
        blendings.push_back({PatchBlendMode::kNone, 0, false});
760
0
      }
761
747k
    }
762
16.3k
    pref_positions.emplace_back(ref_pos);
763
16.3k
  }
764
765
97
  CompressParams cparams = state->cparams;
766
  // Recursive application of patches could create very weird issues.
767
97
  cparams.patches = Override::kOff;
768
769
97
  if (WantDebugOutput(cparams)) {
770
0
    if (is_xyb) {
771
0
      JXL_RETURN_IF_ERROR(
772
0
          DumpXybImage(cparams, "patch_reference", reference_frame));
773
0
    } else {
774
0
      JXL_RETURN_IF_ERROR(
775
0
          DumpImage(cparams, "patch_reference", reference_frame));
776
0
    }
777
0
  }
778
779
97
  JXL_RETURN_IF_ERROR(RoundtripPatchFrame(&reference_frame, state,
780
97
                                          kPatchFrameReferenceId, cparams, cms,
781
97
                                          pool, aux_out, /*subtract=*/true));
782
783
  // TODO(veluca): this assumes that applying patches is commutative, which is
784
  // not true for all blending modes. This code only produces kAdd patches, so
785
  // this works out.
786
97
  PatchDictionaryEncoder::SetPositions(
787
97
      &state->shared.image_features.patches, std::move(positions),
788
97
      std::move(pref_positions), std::move(blendings), num_ec + 1);
789
97
  return true;
790
97
}
791
792
Status RoundtripPatchFrame(Image3F* reference_frame,
793
                           PassesEncoderState* JXL_RESTRICT state, int idx,
794
                           CompressParams& cparams, const JxlCmsInterface& cms,
795
97
                           ThreadPool* pool, AuxOut* aux_out, bool subtract) {
796
97
  JxlMemoryManager* memory_manager = state->memory_manager();
797
97
  FrameInfo patch_frame_info;
798
97
  cparams.resampling = 1;
799
97
  cparams.ec_resampling = 1;
800
97
  cparams.dots = Override::kOff;
801
97
  cparams.noise = Override::kOff;
802
97
  cparams.modular_mode = true;
803
97
  cparams.responsive = 0;
804
97
  cparams.progressive_dc = 0;
805
97
  cparams.progressive_mode = Override::kOff;
806
97
  cparams.qprogressive_mode = Override::kOff;
807
  // Use gradient predictor and not Predictor::Best.
808
97
  cparams.options.predictor = Predictor::Gradient;
809
97
  patch_frame_info.save_as_reference = idx;  // always saved.
810
97
  patch_frame_info.frame_type = FrameType::kReferenceOnly;
811
97
  patch_frame_info.save_before_color_transform = true;
812
97
  ImageBundle ib(memory_manager, &state->shared.metadata->m);
813
  // TODO(veluca): metadata.color_encoding is a lie: ib is in XYB, but there is
814
  // no simple way to express that yet.
815
97
  patch_frame_info.ib_needs_color_transform = false;
816
97
  JXL_RETURN_IF_ERROR(ib.SetFromImage(
817
97
      std::move(*reference_frame), state->shared.metadata->m.color_encoding));
818
97
  if (!ib.metadata()->extra_channel_info.empty()) {
819
    // Add placeholder extra channels to the patch image: patch encoding does
820
    // not yet support extra channels, but the codec expects that the amount of
821
    // extra channels in frames matches that in the metadata of the codestream.
822
0
    std::vector<ImageF> extra_channels;
823
0
    extra_channels.reserve(ib.metadata()->extra_channel_info.size());
824
0
    for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
825
0
      JXL_ASSIGN_OR_RETURN(
826
0
          ImageF ch, ImageF::Create(memory_manager, ib.xsize(), ib.ysize()));
827
0
      extra_channels.emplace_back(std::move(ch));
828
      // Must initialize the image with data to not affect blending with
829
      // uninitialized memory.
830
      // TODO(lode): patches must copy and use the real extra channels instead.
831
0
      ZeroFillImage(&extra_channels.back());
832
0
    }
833
0
    JXL_RETURN_IF_ERROR(ib.SetExtraChannels(std::move(extra_channels)));
834
0
  }
835
97
  auto special_frame = jxl::make_unique<BitWriter>(memory_manager);
836
97
  AuxOut patch_aux_out;
837
97
  JXL_RETURN_IF_ERROR(EncodeFrame(
838
97
      memory_manager, cparams, patch_frame_info, state->shared.metadata, ib,
839
97
      cms, pool, special_frame.get(), aux_out ? &patch_aux_out : nullptr));
840
97
  if (aux_out) {
841
0
    for (const auto& l : patch_aux_out.layers) {
842
0
      aux_out->layer(LayerType::Dictionary).Assimilate(l);
843
0
    }
844
0
  }
845
97
  const Span<const uint8_t> encoded = special_frame->GetSpan();
846
97
  state->special_frames.emplace_back(std::move(special_frame));
847
97
  if (subtract) {
848
97
    ImageBundle decoded(memory_manager, &state->shared.metadata->m);
849
97
    auto dec_state = jxl::make_unique<PassesDecoderState>(memory_manager);
850
97
    JXL_RETURN_IF_ERROR(dec_state->output_encoding_info.SetFromMetadata(
851
97
        *state->shared.metadata));
852
97
    const uint8_t* frame_start = encoded.data();
853
97
    size_t encoded_size = encoded.size();
854
97
    JXL_RETURN_IF_ERROR(DecodeFrame(
855
97
        dec_state.get(), pool, frame_start, encoded_size,
856
97
        /*frame_header=*/nullptr, &decoded, *state->shared.metadata));
857
97
    frame_start += decoded.decoded_bytes();
858
97
    encoded_size -= decoded.decoded_bytes();
859
97
    size_t ref_xsize =
860
97
        dec_state->shared_storage.reference_frames[idx].frame->color()->xsize();
861
    // if the frame itself uses patches, we need to decode another frame
862
97
    if (!ref_xsize) {
863
0
      JXL_RETURN_IF_ERROR(DecodeFrame(
864
0
          dec_state.get(), pool, frame_start, encoded_size,
865
0
          /*frame_header=*/nullptr, &decoded, *state->shared.metadata));
866
0
    }
867
97
    JXL_ENSURE(encoded_size == 0);
868
97
    state->shared.reference_frames[idx] =
869
97
        std::move(dec_state->shared_storage.reference_frames[idx]);
870
97
  } else {
871
0
    *state->shared.reference_frames[idx].frame = std::move(ib);
872
0
  }
873
97
  return true;
874
97
}
875
876
}  // namespace jxl