Coverage Report

Created: 2026-06-14 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <algorithm>
11
#include <array>
12
#include <cstddef>
13
#include <cstdint>
14
15
#include "lib/jxl/base/common.h"
16
#include "lib/jxl/base/compiler_specific.h"
17
#include "lib/jxl/base/status.h"
18
#include "lib/jxl/dec_ans.h"
19
#include "lib/jxl/enc_ans_params.h"
20
#include "lib/jxl/memory_manager_internal.h"
21
#include "lib/jxl/modular/modular_image.h"
22
23
#undef HWY_TARGET_INCLUDE
24
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
25
#include <hwy/foreach_target.h>
26
#include <hwy/highway.h>
27
28
#if HWY_TARGET == HWY_SCALAR
29
#include "lib/jxl/modular/encoding/context_predict.h"
30
#include "lib/jxl/pack_signed.h"
31
#endif
32
33
HWY_BEFORE_NAMESPACE();
34
namespace jxl {
35
namespace HWY_NAMESPACE {
36
37
// These templates are not found via ADL.
38
using hwy::HWY_NAMESPACE::Add;
39
using hwy::HWY_NAMESPACE::And;
40
using hwy::HWY_NAMESPACE::Ge;
41
using hwy::HWY_NAMESPACE::GetLane;
42
using hwy::HWY_NAMESPACE::Gt;
43
using hwy::HWY_NAMESPACE::IfThenElse;
44
using hwy::HWY_NAMESPACE::IfThenElseZero;
45
using hwy::HWY_NAMESPACE::Iota;
46
using hwy::HWY_NAMESPACE::Load;
47
using hwy::HWY_NAMESPACE::LoadU;
48
using hwy::HWY_NAMESPACE::Lt;
49
using hwy::HWY_NAMESPACE::Max;
50
using hwy::HWY_NAMESPACE::Min;
51
using hwy::HWY_NAMESPACE::Mul;
52
using hwy::HWY_NAMESPACE::Not;
53
using hwy::HWY_NAMESPACE::Set;
54
using hwy::HWY_NAMESPACE::ShiftLeft;
55
using hwy::HWY_NAMESPACE::ShiftRight;
56
using hwy::HWY_NAMESPACE::Store;
57
using hwy::HWY_NAMESPACE::StoreU;
58
using hwy::HWY_NAMESPACE::Sub;
59
using hwy::HWY_NAMESPACE::Xor;
60
using hwy::HWY_NAMESPACE::Zero;
61
62
0
// Estimates the entropy-coded size of `img` (presumably in bits; confirm
// against Histogram::ShannonEntropy).
//
// For every channel, each sample is predicted from its left / top / top-left
// neighbours with a clamped gradient, the prediction residual is mapped to an
// unsigned value and split into a token plus raw extra bits, and the token is
// added to one of several histograms selected by the spread (max - min) of the
// three neighbours. After each channel the Shannon entropy of every histogram
// is added to the estimate and the histograms are cleared.
//
// Returns the number of raw extra bits plus the summed histogram entropies
// (integer parts summed exactly, fractional parts summed separately and
// truncated once). The SIMD variant allocates scratch rows and propagates an
// allocation failure from AlignedMemory::Create.
StatusOr<float> EstimateCost(const Image& img) {
  size_t histo_cost = 0;
  float histo_cost_frac = 0.0f;
  size_t extra_bits = 0;

#if HWY_TARGET == HWY_SCALAR
  HybridUintConfig config;
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
                        47, 63, 95, 127, 191, 255, 392, 500};
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
  Histogram histo[nc] = {};
  for (const Channel& ch : img.channel) {
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
    for (size_t y = 0; y < ch.h; y++) {
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
      for (size_t x = 0; x < ch.w; x++) {
        // Missing neighbours fall back to: left -> top (or 0 in the very first
        // pixel), top -> left, topleft -> left.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
        pixel_type_w top = (y ? *(r + x - onerow) : left);
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
        size_t max_diff =
            std::max({left, top, topleft}) - std::min({left, top, topleft});
        // Context = number of cutoffs strictly above the neighbour spread.
        size_t ctx = 0;
        for (uint32_t c : cutoffs) {
          ctx += (max_diff < c) ? 1 : 0;
        }
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
        uint32_t token;
        uint32_t nbits;
        uint32_t bits;
        config.Encode(PackSigned(res), &token, &nbits, &bits);
        histo[ctx].Add(token);
        extra_bits += nbits;
      }
    }
    // Histograms are per channel: account for them and start over.
    for (auto& h : histo) {
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
#else
  JxlMemoryManager* memory_manager = img.memory_manager();
  const auto& ctx_map = estimate_cost_detail::ContextMap();
  const HWY_FULL(int32_t) di;
  const HWY_FULL(uint32_t) du;
  const HWY_FULL(float) df;
  const auto kOne = Set(du, 1);
  // Packed residuals below this value are used as the token directly.
  const auto kSplit = Set(du, 16);
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
  const auto kTokenBias = Set(du, 8);
  const auto kTokenMul = Set(du, 4);
  const auto kMsbMask = Set(du, 3);
  // Neighbour spreads are clamped to the last index of the context table.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
  const auto kLanes = Set(du, Lanes(du));
  const auto kIota = Iota(du, 0);
  // Values above this threshold are shifted right by kLargeShiftVal before the
  // integer -> float conversion, and the shift is added back to the exponent.
  // NOTE(review): presumably this keeps the conversion exact; confirm.
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
  constexpr size_t kLargeShiftVal = 10;
  const auto kLargeShift = Set(du, kLargeShiftVal);

  // Scratch rows are sized for the widest non-empty channel, rounded up to a
  // whole number of vectors and never smaller than two vectors (the "primer"
  // stores below write Lanes() values starting at index 1).
  size_t max_w = 0;
  for (const Channel& ch : img.channel) {
    if (ch.h == 0) continue;
    max_w = std::max(max_w, ch.w);
  }
  max_w = RoundUpTo(max_w, Lanes(du));
  max_w = std::max(max_w, 2 * Lanes(du));

  JXL_ASSIGN_OR_RETURN(
      AlignedMemory buffer,
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
  // NOTE: `primer` shares storage with `max_diff_row`, and `top_primer` with
  // `token_row`. This is safe only because the primers are loaded at the start
  // of the first vector iteration of a row, before the stores of that
  // iteration overwrite them. Do not reorder the loads and stores below.
  uint32_t* max_diff_row = buffer.address<uint32_t>();
  uint32_t* token_row = max_diff_row + max_w;
  int32_t* primer = buffer.address<int32_t>();
  int32_t* top_primer = primer + max_w;

  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
  for (const Channel& ch : img.channel) {
    if (ch.h == 0 || ch.w == 0) continue;
    for (auto& h : histo) {
      h.EnsureCapacity(32 * 4);
    }

    // ---- First row: the prediction is the left neighbour (0 for x == 0) and
    // every token goes to context 0.
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
    // Not restrict-qualified: `last` starts out pointing into the scratch
    // buffer, which is also written through `primer` and `max_diff_row`.
    const pixel_type* last = primer;
    // primer = [0, r[0], r[1], ...]: the "left" values of the first vector.
    primer[0] = 0;
    StoreU(Load(di, r), di, primer + 1);
    auto pos = kIota;
    const auto last_pos = Set(du, ch.w);
    // Extra bits are accumulated per row in 32-bit lanes and then flushed into
    // the size_t total, so the total cannot wrap at 2^32 on large images.
    auto row_extra_bits = Zero(du);
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
      const auto left = LoadU(di, last);
      const auto central = Load(di, r + x);
      const auto ures = BitCast(du, Sub(central, left));
      // Signed -> unsigned mapping: v >= 0 becomes 2v, v < 0 becomes -2v - 1.
      const auto packed =
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
      const auto is_large = Gt(packed, kLargeThreshold);
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
      const auto not_literal = Ge(packed, kSplit);
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
      // Read floor(log2) and the leading mantissa bits off the float encoding.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
      // eb = floor(log2(packed)) - 2: the number of raw extra bits.
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
      // token = 8 + 4 * eb + (two bits following the leading one).
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                             And(ShiftRight<21>(v), kMsbMask));
      // Lanes at or beyond ch.w are row padding and must not add extra bits.
      const auto tail_mask = Lt(pos, last_pos);
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
      const auto token_fixed = IfThenElse(not_literal, token, packed);
      row_extra_bits = Add(row_extra_bits, IfThenElseZero(tail_mask, eb_fixed));
      Store(token_fixed, du, token_row + x);
      pos = Add(pos, kLanes);
      // From the second vector on, "left" is read straight from the row.
      last = r + x + Lanes(di) - 1;
    }
    extra_bits += GetLane(SumOfLanes(du, row_extra_bits));
    for (size_t x = 0; x < ch.w; x++) {
      histo[0].FastAdd(token_row[x]);
    }

    // ---- Remaining rows: clamped-gradient prediction, context chosen by the
    // spread of the three neighbours.
    for (size_t y = 1; y < ch.h; y++) {
      r = ch.Row(y);
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
      // For x == 0 both "left" and "topleft" fall back to the top sample.
      last = primer;
      primer[0] = t[0];
      StoreU(Load(di, r), di, primer + 1);
      top_primer[0] = t[0];
      StoreU(Load(di, t), di, top_primer + 1);
      // Not restrict-qualified, for the same reason as `last`.
      const pixel_type* top_last = top_primer;
      pos = kIota;
      row_extra_bits = Zero(du);
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
        const auto left = LoadU(di, last);
        const auto central = Load(di, r + x);
        const auto topleft = LoadU(di, top_last);
        const auto top = Load(di, t + x);
        const auto l_ge_t = Ge(left, top);
        const auto m = IfThenElse(l_ge_t, top, left);  // min(left, top)
        const auto M = IfThenElse(l_ge_t, left, top);  // max(left, top)
        const auto maxx = Max(topleft, M);
        const auto minn = Min(topleft, m);
        const auto max_diff = BitCast(du, Sub(maxx, minn));
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
        // Clamped gradient: top + left - topleft, replaced by M when topleft
        // is below both neighbours and by m when it is above both.
        const auto overshoot = Lt(topleft, m);
        const auto undershoot = Gt(topleft, M);
        const auto grad =
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
                            BitCast(du, topleft)));
        const auto prediction =
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
        const auto ures = BitCast(du, Sub(central, prediction));
        // Same residual -> token / extra-bits mapping as in the first row.
        const auto packed =
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
        const auto is_large = Gt(packed, kLargeThreshold);
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
        const auto not_literal = Ge(packed, kSplit);
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                               And(ShiftRight<21>(v), kMsbMask));
        const auto tail_mask = Lt(pos, last_pos);
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
        const auto token_fixed = IfThenElse(not_literal, token, packed);
        row_extra_bits =
            Add(row_extra_bits, IfThenElseZero(tail_mask, eb_fixed));
        Store(token_fixed, du, token_row + x);
        pos = Add(pos, kLanes);
        last = r + x + Lanes(di) - 1;
        top_last = t + x + Lanes(di) - 1;
      }
      extra_bits += GetLane(SumOfLanes(du, row_extra_bits));
      for (size_t x = 0; x < ch.w; x++) {
        size_t ctx = ctx_map[max_diff_row[x]];
        histo[ctx].FastAdd(token_row[x]);
      }
    }

    // Histograms are per channel: account for them and start over.
    for (auto& h : histo) {
      h.Condition();
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
#endif
  size_t total_cost =
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
  return total_cost;
}
252
253
// NOLINTNEXTLINE(google-readability-namespace-comments)
254
}  // namespace HWY_NAMESPACE
255
}  // namespace jxl
256
HWY_AFTER_NAMESPACE();
257
258
#if HWY_ONCE
259
namespace jxl {
260
261
// Makes the per-target EstimateCost implementations compiled above (one per
// HWY_NAMESPACE, via foreach_target.h) available to HWY_DYNAMIC_DISPATCH.
HWY_EXPORT(EstimateCost);
262
263
0
// Public entry point: forwards `img` to the EstimateCost implementation that
// HWY_DYNAMIC_DISPATCH selects and returns its result unchanged.
StatusOr<float> EstimateCost(const Image& img) {
264
0
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
265
0
}
266
267
namespace estimate_cost_detail {
268
/*
269
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
270
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
271
*/
272
0
// Returns the lookup table generated by the Python snippet above: entry `i`
// is the index of the last cutoff that is <= i. The initializer lists 501
// values (indices 0..500). The SIMD EstimateCost path indexes the table with
// the neighbour spread clamped to kLastThreshold - 1 to pick a histogram.
const std::array<uint8_t, kLastThreshold>& ContextMap() {
273
0
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
274
0
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
275
0
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
276
0
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
277
0
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
278
0
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
279
0
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
280
0
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
281
0
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
282
0
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
283
0
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
284
0
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
285
0
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
286
0
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
287
0
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
288
0
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
290
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
291
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
292
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
293
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
294
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
295
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
296
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
297
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
298
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
299
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
300
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
301
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
302
0
  return kCtxMap;
303
0
}
304
}  // namespace estimate_cost_detail
305
306
}  // namespace jxl
307
#endif