Coverage Report

Created: 2025-07-23 07:47

/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
0
// Estimates the entropy-coded cost of all channels of `img`.
//
// Every sample is predicted from its left, top and top-left neighbours with
// the clamped-gradient predictor. The packed (sign-folded) residual is split
// into a hybrid-uint token plus a number of raw extra bits. Tokens are
// histogrammed per context, where the context is derived from the spread
// (max - min) of the three neighbours. For each channel the Shannon entropy
// of every context histogram is added to the total, together with the number
// of raw extra bits.
//
// Returns the estimated cost (converted to float on return), or the error
// produced while allocating the scratch buffer (SIMD targets only).
StatusOr<float> EstimateCost(const Image& img) {
  // Integer and fractional parts of the histogram entropies are accumulated
  // separately; the fractional sum is truncated once at the very end.
  size_t histo_cost = 0;
  float histo_cost_frac = 0.0f;
  size_t extra_bits = 0;

#if HWY_TARGET == HWY_SCALAR
  HybridUintConfig config;
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
                        47, 63, 95, 127, 191, 255, 392, 500};
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
  Histogram histo[nc] = {};
  for (const Channel& ch : img.channel) {
    const intptr_t onerow = ch.plane.PixelsPerRow();
    for (size_t y = 0; y < ch.h; y++) {
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
      for (size_t x = 0; x < ch.w; x++) {
        // Border handling: the first column falls back to the sample above,
        // the first row falls back to the left sample, and the very first
        // sample is predicted from 0.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
        pixel_type_w top = (y ? *(r + x - onerow) : left);
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
        size_t max_diff =
            std::max({left, top, topleft}) - std::min({left, top, topleft});
        // The context is the number of cutoffs strictly above the spread.
        size_t ctx = 0;
        for (uint32_t c : cutoffs) {
          ctx += (max_diff < c) ? 1 : 0;
        }
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
        uint32_t token;
        uint32_t nbits;
        uint32_t bits;
        config.Encode(PackSigned(res), &token, &nbits, &bits);
        histo[ctx].Add(token);
        extra_bits += nbits;
      }
    }
    // Histograms are evaluated and reset per channel.
    for (auto& h : histo) {
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
#else
  JxlMemoryManager* memory_manager = img.memory_manager();
  const auto& ctx_map = estimate_cost_detail::ContextMap();
  const HWY_FULL(int32_t) di;
  const HWY_FULL(uint32_t) du;
  const HWY_FULL(float) df;
  const auto kOne = Set(du, 1);
  // Packed residuals below kSplit are used as the token directly and carry no
  // extra bits. For larger values the token is 8 + 4 * eb + (the two bits
  // below the leading one), with eb = floor(log2(packed)) - 2 extra bits.
  // NOTE(review): these constants presumably reproduce the default
  // HybridUintConfig used by the scalar branch - confirm.
  const auto kSplit = Set(du, 16);
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
  const auto kTokenBias = Set(du, 8);
  const auto kTokenMul = Set(du, 4);
  const auto kMsbMask = Set(du, 3);
  // Spreads are clamped to the last valid index of the context map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
  const auto kLanes = Set(du, Lanes(du));
  const auto kIota = Iota(du, 0);

  // Scratch rows must hold the widest non-empty channel rounded up to whole
  // vectors, and at least two vectors for the "primer" stores below.
  size_t max_w = 0;
  for (const Channel& ch : img.channel) {
    if (ch.h == 0) continue;
    max_w = std::max(max_w, ch.w);
  }
  max_w = RoundUpTo(max_w, Lanes(du));
  max_w = std::max(max_w, 2 * Lanes(du));

  JXL_ASSIGN_OR_RETURN(
      AlignedMemory buffer,
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
  uint32_t* max_diff_row = buffer.address<uint32_t>();
  uint32_t* token_row = max_diff_row + max_w;
  // The primers deliberately alias max_diff_row / token_row: they are only
  // read by the first vector iteration of a row, before that iteration's
  // stores overwrite them.
  int32_t* primer = buffer.address<int32_t>();
  int32_t* top_primer = primer + max_w;

  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
  for (const Channel& ch : img.channel) {
    if (ch.h == 0 || ch.w == 0) continue;
    for (auto& h : histo) {
      // The uint32 -> float conversion below rounds, so a packed residual
      // just under 2^32 can round up to 2^32 and yield token 128. Reserve one
      // exponent bucket beyond the exact maximum of 127 so FastAdd stays in
      // range (assumes FastAdd does not grow the histogram - confirm).
      h.EnsureCapacity(33 * 4);
    }

    // First row: there is no row above, so each sample is predicted from its
    // left neighbour (0 for the first sample) and everything uses context 0.
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
    const pixel_type* JXL_RESTRICT last = primer;
    primer[0] = 0;
    StoreU(Load(di, r), di, primer + 1);
    auto pos = kIota;
    const auto last_pos = Set(du, ch.w);
    {
      // Per-row lane counters; folded into the size_t total after the row so
      // the 32-bit lanes cannot wrap on large images.
      auto extra_bits_lanes = Zero(du);
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
        const auto left = LoadU(di, last);
        const auto central = Load(di, r + x);
        const auto ures = BitCast(du, Sub(central, left));
        // Sign folding: (v << 1) ^ (v < 0 ? ~0 : 0).
        const auto packed =
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
        const auto not_literal = Ge(packed, kSplit);
        // The float exponent and top mantissa bits give floor(log2) and the
        // bits below the leading one. Values wider than 24 bits are rounded
        // by the conversion, which is accepted for an estimate.
        const auto v = BitCast(du, ConvertTo(df, packed));
        const auto eb = Sub(ShiftRight<23>(v), kExpOffset2);
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                               And(ShiftRight<21>(v), kMsbMask));
        // Lanes at or beyond ch.w are padding and must not contribute.
        const auto tail_mask = Lt(pos, last_pos);
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
        const auto token_fixed = IfThenElse(not_literal, token, packed);
        extra_bits_lanes =
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
        Store(token_fixed, du, token_row + x);
        pos = Add(pos, kLanes);
        // From the second vector on, "left" is read straight from the row.
        last = r + x + Lanes(di) - 1;
      }
      extra_bits += GetLane(SumOfLanes(du, extra_bits_lanes));
    }
    for (size_t x = 0; x < ch.w; x++) {
      histo[0].FastAdd(token_row[x]);
    }

    // Remaining rows: clamped-gradient prediction from left / top / topleft.
    for (size_t y = 1; y < ch.h; y++) {
      r = ch.Row(y);
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
      // For the first sample, left and topleft both fall back to the sample
      // directly above.
      last = primer;
      primer[0] = t[0];
      StoreU(Load(di, r), di, primer + 1);
      top_primer[0] = t[0];
      StoreU(Load(di, t), di, top_primer + 1);
      const pixel_type* JXL_RESTRICT top_last = top_primer;
      pos = kIota;
      auto extra_bits_lanes = Zero(du);
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
        const auto left = LoadU(di, last);
        const auto central = Load(di, r + x);
        const auto topleft = LoadU(di, top_last);
        const auto top = Load(di, t + x);
        // m / M are the smaller / larger of left and top.
        const auto l_ge_t = Ge(left, top);
        const auto m = IfThenElse(l_ge_t, top, left);
        const auto M = IfThenElse(l_ge_t, left, top);
        const auto maxx = Max(topleft, M);
        const auto minn = Min(topleft, m);
        const auto max_diff = BitCast(du, Sub(maxx, minn));
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
        const auto overshoot = Lt(topleft, m);
        const auto undershoot = Gt(topleft, M);
        // top + left - topleft, computed on unsigned lanes.
        const auto grad =
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
                            BitCast(du, topleft)));
        const auto prediction =
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
        const auto ures = BitCast(du, Sub(central, prediction));
        const auto packed =
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
        const auto not_literal = Ge(packed, kSplit);
        const auto v = BitCast(du, ConvertTo(df, packed));
        const auto eb = Sub(ShiftRight<23>(v), kExpOffset2);
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                               And(ShiftRight<21>(v), kMsbMask));
        const auto tail_mask = Lt(pos, last_pos);
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
        const auto token_fixed = IfThenElse(not_literal, token, packed);
        extra_bits_lanes =
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
        Store(token_fixed, du, token_row + x);
        pos = Add(pos, kLanes);
        last = r + x + Lanes(di) - 1;
        top_last = t + x + Lanes(di) - 1;
      }
      extra_bits += GetLane(SumOfLanes(du, extra_bits_lanes));
      for (size_t x = 0; x < ch.w; x++) {
        size_t ctx = ctx_map[max_diff_row[x]];
        histo[ctx].FastAdd(token_row[x]);
      }
    }

    // Histograms are evaluated and reset per channel. Condition() is called
    // first because the counts were filled through FastAdd.
    for (auto& h : histo) {
      h.Condition();
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
#endif
  size_t total_cost =
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
  return total_cost;
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
Unexecuted instantiation: jxl::N_AVX2::EstimateCost(jxl::Image const&)
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
235
236
// NOLINTNEXTLINE(google-readability-namespace-comments)
237
}  // namespace HWY_NAMESPACE
238
}  // namespace jxl
239
HWY_AFTER_NAMESPACE();
240
241
#if HWY_ONCE
242
namespace jxl {
243
244
HWY_EXPORT(EstimateCost);
245
246
0
// Public entry point: forwards `img` to the EstimateCost implementation
// chosen by HWY_DYNAMIC_DISPATCH (exported via HWY_EXPORT) and returns its
// result unchanged.
StatusOr<float> EstimateCost(const Image& img) {
247
0
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
248
0
}
249
250
namespace estimate_cost_detail {
251
/*
252
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
253
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
254
*/
255
0
// Returns a reference to a function-local static lookup table that maps a
// neighbour-spread value (the clamped max_diff used by EstimateCost) to a
// context index. The initializer lists 501 entries (indices 0..500) with
// values 0..16: entry i is the index of the last cutoff that is <= i, for the
// cutoffs 0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500.
// NOTE(review): assumes kLastThreshold == 501 so that every element has an
// explicit initializer - confirm against the header.
const std::array<uint8_t, kLastThreshold>& ContextMap() {
256
0
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
257
0
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
258
0
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
259
0
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
260
0
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
261
0
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
262
0
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
263
0
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
264
0
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
265
0
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
266
0
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
267
0
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
268
0
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
269
0
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
270
0
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
271
0
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
272
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
273
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
274
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
275
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
276
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
277
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
278
0
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
279
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
280
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
281
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
282
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
283
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
284
0
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
285
0
  return kCtxMap;
286
0
}
287
}  // namespace estimate_cost_detail
288
289
}  // namespace jxl
290
#endif