Coverage Report

Created: 2026-06-07 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
3.00k
StatusOr<float> EstimateCost(const Image& img) {
57
3.00k
  size_t histo_cost = 0;
58
3.00k
  float histo_cost_frac = 0.0f;
59
3.00k
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
3.00k
  JxlMemoryManager* memory_manager = img.memory_manager();
100
3.00k
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
3.00k
  const HWY_FULL(int32_t) di;
102
3.00k
  const HWY_FULL(uint32_t) du;
103
3.00k
  const HWY_FULL(float) df;
104
3.00k
  const auto kOne = Set(du, 1);
105
3.00k
  const auto kSplit = Set(du, 16);
106
3.00k
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
3.00k
  const auto kTokenBias = Set(du, 8);
108
3.00k
  const auto kTokenMul = Set(du, 4);
109
3.00k
  const auto kMsbMask = Set(du, 3);
110
3.00k
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
3.00k
  const auto kLanes = Set(du, Lanes(du));
112
3.00k
  const auto kIota = Iota(du, 0);
113
3.00k
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
3.00k
  constexpr size_t kLargeShiftVal = 10;
115
3.00k
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
3.00k
  size_t max_w = 0;
118
3.00k
  for (const Channel& ch : img.channel) {
119
3.00k
    if (ch.h == 0) continue;
120
3.00k
    max_w = std::max(max_w, ch.w);
121
3.00k
  }
122
3.00k
  max_w = RoundUpTo(max_w, Lanes(du));
123
3.00k
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
3.00k
  JXL_ASSIGN_OR_RETURN(
126
3.00k
      AlignedMemory buffer,
127
3.00k
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
3.00k
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
3.00k
  uint32_t* token_row = max_diff_row + max_w;
130
3.00k
  int32_t* primer = buffer.address<int32_t>();
131
3.00k
  int32_t* top_primer = primer + max_w;
132
133
3.00k
  HybridUintConfig config;
134
135
3.00k
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
3.00k
  auto extra_bits_lanes = Zero(du);
137
3.00k
  for (const Channel& ch : img.channel) {
138
3.00k
    if (ch.h == 0 || ch.w == 0) continue;
139
51.0k
    for (auto& h : histo) {
140
51.0k
      h.EnsureCapacity(32 * 4);
141
51.0k
    }
142
3.00k
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
3.00k
    const pixel_type* JXL_RESTRICT last = primer;
144
3.00k
    primer[0] = 0;
145
3.00k
    StoreU(Load(di, r), di, primer + 1);
146
3.00k
    auto pos = kIota;
147
3.00k
    const auto last_pos = Set(du, ch.w);
148
79.5k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
76.5k
      const auto left = LoadU(di, last);
150
76.5k
      const auto central = Load(di, r + x);
151
76.5k
      const auto ures = BitCast(du, Sub(central, left));
152
76.5k
      const auto packed =
153
76.5k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
76.5k
      const auto is_large = Gt(packed, kLargeThreshold);
155
76.5k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
76.5k
      const auto not_literal = Ge(packed, kSplit);
157
76.5k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
76.5k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
76.5k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
76.5k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
76.5k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
76.5k
                             And(ShiftRight<21>(v), kMsbMask));
163
76.5k
      const auto tail_mask = Lt(pos, last_pos);
164
76.5k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
76.5k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
76.5k
      extra_bits_lanes =
167
76.5k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
76.5k
      Store(token_fixed, du, token_row + x);
169
76.5k
      pos = Add(pos, kLanes);
170
76.5k
      last = r + x + Lanes(di) - 1;
171
76.5k
    }
172
610k
    for (size_t x = 0; x < ch.w; x++) {
173
607k
      histo[0].FastAdd(token_row[x]);
174
607k
    }
175
625k
    for (size_t y = 1; y < ch.h; y++) {
176
622k
      r = ch.Row(y);
177
622k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
622k
      last = primer;
179
622k
      primer[0] = t[0];
180
622k
      StoreU(Load(di, r), di, primer + 1);
181
622k
      top_primer[0] = t[0];
182
622k
      StoreU(Load(di, t), di, top_primer + 1);
183
622k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
622k
      pos = kIota;
185
19.6M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
18.9M
        const auto left = LoadU(di, last);
187
18.9M
        const auto central = Load(di, r + x);
188
18.9M
        const auto topleft = LoadU(di, top_last);
189
18.9M
        const auto top = Load(di, t + x);
190
18.9M
        const auto l_ge_t = Ge(left, top);
191
18.9M
        const auto m = IfThenElse(l_ge_t, top, left);
192
18.9M
        const auto M = IfThenElse(l_ge_t, left, top);
193
18.9M
        const auto maxx = Max(topleft, M);
194
18.9M
        const auto minn = Min(topleft, m);
195
18.9M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
18.9M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
18.9M
        const auto overshoot = Lt(topleft, m);
198
18.9M
        const auto undershoot = Gt(topleft, M);
199
18.9M
        const auto grad =
200
18.9M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
18.9M
                            BitCast(du, topleft)));
202
18.9M
        const auto prediction =
203
18.9M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
18.9M
        const auto ures = BitCast(du, Sub(central, prediction));
205
18.9M
        const auto packed =
206
18.9M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
18.9M
        const auto is_large = Gt(packed, kLargeThreshold);
208
18.9M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
18.9M
        const auto not_literal = Ge(packed, kSplit);
210
18.9M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
18.9M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
18.9M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
18.9M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
18.9M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
18.9M
                               And(ShiftRight<21>(v), kMsbMask));
216
18.9M
        const auto tail_mask = Lt(pos, last_pos);
217
18.9M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
18.9M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
18.9M
        extra_bits_lanes =
220
18.9M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
18.9M
        Store(token_fixed, du, token_row + x);
222
18.9M
        pos = Add(pos, kLanes);
223
18.9M
        last = r + x + Lanes(di) - 1;
224
18.9M
        top_last = t + x + Lanes(di) - 1;
225
18.9M
      }
226
151M
      for (size_t x = 0; x < ch.w; x++) {
227
150M
        size_t ctx = ctx_map[max_diff_row[x]];
228
150M
        histo[ctx].FastAdd(token_row[x]);
229
150M
      }
230
622k
    }
231
51.0k
    for (auto& h : histo) {
232
51.0k
      h.Condition();
233
51.0k
      float f_cost = h.ShannonEntropy();
234
51.0k
      size_t i_cost = f_cost;
235
51.0k
      histo_cost += i_cost;
236
51.0k
      histo_cost_frac += f_cost - i_cost;
237
51.0k
      h.Clear();
238
51.0k
    }
239
3.00k
  }
240
3.00k
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
3.00k
#endif
242
3.00k
  size_t total_cost =
243
3.00k
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
3.00k
  return total_cost;
245
3.00k
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
3.00k
StatusOr<float> EstimateCost(const Image& img) {
57
3.00k
  size_t histo_cost = 0;
58
3.00k
  float histo_cost_frac = 0.0f;
59
3.00k
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
3.00k
  JxlMemoryManager* memory_manager = img.memory_manager();
100
3.00k
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
3.00k
  const HWY_FULL(int32_t) di;
102
3.00k
  const HWY_FULL(uint32_t) du;
103
3.00k
  const HWY_FULL(float) df;
104
3.00k
  const auto kOne = Set(du, 1);
105
3.00k
  const auto kSplit = Set(du, 16);
106
3.00k
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
3.00k
  const auto kTokenBias = Set(du, 8);
108
3.00k
  const auto kTokenMul = Set(du, 4);
109
3.00k
  const auto kMsbMask = Set(du, 3);
110
3.00k
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
3.00k
  const auto kLanes = Set(du, Lanes(du));
112
3.00k
  const auto kIota = Iota(du, 0);
113
3.00k
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
3.00k
  constexpr size_t kLargeShiftVal = 10;
115
3.00k
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
3.00k
  size_t max_w = 0;
118
3.00k
  for (const Channel& ch : img.channel) {
119
3.00k
    if (ch.h == 0) continue;
120
3.00k
    max_w = std::max(max_w, ch.w);
121
3.00k
  }
122
3.00k
  max_w = RoundUpTo(max_w, Lanes(du));
123
3.00k
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
3.00k
  JXL_ASSIGN_OR_RETURN(
126
3.00k
      AlignedMemory buffer,
127
3.00k
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
3.00k
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
3.00k
  uint32_t* token_row = max_diff_row + max_w;
130
3.00k
  int32_t* primer = buffer.address<int32_t>();
131
3.00k
  int32_t* top_primer = primer + max_w;
132
133
3.00k
  HybridUintConfig config;
134
135
3.00k
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
3.00k
  auto extra_bits_lanes = Zero(du);
137
3.00k
  for (const Channel& ch : img.channel) {
138
3.00k
    if (ch.h == 0 || ch.w == 0) continue;
139
51.0k
    for (auto& h : histo) {
140
51.0k
      h.EnsureCapacity(32 * 4);
141
51.0k
    }
142
3.00k
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
3.00k
    const pixel_type* JXL_RESTRICT last = primer;
144
3.00k
    primer[0] = 0;
145
3.00k
    StoreU(Load(di, r), di, primer + 1);
146
3.00k
    auto pos = kIota;
147
3.00k
    const auto last_pos = Set(du, ch.w);
148
79.5k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
76.5k
      const auto left = LoadU(di, last);
150
76.5k
      const auto central = Load(di, r + x);
151
76.5k
      const auto ures = BitCast(du, Sub(central, left));
152
76.5k
      const auto packed =
153
76.5k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
76.5k
      const auto is_large = Gt(packed, kLargeThreshold);
155
76.5k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
76.5k
      const auto not_literal = Ge(packed, kSplit);
157
76.5k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
76.5k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
76.5k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
76.5k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
76.5k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
76.5k
                             And(ShiftRight<21>(v), kMsbMask));
163
76.5k
      const auto tail_mask = Lt(pos, last_pos);
164
76.5k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
76.5k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
76.5k
      extra_bits_lanes =
167
76.5k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
76.5k
      Store(token_fixed, du, token_row + x);
169
76.5k
      pos = Add(pos, kLanes);
170
76.5k
      last = r + x + Lanes(di) - 1;
171
76.5k
    }
172
610k
    for (size_t x = 0; x < ch.w; x++) {
173
607k
      histo[0].FastAdd(token_row[x]);
174
607k
    }
175
625k
    for (size_t y = 1; y < ch.h; y++) {
176
622k
      r = ch.Row(y);
177
622k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
622k
      last = primer;
179
622k
      primer[0] = t[0];
180
622k
      StoreU(Load(di, r), di, primer + 1);
181
622k
      top_primer[0] = t[0];
182
622k
      StoreU(Load(di, t), di, top_primer + 1);
183
622k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
622k
      pos = kIota;
185
19.6M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
18.9M
        const auto left = LoadU(di, last);
187
18.9M
        const auto central = Load(di, r + x);
188
18.9M
        const auto topleft = LoadU(di, top_last);
189
18.9M
        const auto top = Load(di, t + x);
190
18.9M
        const auto l_ge_t = Ge(left, top);
191
18.9M
        const auto m = IfThenElse(l_ge_t, top, left);
192
18.9M
        const auto M = IfThenElse(l_ge_t, left, top);
193
18.9M
        const auto maxx = Max(topleft, M);
194
18.9M
        const auto minn = Min(topleft, m);
195
18.9M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
18.9M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
18.9M
        const auto overshoot = Lt(topleft, m);
198
18.9M
        const auto undershoot = Gt(topleft, M);
199
18.9M
        const auto grad =
200
18.9M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
18.9M
                            BitCast(du, topleft)));
202
18.9M
        const auto prediction =
203
18.9M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
18.9M
        const auto ures = BitCast(du, Sub(central, prediction));
205
18.9M
        const auto packed =
206
18.9M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
18.9M
        const auto is_large = Gt(packed, kLargeThreshold);
208
18.9M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
18.9M
        const auto not_literal = Ge(packed, kSplit);
210
18.9M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
18.9M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
18.9M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
18.9M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
18.9M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
18.9M
                               And(ShiftRight<21>(v), kMsbMask));
216
18.9M
        const auto tail_mask = Lt(pos, last_pos);
217
18.9M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
18.9M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
18.9M
        extra_bits_lanes =
220
18.9M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
18.9M
        Store(token_fixed, du, token_row + x);
222
18.9M
        pos = Add(pos, kLanes);
223
18.9M
        last = r + x + Lanes(di) - 1;
224
18.9M
        top_last = t + x + Lanes(di) - 1;
225
18.9M
      }
226
151M
      for (size_t x = 0; x < ch.w; x++) {
227
150M
        size_t ctx = ctx_map[max_diff_row[x]];
228
150M
        histo[ctx].FastAdd(token_row[x]);
229
150M
      }
230
622k
    }
231
51.0k
    for (auto& h : histo) {
232
51.0k
      h.Condition();
233
51.0k
      float f_cost = h.ShannonEntropy();
234
51.0k
      size_t i_cost = f_cost;
235
51.0k
      histo_cost += i_cost;
236
51.0k
      histo_cost_frac += f_cost - i_cost;
237
51.0k
      h.Clear();
238
51.0k
    }
239
3.00k
  }
240
3.00k
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
3.00k
#endif
242
3.00k
  size_t total_cost =
243
3.00k
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
3.00k
  return total_cost;
245
3.00k
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
HWY_EXPORT(EstimateCost);
256
257
3.00k
StatusOr<float> EstimateCost(const Image& img) {
258
3.00k
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
3.00k
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][0] for i in range(501)]
265
*/
266
3.00k
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
3.00k
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
3.00k
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
3.00k
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
3.00k
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
3.00k
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
3.00k
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
3.00k
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
3.00k
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
3.00k
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
3.00k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
3.00k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
3.00k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
3.00k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
3.00k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
3.00k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
3.00k
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
3.00k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
3.00k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
3.00k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
3.00k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
3.00k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
3.00k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
3.00k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
3.00k
  return kCtxMap;
297
3.00k
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif