Coverage Report

Created: 2026-05-16 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
2.69k
StatusOr<float> EstimateCost(const Image& img) {
57
2.69k
  size_t histo_cost = 0;
58
2.69k
  float histo_cost_frac = 0.0f;
59
2.69k
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
2.69k
  JxlMemoryManager* memory_manager = img.memory_manager();
100
2.69k
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
2.69k
  const HWY_FULL(int32_t) di;
102
2.69k
  const HWY_FULL(uint32_t) du;
103
2.69k
  const HWY_FULL(float) df;
104
2.69k
  const auto kOne = Set(du, 1);
105
2.69k
  const auto kSplit = Set(du, 16);
106
2.69k
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
2.69k
  const auto kTokenBias = Set(du, 8);
108
2.69k
  const auto kTokenMul = Set(du, 4);
109
2.69k
  const auto kMsbMask = Set(du, 3);
110
2.69k
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
2.69k
  const auto kLanes = Set(du, Lanes(du));
112
2.69k
  const auto kIota = Iota(du, 0);
113
2.69k
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
2.69k
  constexpr size_t kLargeShiftVal = 10;
115
2.69k
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
2.69k
  size_t max_w = 0;
118
2.69k
  for (const Channel& ch : img.channel) {
119
2.69k
    if (ch.h == 0) continue;
120
2.69k
    max_w = std::max(max_w, ch.w);
121
2.69k
  }
122
2.69k
  max_w = RoundUpTo(max_w, Lanes(du));
123
2.69k
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
2.69k
  JXL_ASSIGN_OR_RETURN(
126
2.69k
      AlignedMemory buffer,
127
2.69k
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
2.69k
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
2.69k
  uint32_t* token_row = max_diff_row + max_w;
130
2.69k
  int32_t* primer = buffer.address<int32_t>();
131
2.69k
  int32_t* top_primer = primer + max_w;
132
133
2.69k
  HybridUintConfig config;
134
135
2.69k
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
2.69k
  auto extra_bits_lanes = Zero(du);
137
2.69k
  for (const Channel& ch : img.channel) {
138
2.69k
    if (ch.h == 0 || ch.w == 0) continue;
139
45.8k
    for (auto& h : histo) {
140
45.8k
      h.EnsureCapacity(32 * 4);
141
45.8k
    }
142
2.69k
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
2.69k
    const pixel_type* JXL_RESTRICT last = primer;
144
2.69k
    primer[0] = 0;
145
2.69k
    StoreU(Load(di, r), di, primer + 1);
146
2.69k
    auto pos = kIota;
147
2.69k
    const auto last_pos = Set(du, ch.w);
148
70.2k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
67.5k
      const auto left = LoadU(di, last);
150
67.5k
      const auto central = Load(di, r + x);
151
67.5k
      const auto ures = BitCast(du, Sub(central, left));
152
67.5k
      const auto packed =
153
67.5k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
67.5k
      const auto is_large = Gt(packed, kLargeThreshold);
155
67.5k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
67.5k
      const auto not_literal = Ge(packed, kSplit);
157
67.5k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
67.5k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
67.5k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
67.5k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
67.5k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
67.5k
                             And(ShiftRight<21>(v), kMsbMask));
163
67.5k
      const auto tail_mask = Lt(pos, last_pos);
164
67.5k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
67.5k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
67.5k
      extra_bits_lanes =
167
67.5k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
67.5k
      Store(token_fixed, du, token_row + x);
169
67.5k
      pos = Add(pos, kLanes);
170
67.5k
      last = r + x + Lanes(di) - 1;
171
67.5k
    }
172
538k
    for (size_t x = 0; x < ch.w; x++) {
173
535k
      histo[0].FastAdd(token_row[x]);
174
535k
    }
175
553k
    for (size_t y = 1; y < ch.h; y++) {
176
550k
      r = ch.Row(y);
177
550k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
550k
      last = primer;
179
550k
      primer[0] = t[0];
180
550k
      StoreU(Load(di, r), di, primer + 1);
181
550k
      top_primer[0] = t[0];
182
550k
      StoreU(Load(di, t), di, top_primer + 1);
183
550k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
550k
      pos = kIota;
185
17.2M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
16.7M
        const auto left = LoadU(di, last);
187
16.7M
        const auto central = Load(di, r + x);
188
16.7M
        const auto topleft = LoadU(di, top_last);
189
16.7M
        const auto top = Load(di, t + x);
190
16.7M
        const auto l_ge_t = Ge(left, top);
191
16.7M
        const auto m = IfThenElse(l_ge_t, top, left);
192
16.7M
        const auto M = IfThenElse(l_ge_t, left, top);
193
16.7M
        const auto maxx = Max(topleft, M);
194
16.7M
        const auto minn = Min(topleft, m);
195
16.7M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
16.7M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
16.7M
        const auto overshoot = Lt(topleft, m);
198
16.7M
        const auto undershoot = Gt(topleft, M);
199
16.7M
        const auto grad =
200
16.7M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
16.7M
                            BitCast(du, topleft)));
202
16.7M
        const auto prediction =
203
16.7M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
16.7M
        const auto ures = BitCast(du, Sub(central, prediction));
205
16.7M
        const auto packed =
206
16.7M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
16.7M
        const auto is_large = Gt(packed, kLargeThreshold);
208
16.7M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
16.7M
        const auto not_literal = Ge(packed, kSplit);
210
16.7M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
16.7M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
16.7M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
16.7M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
16.7M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
16.7M
                               And(ShiftRight<21>(v), kMsbMask));
216
16.7M
        const auto tail_mask = Lt(pos, last_pos);
217
16.7M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
16.7M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
16.7M
        extra_bits_lanes =
220
16.7M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
16.7M
        Store(token_fixed, du, token_row + x);
222
16.7M
        pos = Add(pos, kLanes);
223
16.7M
        last = r + x + Lanes(di) - 1;
224
16.7M
        top_last = t + x + Lanes(di) - 1;
225
16.7M
      }
226
133M
      for (size_t x = 0; x < ch.w; x++) {
227
132M
        size_t ctx = ctx_map[max_diff_row[x]];
228
132M
        histo[ctx].FastAdd(token_row[x]);
229
132M
      }
230
550k
    }
231
45.8k
    for (auto& h : histo) {
232
45.8k
      h.Condition();
233
45.8k
      float f_cost = h.ShannonEntropy();
234
45.8k
      size_t i_cost = f_cost;
235
45.8k
      histo_cost += i_cost;
236
45.8k
      histo_cost_frac += f_cost - i_cost;
237
45.8k
      h.Clear();
238
45.8k
    }
239
2.69k
  }
240
2.69k
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
2.69k
#endif
242
2.69k
  size_t total_cost =
243
2.69k
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
2.69k
  return total_cost;
245
2.69k
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
2.69k
StatusOr<float> EstimateCost(const Image& img) {
57
2.69k
  size_t histo_cost = 0;
58
2.69k
  float histo_cost_frac = 0.0f;
59
2.69k
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
2.69k
  JxlMemoryManager* memory_manager = img.memory_manager();
100
2.69k
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
2.69k
  const HWY_FULL(int32_t) di;
102
2.69k
  const HWY_FULL(uint32_t) du;
103
2.69k
  const HWY_FULL(float) df;
104
2.69k
  const auto kOne = Set(du, 1);
105
2.69k
  const auto kSplit = Set(du, 16);
106
2.69k
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
2.69k
  const auto kTokenBias = Set(du, 8);
108
2.69k
  const auto kTokenMul = Set(du, 4);
109
2.69k
  const auto kMsbMask = Set(du, 3);
110
2.69k
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
2.69k
  const auto kLanes = Set(du, Lanes(du));
112
2.69k
  const auto kIota = Iota(du, 0);
113
2.69k
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
2.69k
  constexpr size_t kLargeShiftVal = 10;
115
2.69k
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
2.69k
  size_t max_w = 0;
118
2.69k
  for (const Channel& ch : img.channel) {
119
2.69k
    if (ch.h == 0) continue;
120
2.69k
    max_w = std::max(max_w, ch.w);
121
2.69k
  }
122
2.69k
  max_w = RoundUpTo(max_w, Lanes(du));
123
2.69k
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
2.69k
  JXL_ASSIGN_OR_RETURN(
126
2.69k
      AlignedMemory buffer,
127
2.69k
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
2.69k
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
2.69k
  uint32_t* token_row = max_diff_row + max_w;
130
2.69k
  int32_t* primer = buffer.address<int32_t>();
131
2.69k
  int32_t* top_primer = primer + max_w;
132
133
2.69k
  HybridUintConfig config;
134
135
2.69k
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
2.69k
  auto extra_bits_lanes = Zero(du);
137
2.69k
  for (const Channel& ch : img.channel) {
138
2.69k
    if (ch.h == 0 || ch.w == 0) continue;
139
45.8k
    for (auto& h : histo) {
140
45.8k
      h.EnsureCapacity(32 * 4);
141
45.8k
    }
142
2.69k
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
2.69k
    const pixel_type* JXL_RESTRICT last = primer;
144
2.69k
    primer[0] = 0;
145
2.69k
    StoreU(Load(di, r), di, primer + 1);
146
2.69k
    auto pos = kIota;
147
2.69k
    const auto last_pos = Set(du, ch.w);
148
70.2k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
67.5k
      const auto left = LoadU(di, last);
150
67.5k
      const auto central = Load(di, r + x);
151
67.5k
      const auto ures = BitCast(du, Sub(central, left));
152
67.5k
      const auto packed =
153
67.5k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
67.5k
      const auto is_large = Gt(packed, kLargeThreshold);
155
67.5k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
67.5k
      const auto not_literal = Ge(packed, kSplit);
157
67.5k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
67.5k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
67.5k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
67.5k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
67.5k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
67.5k
                             And(ShiftRight<21>(v), kMsbMask));
163
67.5k
      const auto tail_mask = Lt(pos, last_pos);
164
67.5k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
67.5k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
67.5k
      extra_bits_lanes =
167
67.5k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
67.5k
      Store(token_fixed, du, token_row + x);
169
67.5k
      pos = Add(pos, kLanes);
170
67.5k
      last = r + x + Lanes(di) - 1;
171
67.5k
    }
172
538k
    for (size_t x = 0; x < ch.w; x++) {
173
535k
      histo[0].FastAdd(token_row[x]);
174
535k
    }
175
553k
    for (size_t y = 1; y < ch.h; y++) {
176
550k
      r = ch.Row(y);
177
550k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
550k
      last = primer;
179
550k
      primer[0] = t[0];
180
550k
      StoreU(Load(di, r), di, primer + 1);
181
550k
      top_primer[0] = t[0];
182
550k
      StoreU(Load(di, t), di, top_primer + 1);
183
550k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
550k
      pos = kIota;
185
17.2M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
16.7M
        const auto left = LoadU(di, last);
187
16.7M
        const auto central = Load(di, r + x);
188
16.7M
        const auto topleft = LoadU(di, top_last);
189
16.7M
        const auto top = Load(di, t + x);
190
16.7M
        const auto l_ge_t = Ge(left, top);
191
16.7M
        const auto m = IfThenElse(l_ge_t, top, left);
192
16.7M
        const auto M = IfThenElse(l_ge_t, left, top);
193
16.7M
        const auto maxx = Max(topleft, M);
194
16.7M
        const auto minn = Min(topleft, m);
195
16.7M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
16.7M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
16.7M
        const auto overshoot = Lt(topleft, m);
198
16.7M
        const auto undershoot = Gt(topleft, M);
199
16.7M
        const auto grad =
200
16.7M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
16.7M
                            BitCast(du, topleft)));
202
16.7M
        const auto prediction =
203
16.7M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
16.7M
        const auto ures = BitCast(du, Sub(central, prediction));
205
16.7M
        const auto packed =
206
16.7M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
16.7M
        const auto is_large = Gt(packed, kLargeThreshold);
208
16.7M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
16.7M
        const auto not_literal = Ge(packed, kSplit);
210
16.7M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
16.7M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
16.7M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
16.7M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
16.7M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
16.7M
                               And(ShiftRight<21>(v), kMsbMask));
216
16.7M
        const auto tail_mask = Lt(pos, last_pos);
217
16.7M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
16.7M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
16.7M
        extra_bits_lanes =
220
16.7M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
16.7M
        Store(token_fixed, du, token_row + x);
222
16.7M
        pos = Add(pos, kLanes);
223
16.7M
        last = r + x + Lanes(di) - 1;
224
16.7M
        top_last = t + x + Lanes(di) - 1;
225
16.7M
      }
226
133M
      for (size_t x = 0; x < ch.w; x++) {
227
132M
        size_t ctx = ctx_map[max_diff_row[x]];
228
132M
        histo[ctx].FastAdd(token_row[x]);
229
132M
      }
230
550k
    }
231
45.8k
    for (auto& h : histo) {
232
45.8k
      h.Condition();
233
45.8k
      float f_cost = h.ShannonEntropy();
234
45.8k
      size_t i_cost = f_cost;
235
45.8k
      histo_cost += i_cost;
236
45.8k
      histo_cost_frac += f_cost - i_cost;
237
45.8k
      h.Clear();
238
45.8k
    }
239
2.69k
  }
240
2.69k
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
2.69k
#endif
242
2.69k
  size_t total_cost =
243
2.69k
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
2.69k
  return total_cost;
245
2.69k
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
HWY_EXPORT(EstimateCost);
256
257
2.69k
StatusOr<float> EstimateCost(const Image& img) {
258
2.69k
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
2.69k
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
265
*/
266
2.69k
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
2.69k
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
2.69k
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
2.69k
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
2.69k
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
2.69k
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
2.69k
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
2.69k
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
2.69k
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
2.69k
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
2.69k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
2.69k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
2.69k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
2.69k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
2.69k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
2.69k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
2.69k
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
2.69k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
2.69k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
2.69k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
2.69k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
2.69k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
2.69k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
2.69k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
2.69k
  return kCtxMap;
297
2.69k
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif