Coverage Report

Created: 2026-02-14 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
876
StatusOr<float> EstimateCost(const Image& img) {
57
876
  size_t histo_cost = 0;
58
876
  float histo_cost_frac = 0.0f;
59
876
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
876
  JxlMemoryManager* memory_manager = img.memory_manager();
100
876
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
876
  const HWY_FULL(int32_t) di;
102
876
  const HWY_FULL(uint32_t) du;
103
876
  const HWY_FULL(float) df;
104
876
  const auto kOne = Set(du, 1);
105
876
  const auto kSplit = Set(du, 16);
106
876
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
876
  const auto kTokenBias = Set(du, 8);
108
876
  const auto kTokenMul = Set(du, 4);
109
876
  const auto kMsbMask = Set(du, 3);
110
876
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
876
  const auto kLanes = Set(du, Lanes(du));
112
876
  const auto kIota = Iota(du, 0);
113
876
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
876
  constexpr size_t kLargeShiftVal = 10;
115
876
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
876
  size_t max_w = 0;
118
876
  for (const Channel& ch : img.channel) {
119
876
    if (ch.h == 0) continue;
120
876
    max_w = std::max(max_w, ch.w);
121
876
  }
122
876
  max_w = RoundUpTo(max_w, Lanes(du));
123
876
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
876
  JXL_ASSIGN_OR_RETURN(
126
876
      AlignedMemory buffer,
127
876
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
876
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
876
  uint32_t* token_row = max_diff_row + max_w;
130
876
  int32_t* primer = buffer.address<int32_t>();
131
876
  int32_t* top_primer = primer + max_w;
132
133
876
  HybridUintConfig config;
134
135
876
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
876
  auto extra_bits_lanes = Zero(du);
137
876
  for (const Channel& ch : img.channel) {
138
876
    if (ch.h == 0 || ch.w == 0) continue;
139
14.8k
    for (auto& h : histo) {
140
14.8k
      h.EnsureCapacity(32 * 4);
141
14.8k
    }
142
876
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
876
    const pixel_type* JXL_RESTRICT last = primer;
144
876
    primer[0] = 0;
145
876
    StoreU(Load(di, r), di, primer + 1);
146
876
    auto pos = kIota;
147
876
    const auto last_pos = Set(du, ch.w);
148
24.1k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
23.2k
      const auto left = LoadU(di, last);
150
23.2k
      const auto central = Load(di, r + x);
151
23.2k
      const auto ures = BitCast(du, Sub(central, left));
152
23.2k
      const auto packed =
153
23.2k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
23.2k
      const auto is_large = Gt(packed, kLargeThreshold);
155
23.2k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
23.2k
      const auto not_literal = Ge(packed, kSplit);
157
23.2k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
23.2k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
23.2k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
23.2k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
23.2k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
23.2k
                             And(ShiftRight<21>(v), kMsbMask));
163
23.2k
      const auto tail_mask = Lt(pos, last_pos);
164
23.2k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
23.2k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
23.2k
      extra_bits_lanes =
167
23.2k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
23.2k
      Store(token_fixed, du, token_row + x);
169
23.2k
      pos = Add(pos, kLanes);
170
23.2k
      last = r + x + Lanes(di) - 1;
171
23.2k
    }
172
184k
    for (size_t x = 0; x < ch.w; x++) {
173
183k
      histo[0].FastAdd(token_row[x]);
174
183k
    }
175
179k
    for (size_t y = 1; y < ch.h; y++) {
176
179k
      r = ch.Row(y);
177
179k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
179k
      last = primer;
179
179k
      primer[0] = t[0];
180
179k
      StoreU(Load(di, r), di, primer + 1);
181
179k
      top_primer[0] = t[0];
182
179k
      StoreU(Load(di, t), di, top_primer + 1);
183
179k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
179k
      pos = kIota;
185
8.21M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
8.03M
        const auto left = LoadU(di, last);
187
8.03M
        const auto central = Load(di, r + x);
188
8.03M
        const auto topleft = LoadU(di, top_last);
189
8.03M
        const auto top = Load(di, t + x);
190
8.03M
        const auto l_ge_t = Ge(left, top);
191
8.03M
        const auto m = IfThenElse(l_ge_t, top, left);
192
8.03M
        const auto M = IfThenElse(l_ge_t, left, top);
193
8.03M
        const auto maxx = Max(topleft, M);
194
8.03M
        const auto minn = Min(topleft, m);
195
8.03M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
8.03M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
8.03M
        const auto overshoot = Lt(topleft, m);
198
8.03M
        const auto undershoot = Gt(topleft, M);
199
8.03M
        const auto grad =
200
8.03M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
8.03M
                            BitCast(du, topleft)));
202
8.03M
        const auto prediction =
203
8.03M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
8.03M
        const auto ures = BitCast(du, Sub(central, prediction));
205
8.03M
        const auto packed =
206
8.03M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
8.03M
        const auto is_large = Gt(packed, kLargeThreshold);
208
8.03M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
8.03M
        const auto not_literal = Ge(packed, kSplit);
210
8.03M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
8.03M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
8.03M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
8.03M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
8.03M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
8.03M
                               And(ShiftRight<21>(v), kMsbMask));
216
8.03M
        const auto tail_mask = Lt(pos, last_pos);
217
8.03M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
8.03M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
8.03M
        extra_bits_lanes =
220
8.03M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
8.03M
        Store(token_fixed, du, token_row + x);
222
8.03M
        pos = Add(pos, kLanes);
223
8.03M
        last = r + x + Lanes(di) - 1;
224
8.03M
        top_last = t + x + Lanes(di) - 1;
225
8.03M
      }
226
63.8M
      for (size_t x = 0; x < ch.w; x++) {
227
63.6M
        size_t ctx = ctx_map[max_diff_row[x]];
228
63.6M
        histo[ctx].FastAdd(token_row[x]);
229
63.6M
      }
230
179k
    }
231
14.8k
    for (auto& h : histo) {
232
14.8k
      h.Condition();
233
14.8k
      float f_cost = h.ShannonEntropy();
234
14.8k
      size_t i_cost = f_cost;
235
14.8k
      histo_cost += i_cost;
236
14.8k
      histo_cost_frac += f_cost - i_cost;
237
14.8k
      h.Clear();
238
14.8k
    }
239
876
  }
240
876
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
876
#endif
242
876
  size_t total_cost =
243
876
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
876
  return total_cost;
245
876
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
876
StatusOr<float> EstimateCost(const Image& img) {
57
876
  size_t histo_cost = 0;
58
876
  float histo_cost_frac = 0.0f;
59
876
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
876
  JxlMemoryManager* memory_manager = img.memory_manager();
100
876
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
876
  const HWY_FULL(int32_t) di;
102
876
  const HWY_FULL(uint32_t) du;
103
876
  const HWY_FULL(float) df;
104
876
  const auto kOne = Set(du, 1);
105
876
  const auto kSplit = Set(du, 16);
106
876
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
876
  const auto kTokenBias = Set(du, 8);
108
876
  const auto kTokenMul = Set(du, 4);
109
876
  const auto kMsbMask = Set(du, 3);
110
876
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
876
  const auto kLanes = Set(du, Lanes(du));
112
876
  const auto kIota = Iota(du, 0);
113
876
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
876
  constexpr size_t kLargeShiftVal = 10;
115
876
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
876
  size_t max_w = 0;
118
876
  for (const Channel& ch : img.channel) {
119
876
    if (ch.h == 0) continue;
120
876
    max_w = std::max(max_w, ch.w);
121
876
  }
122
876
  max_w = RoundUpTo(max_w, Lanes(du));
123
876
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
876
  JXL_ASSIGN_OR_RETURN(
126
876
      AlignedMemory buffer,
127
876
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
876
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
876
  uint32_t* token_row = max_diff_row + max_w;
130
876
  int32_t* primer = buffer.address<int32_t>();
131
876
  int32_t* top_primer = primer + max_w;
132
133
876
  HybridUintConfig config;
134
135
876
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
876
  auto extra_bits_lanes = Zero(du);
137
876
  for (const Channel& ch : img.channel) {
138
876
    if (ch.h == 0 || ch.w == 0) continue;
139
14.8k
    for (auto& h : histo) {
140
14.8k
      h.EnsureCapacity(32 * 4);
141
14.8k
    }
142
876
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
876
    const pixel_type* JXL_RESTRICT last = primer;
144
876
    primer[0] = 0;
145
876
    StoreU(Load(di, r), di, primer + 1);
146
876
    auto pos = kIota;
147
876
    const auto last_pos = Set(du, ch.w);
148
24.1k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
23.2k
      const auto left = LoadU(di, last);
150
23.2k
      const auto central = Load(di, r + x);
151
23.2k
      const auto ures = BitCast(du, Sub(central, left));
152
23.2k
      const auto packed =
153
23.2k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
23.2k
      const auto is_large = Gt(packed, kLargeThreshold);
155
23.2k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
23.2k
      const auto not_literal = Ge(packed, kSplit);
157
23.2k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
23.2k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
23.2k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
23.2k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
23.2k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
23.2k
                             And(ShiftRight<21>(v), kMsbMask));
163
23.2k
      const auto tail_mask = Lt(pos, last_pos);
164
23.2k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
23.2k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
23.2k
      extra_bits_lanes =
167
23.2k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
23.2k
      Store(token_fixed, du, token_row + x);
169
23.2k
      pos = Add(pos, kLanes);
170
23.2k
      last = r + x + Lanes(di) - 1;
171
23.2k
    }
172
184k
    for (size_t x = 0; x < ch.w; x++) {
173
183k
      histo[0].FastAdd(token_row[x]);
174
183k
    }
175
179k
    for (size_t y = 1; y < ch.h; y++) {
176
179k
      r = ch.Row(y);
177
179k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
179k
      last = primer;
179
179k
      primer[0] = t[0];
180
179k
      StoreU(Load(di, r), di, primer + 1);
181
179k
      top_primer[0] = t[0];
182
179k
      StoreU(Load(di, t), di, top_primer + 1);
183
179k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
179k
      pos = kIota;
185
8.21M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
8.03M
        const auto left = LoadU(di, last);
187
8.03M
        const auto central = Load(di, r + x);
188
8.03M
        const auto topleft = LoadU(di, top_last);
189
8.03M
        const auto top = Load(di, t + x);
190
8.03M
        const auto l_ge_t = Ge(left, top);
191
8.03M
        const auto m = IfThenElse(l_ge_t, top, left);
192
8.03M
        const auto M = IfThenElse(l_ge_t, left, top);
193
8.03M
        const auto maxx = Max(topleft, M);
194
8.03M
        const auto minn = Min(topleft, m);
195
8.03M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
8.03M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
8.03M
        const auto overshoot = Lt(topleft, m);
198
8.03M
        const auto undershoot = Gt(topleft, M);
199
8.03M
        const auto grad =
200
8.03M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
8.03M
                            BitCast(du, topleft)));
202
8.03M
        const auto prediction =
203
8.03M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
8.03M
        const auto ures = BitCast(du, Sub(central, prediction));
205
8.03M
        const auto packed =
206
8.03M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
8.03M
        const auto is_large = Gt(packed, kLargeThreshold);
208
8.03M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
8.03M
        const auto not_literal = Ge(packed, kSplit);
210
8.03M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
8.03M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
8.03M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
8.03M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
8.03M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
8.03M
                               And(ShiftRight<21>(v), kMsbMask));
216
8.03M
        const auto tail_mask = Lt(pos, last_pos);
217
8.03M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
8.03M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
8.03M
        extra_bits_lanes =
220
8.03M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
8.03M
        Store(token_fixed, du, token_row + x);
222
8.03M
        pos = Add(pos, kLanes);
223
8.03M
        last = r + x + Lanes(di) - 1;
224
8.03M
        top_last = t + x + Lanes(di) - 1;
225
8.03M
      }
226
63.8M
      for (size_t x = 0; x < ch.w; x++) {
227
63.6M
        size_t ctx = ctx_map[max_diff_row[x]];
228
63.6M
        histo[ctx].FastAdd(token_row[x]);
229
63.6M
      }
230
179k
    }
231
14.8k
    for (auto& h : histo) {
232
14.8k
      h.Condition();
233
14.8k
      float f_cost = h.ShannonEntropy();
234
14.8k
      size_t i_cost = f_cost;
235
14.8k
      histo_cost += i_cost;
236
14.8k
      histo_cost_frac += f_cost - i_cost;
237
14.8k
      h.Clear();
238
14.8k
    }
239
876
  }
240
876
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
876
#endif
242
876
  size_t total_cost =
243
876
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
876
  return total_cost;
245
876
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
HWY_EXPORT(EstimateCost);
256
257
876
StatusOr<float> EstimateCost(const Image& img) {
258
876
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
876
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
265
*/
266
876
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
876
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
876
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
876
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
876
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
876
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
876
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
876
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
876
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
876
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
876
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
876
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
876
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
876
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
876
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
876
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
876
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
876
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
876
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
876
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
876
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
876
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
876
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
876
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
876
  return kCtxMap;
297
876
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif