Coverage Report

Created: 2025-12-13 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
877
StatusOr<float> EstimateCost(const Image& img) {
57
877
  size_t histo_cost = 0;
58
877
  float histo_cost_frac = 0.0f;
59
877
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
877
  JxlMemoryManager* memory_manager = img.memory_manager();
100
877
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
877
  const HWY_FULL(int32_t) di;
102
877
  const HWY_FULL(uint32_t) du;
103
877
  const HWY_FULL(float) df;
104
877
  const auto kOne = Set(du, 1);
105
877
  const auto kSplit = Set(du, 16);
106
877
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
877
  const auto kTokenBias = Set(du, 8);
108
877
  const auto kTokenMul = Set(du, 4);
109
877
  const auto kMsbMask = Set(du, 3);
110
877
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
877
  const auto kLanes = Set(du, Lanes(du));
112
877
  const auto kIota = Iota(du, 0);
113
877
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
877
  constexpr size_t kLargeShiftVal = 10;
115
877
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
877
  size_t max_w = 0;
118
877
  for (const Channel& ch : img.channel) {
119
877
    if (ch.h == 0) continue;
120
877
    max_w = std::max(max_w, ch.w);
121
877
  }
122
877
  max_w = RoundUpTo(max_w, Lanes(du));
123
877
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
877
  JXL_ASSIGN_OR_RETURN(
126
877
      AlignedMemory buffer,
127
877
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
877
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
877
  uint32_t* token_row = max_diff_row + max_w;
130
877
  int32_t* primer = buffer.address<int32_t>();
131
877
  int32_t* top_primer = primer + max_w;
132
133
877
  HybridUintConfig config;
134
135
877
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
877
  auto extra_bits_lanes = Zero(du);
137
877
  for (const Channel& ch : img.channel) {
138
877
    if (ch.h == 0 || ch.w == 0) continue;
139
14.9k
    for (auto& h : histo) {
140
14.9k
      h.EnsureCapacity(32 * 4);
141
14.9k
    }
142
877
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
877
    const pixel_type* JXL_RESTRICT last = primer;
144
877
    primer[0] = 0;
145
877
    StoreU(Load(di, r), di, primer + 1);
146
877
    auto pos = kIota;
147
877
    const auto last_pos = Set(du, ch.w);
148
23.4k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
22.6k
      const auto left = LoadU(di, last);
150
22.6k
      const auto central = Load(di, r + x);
151
22.6k
      const auto ures = BitCast(du, Sub(central, left));
152
22.6k
      const auto packed =
153
22.6k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
22.6k
      const auto is_large = Gt(packed, kLargeThreshold);
155
22.6k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
22.6k
      const auto not_literal = Ge(packed, kSplit);
157
22.6k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
22.6k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
22.6k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
22.6k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
22.6k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
22.6k
                             And(ShiftRight<21>(v), kMsbMask));
163
22.6k
      const auto tail_mask = Lt(pos, last_pos);
164
22.6k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
22.6k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
22.6k
      extra_bits_lanes =
167
22.6k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
22.6k
      Store(token_fixed, du, token_row + x);
169
22.6k
      pos = Add(pos, kLanes);
170
22.6k
      last = r + x + Lanes(di) - 1;
171
22.6k
    }
172
179k
    for (size_t x = 0; x < ch.w; x++) {
173
178k
      histo[0].FastAdd(token_row[x]);
174
178k
    }
175
177k
    for (size_t y = 1; y < ch.h; y++) {
176
176k
      r = ch.Row(y);
177
176k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
176k
      last = primer;
179
176k
      primer[0] = t[0];
180
176k
      StoreU(Load(di, r), di, primer + 1);
181
176k
      top_primer[0] = t[0];
182
176k
      StoreU(Load(di, t), di, top_primer + 1);
183
176k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
176k
      pos = kIota;
185
7.72M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
7.54M
        const auto left = LoadU(di, last);
187
7.54M
        const auto central = Load(di, r + x);
188
7.54M
        const auto topleft = LoadU(di, top_last);
189
7.54M
        const auto top = Load(di, t + x);
190
7.54M
        const auto l_ge_t = Ge(left, top);
191
7.54M
        const auto m = IfThenElse(l_ge_t, top, left);
192
7.54M
        const auto M = IfThenElse(l_ge_t, left, top);
193
7.54M
        const auto maxx = Max(topleft, M);
194
7.54M
        const auto minn = Min(topleft, m);
195
7.54M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
7.54M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
7.54M
        const auto overshoot = Lt(topleft, m);
198
7.54M
        const auto undershoot = Gt(topleft, M);
199
7.54M
        const auto grad =
200
7.54M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
7.54M
                            BitCast(du, topleft)));
202
7.54M
        const auto prediction =
203
7.54M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
7.54M
        const auto ures = BitCast(du, Sub(central, prediction));
205
7.54M
        const auto packed =
206
7.54M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
7.54M
        const auto is_large = Gt(packed, kLargeThreshold);
208
7.54M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
7.54M
        const auto not_literal = Ge(packed, kSplit);
210
7.54M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
7.54M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
7.54M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
7.54M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
7.54M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
7.54M
                               And(ShiftRight<21>(v), kMsbMask));
216
7.54M
        const auto tail_mask = Lt(pos, last_pos);
217
7.54M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
7.54M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
7.54M
        extra_bits_lanes =
220
7.54M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
7.54M
        Store(token_fixed, du, token_row + x);
222
7.54M
        pos = Add(pos, kLanes);
223
7.54M
        last = r + x + Lanes(di) - 1;
224
7.54M
        top_last = t + x + Lanes(di) - 1;
225
7.54M
      }
226
59.9M
      for (size_t x = 0; x < ch.w; x++) {
227
59.7M
        size_t ctx = ctx_map[max_diff_row[x]];
228
59.7M
        histo[ctx].FastAdd(token_row[x]);
229
59.7M
      }
230
176k
    }
231
14.9k
    for (auto& h : histo) {
232
14.9k
      h.Condition();
233
14.9k
      float f_cost = h.ShannonEntropy();
234
14.9k
      size_t i_cost = f_cost;
235
14.9k
      histo_cost += i_cost;
236
14.9k
      histo_cost_frac += f_cost - i_cost;
237
14.9k
      h.Clear();
238
14.9k
    }
239
877
  }
240
877
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
877
#endif
242
877
  size_t total_cost =
243
877
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
877
  return total_cost;
245
877
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
877
StatusOr<float> EstimateCost(const Image& img) {
57
877
  size_t histo_cost = 0;
58
877
  float histo_cost_frac = 0.0f;
59
877
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
877
  JxlMemoryManager* memory_manager = img.memory_manager();
100
877
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
877
  const HWY_FULL(int32_t) di;
102
877
  const HWY_FULL(uint32_t) du;
103
877
  const HWY_FULL(float) df;
104
877
  const auto kOne = Set(du, 1);
105
877
  const auto kSplit = Set(du, 16);
106
877
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
877
  const auto kTokenBias = Set(du, 8);
108
877
  const auto kTokenMul = Set(du, 4);
109
877
  const auto kMsbMask = Set(du, 3);
110
877
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
877
  const auto kLanes = Set(du, Lanes(du));
112
877
  const auto kIota = Iota(du, 0);
113
877
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
877
  constexpr size_t kLargeShiftVal = 10;
115
877
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
877
  size_t max_w = 0;
118
877
  for (const Channel& ch : img.channel) {
119
877
    if (ch.h == 0) continue;
120
877
    max_w = std::max(max_w, ch.w);
121
877
  }
122
877
  max_w = RoundUpTo(max_w, Lanes(du));
123
877
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
877
  JXL_ASSIGN_OR_RETURN(
126
877
      AlignedMemory buffer,
127
877
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
877
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
877
  uint32_t* token_row = max_diff_row + max_w;
130
877
  int32_t* primer = buffer.address<int32_t>();
131
877
  int32_t* top_primer = primer + max_w;
132
133
877
  HybridUintConfig config;
134
135
877
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
877
  auto extra_bits_lanes = Zero(du);
137
877
  for (const Channel& ch : img.channel) {
138
877
    if (ch.h == 0 || ch.w == 0) continue;
139
14.9k
    for (auto& h : histo) {
140
14.9k
      h.EnsureCapacity(32 * 4);
141
14.9k
    }
142
877
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
877
    const pixel_type* JXL_RESTRICT last = primer;
144
877
    primer[0] = 0;
145
877
    StoreU(Load(di, r), di, primer + 1);
146
877
    auto pos = kIota;
147
877
    const auto last_pos = Set(du, ch.w);
148
23.4k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
22.6k
      const auto left = LoadU(di, last);
150
22.6k
      const auto central = Load(di, r + x);
151
22.6k
      const auto ures = BitCast(du, Sub(central, left));
152
22.6k
      const auto packed =
153
22.6k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
22.6k
      const auto is_large = Gt(packed, kLargeThreshold);
155
22.6k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
22.6k
      const auto not_literal = Ge(packed, kSplit);
157
22.6k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
22.6k
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
22.6k
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
22.6k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
22.6k
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
22.6k
                             And(ShiftRight<21>(v), kMsbMask));
163
22.6k
      const auto tail_mask = Lt(pos, last_pos);
164
22.6k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
22.6k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
22.6k
      extra_bits_lanes =
167
22.6k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
22.6k
      Store(token_fixed, du, token_row + x);
169
22.6k
      pos = Add(pos, kLanes);
170
22.6k
      last = r + x + Lanes(di) - 1;
171
22.6k
    }
172
179k
    for (size_t x = 0; x < ch.w; x++) {
173
178k
      histo[0].FastAdd(token_row[x]);
174
178k
    }
175
177k
    for (size_t y = 1; y < ch.h; y++) {
176
176k
      r = ch.Row(y);
177
176k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
176k
      last = primer;
179
176k
      primer[0] = t[0];
180
176k
      StoreU(Load(di, r), di, primer + 1);
181
176k
      top_primer[0] = t[0];
182
176k
      StoreU(Load(di, t), di, top_primer + 1);
183
176k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
176k
      pos = kIota;
185
7.72M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
7.54M
        const auto left = LoadU(di, last);
187
7.54M
        const auto central = Load(di, r + x);
188
7.54M
        const auto topleft = LoadU(di, top_last);
189
7.54M
        const auto top = Load(di, t + x);
190
7.54M
        const auto l_ge_t = Ge(left, top);
191
7.54M
        const auto m = IfThenElse(l_ge_t, top, left);
192
7.54M
        const auto M = IfThenElse(l_ge_t, left, top);
193
7.54M
        const auto maxx = Max(topleft, M);
194
7.54M
        const auto minn = Min(topleft, m);
195
7.54M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
7.54M
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
7.54M
        const auto overshoot = Lt(topleft, m);
198
7.54M
        const auto undershoot = Gt(topleft, M);
199
7.54M
        const auto grad =
200
7.54M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
7.54M
                            BitCast(du, topleft)));
202
7.54M
        const auto prediction =
203
7.54M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
7.54M
        const auto ures = BitCast(du, Sub(central, prediction));
205
7.54M
        const auto packed =
206
7.54M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
7.54M
        const auto is_large = Gt(packed, kLargeThreshold);
208
7.54M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
7.54M
        const auto not_literal = Ge(packed, kSplit);
210
7.54M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
7.54M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
7.54M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
7.54M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
7.54M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
7.54M
                               And(ShiftRight<21>(v), kMsbMask));
216
7.54M
        const auto tail_mask = Lt(pos, last_pos);
217
7.54M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
7.54M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
7.54M
        extra_bits_lanes =
220
7.54M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
7.54M
        Store(token_fixed, du, token_row + x);
222
7.54M
        pos = Add(pos, kLanes);
223
7.54M
        last = r + x + Lanes(di) - 1;
224
7.54M
        top_last = t + x + Lanes(di) - 1;
225
7.54M
      }
226
59.9M
      for (size_t x = 0; x < ch.w; x++) {
227
59.7M
        size_t ctx = ctx_map[max_diff_row[x]];
228
59.7M
        histo[ctx].FastAdd(token_row[x]);
229
59.7M
      }
230
176k
    }
231
14.9k
    for (auto& h : histo) {
232
14.9k
      h.Condition();
233
14.9k
      float f_cost = h.ShannonEntropy();
234
14.9k
      size_t i_cost = f_cost;
235
14.9k
      histo_cost += i_cost;
236
14.9k
      histo_cost_frac += f_cost - i_cost;
237
14.9k
      h.Clear();
238
14.9k
    }
239
877
  }
240
877
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
877
#endif
242
877
  size_t total_cost =
243
877
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
877
  return total_cost;
245
877
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
HWY_EXPORT(EstimateCost);
256
257
877
StatusOr<float> EstimateCost(const Image& img) {
258
877
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
877
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
265
*/
266
877
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
877
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
877
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
877
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
877
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
877
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
877
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
877
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
877
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
877
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
877
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
877
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
877
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
877
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
877
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
877
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
877
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
877
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
877
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
877
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
877
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
877
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
877
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
877
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
877
  return kCtxMap;
297
877
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif