Coverage Report

Created: 2025-10-12 07:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
922
// Estimates a cost for `img`. For every channel two quantities are summed:
//  - Histogram::ShannonEntropy() of a set of per-context token histograms,
//    where each token is derived from the residual between a pixel and a
//    prediction built from its left / top / top-left neighbours, and
//  - the number of extra bits that accompany those tokens.
// The HWY_SCALAR build uses a plain per-pixel loop; every other target uses
// the vectorised branch. The result is an integer (size_t) that is converted
// to the float carried by the StatusOr return type.
// NOTE(review): JXL_ASSIGN_OR_RETURN presumably returns early with the
// allocation error when the scratch buffer cannot be created - confirm.
StatusOr<float> EstimateCost(const Image& img) {
57
922
  size_t histo_cost = 0;
58
922
  float histo_cost_frac = 0.0f;
59
922
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  // Thresholds on the neighbour spread (max_diff) used to pick a context.
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  // One histogram more than there are cutoffs.
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const intptr_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        // Neighbour selection with border fallbacks; the row above is
        // addressed as r - onerow. At x == 0 `left` falls back to the pixel
        // above (or 0 on the first row); missing top / top-left fall back to
        // `left`.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        // ctx = number of cutoffs strictly greater than max_diff.
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        // `bits` is filled in by Encode() but not used afterwards.
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    // Per channel: add the integer part of each histogram's entropy to
    // histo_cost and the fractional remainder to histo_cost_frac, then reset
    // the histogram for the next channel.
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
922
  JxlMemoryManager* memory_manager = img.memory_manager();
100
922
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
922
  const HWY_FULL(int32_t) di;
102
922
  const HWY_FULL(uint32_t) du;
103
922
  const HWY_FULL(float) df;
104
922
  const auto kOne = Set(du, 1);
105
922
  // Packed residuals below kSplit are used directly as the token.
  const auto kSplit = Set(du, 16);
106
922
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
922
  const auto kTokenBias = Set(du, 8);
108
922
  const auto kTokenMul = Set(du, 4);
109
922
  const auto kMsbMask = Set(du, 3);
110
922
  // Largest value that may be used as an index into ctx_map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
922
  const auto kLanes = Set(du, Lanes(du));
112
922
  const auto kIota = Iota(du, 0);
113
922
  // Values above 2^22 - 1 are shifted right by kLargeShiftVal before the
  // int -> float conversion and the shift is added back to the bit count.
  // NOTE(review): presumably this keeps the conversion from rounding up into
  // the next exponent - confirm.
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
922
  constexpr size_t kLargeShiftVal = 10;
115
922
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
922
  // Scratch row width: the widest channel with non-zero height, rounded up to
  // a whole number of vectors and never less than two vectors.
  size_t max_w = 0;
118
922
  for (const Channel& ch : img.channel) {
119
922
    if (ch.h == 0) continue;
120
922
    max_w = std::max(max_w, ch.w);
121
922
  }
122
922
  max_w = RoundUpTo(max_w, Lanes(du));
123
922
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
922
  // Two rows of max_w 32-bit values.
  JXL_ASSIGN_OR_RETURN(
126
922
      AlignedMemory buffer,
127
922
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
922
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
922
  uint32_t* token_row = max_diff_row + max_w;
130
922
  // primer / top_primer are int32_t views of the SAME memory as max_diff_row /
  // token_row. They are written at indices 0..Lanes (element 0 plus a StoreU
  // at +1), which fits because max_w >= 2 * Lanes. The loops below load from
  // them before storing to max_diff_row / token_row, so statement order
  // matters.
  int32_t* primer = buffer.address<int32_t>();
131
922
  int32_t* top_primer = primer + max_w;
132
133
922
  // NOTE(review): `config` is not referenced anywhere in this branch.
  HybridUintConfig config;
134
135
922
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
922
  // Per-lane running sum of extra bits; reduced once after all channels.
  auto extra_bits_lanes = Zero(du);
137
922
  for (const Channel& ch : img.channel) {
138
922
    if (ch.h == 0 || ch.w == 0) continue;
139
15.6k
    for (auto& h : histo) {
140
15.6k
      h.EnsureCapacity(32 * 4);
141
15.6k
    }
142
922
    // ---- First row: the prediction is simply the left neighbour. ----
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
922
    const pixel_type* JXL_RESTRICT last = primer;
144
922
    // primer = [0, r[0], r[1], ...]: an unaligned load from primer yields the
    // left neighbours of the first vector, with 0 to the left of r[0].
    primer[0] = 0;
145
922
    StoreU(Load(di, r), di, primer + 1);
146
922
    auto pos = kIota;
147
922
    const auto last_pos = Set(du, ch.w);
148
23.2k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
22.3k
      const auto left = LoadU(di, last);
150
22.3k
      const auto central = Load(di, r + x);
151
22.3k
      const auto ures = BitCast(du, Sub(central, left));
152
22.3k
      // Sign folding: 2 * res for res >= 0, ~(2 * res) for res < 0.
      const auto packed =
153
22.3k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
22.3k
      const auto is_large = Gt(packed, kLargeThreshold);
155
22.3k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
22.3k
      const auto not_literal = Ge(packed, kSplit);
157
22.3k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
22.3k
      // Reinterpret the float: bits 23 and up hold the biased exponent, bits
      // 21..22 the two highest mantissa bits.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
22.3k
      // Only meaningful for lanes with packed >= kSplit; the other lanes are
      // replaced via not_literal below.
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
22.3k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
22.3k
      // token = 8 + 4 * eb + (two highest mantissa bits).
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
22.3k
                             And(ShiftRight<21>(v), kMsbMask));
163
22.3k
      // True for lanes whose pixel index is still inside the row.
      const auto tail_mask = Lt(pos, last_pos);
164
22.3k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
22.3k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
22.3k
      extra_bits_lanes =
167
22.3k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
22.3k
      // Tokens of out-of-row lanes are stored too, but the histogram loop
      // below only reads indices x < ch.w.
      Store(token_fixed, du, token_row + x);
169
22.3k
      pos = Add(pos, kLanes);
170
22.3k
      // From now on the left neighbours come straight from the image row.
      last = r + x + Lanes(di) - 1;
171
22.3k
    }
172
176k
    // All first-row tokens are counted in histogram 0.
    for (size_t x = 0; x < ch.w; x++) {
173
175k
      histo[0].FastAdd(token_row[x]);
174
175k
    }
175
174k
    // ---- Remaining rows: clamped gradient of left / top / top-left. ----
    for (size_t y = 1; y < ch.h; y++) {
176
173k
      r = ch.Row(y);
177
173k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
173k
      last = primer;
179
173k
      // For the first pixel of the row both the left and the top-left
      // neighbour are taken to be the pixel directly above (t[0]).
      primer[0] = t[0];
180
173k
      StoreU(Load(di, r), di, primer + 1);
181
173k
      top_primer[0] = t[0];
182
173k
      StoreU(Load(di, t), di, top_primer + 1);
183
173k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
173k
      pos = kIota;
185
7.35M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
7.17M
        const auto left = LoadU(di, last);
187
7.17M
        const auto central = Load(di, r + x);
188
7.17M
        const auto topleft = LoadU(di, top_last);
189
7.17M
        const auto top = Load(di, t + x);
190
7.17M
        // m / M: per-lane min / max of left and top.
        const auto l_ge_t = Ge(left, top);
191
7.17M
        const auto m = IfThenElse(l_ge_t, top, left);
192
7.17M
        const auto M = IfThenElse(l_ge_t, left, top);
193
7.17M
        const auto maxx = Max(topleft, M);
194
7.17M
        const auto minn = Min(topleft, m);
195
7.17M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
7.17M
        // Capped so it can index ctx_map. For x == 0 this store overwrites
        // the primer data, which has already been loaded above.
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
7.17M
        const auto overshoot = Lt(topleft, m);
198
7.17M
        const auto undershoot = Gt(topleft, M);
199
7.17M
        // top + left - topleft, evaluated on unsigned lanes.
        const auto grad =
200
7.17M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
7.17M
                            BitCast(du, topleft)));
202
7.17M
        // topleft > M -> m; topleft < m -> M; otherwise the gradient.
        const auto prediction =
203
7.17M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
7.17M
        const auto ures = BitCast(du, Sub(central, prediction));
205
7.17M
        // Same residual -> (token, extra bits) mapping as in the first-row
        // loop above.
        const auto packed =
206
7.17M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
7.17M
        const auto is_large = Gt(packed, kLargeThreshold);
208
7.17M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
7.17M
        const auto not_literal = Ge(packed, kSplit);
210
7.17M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
7.17M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
7.17M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
7.17M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
7.17M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
7.17M
                               And(ShiftRight<21>(v), kMsbMask));
216
7.17M
        const auto tail_mask = Lt(pos, last_pos);
217
7.17M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
7.17M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
7.17M
        extra_bits_lanes =
220
7.17M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
7.17M
        // token_row shares memory with top_primer, which was loaded above.
        Store(token_fixed, du, token_row + x);
222
7.17M
        pos = Add(pos, kLanes);
223
7.17M
        last = r + x + Lanes(di) - 1;
224
7.17M
        top_last = t + x + Lanes(di) - 1;
225
7.17M
      }
226
57.0M
      // Scalar pass: look up the context of each pixel and count its token.
      for (size_t x = 0; x < ch.w; x++) {
227
56.8M
        size_t ctx = ctx_map[max_diff_row[x]];
228
56.8M
        histo[ctx].FastAdd(token_row[x]);
229
56.8M
      }
230
173k
    }
231
15.6k
    // Per channel: accumulate the entropy of every histogram (integer part
    // and fractional remainder separately), then reset it.
    // NOTE(review): Condition() is defined elsewhere; presumably it finalises
    // the histogram after the FastAdd() calls - confirm.
    for (auto& h : histo) {
232
15.6k
      h.Condition();
233
15.6k
      float f_cost = h.ShannonEntropy();
234
15.6k
      size_t i_cost = f_cost;
235
15.6k
      histo_cost += i_cost;
236
15.6k
      histo_cost_frac += f_cost - i_cost;
237
15.6k
      h.Clear();
238
15.6k
    }
239
922
  }
240
922
  // Horizontal sum of the 32-bit per-lane counters.
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
922
#endif
242
922
  // The accumulated fractional parts are truncated to an integer here.
  size_t total_cost =
243
922
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
922
  return total_cost;
245
922
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
922
// Estimates a cost for `img`. For every channel two quantities are summed:
//  - Histogram::ShannonEntropy() of a set of per-context token histograms,
//    where each token is derived from the residual between a pixel and a
//    prediction built from its left / top / top-left neighbours, and
//  - the number of extra bits that accompany those tokens.
// The HWY_SCALAR build uses a plain per-pixel loop; every other target uses
// the vectorised branch. The result is an integer (size_t) that is converted
// to the float carried by the StatusOr return type.
// NOTE(review): JXL_ASSIGN_OR_RETURN presumably returns early with the
// allocation error when the scratch buffer cannot be created - confirm.
StatusOr<float> EstimateCost(const Image& img) {
57
922
  size_t histo_cost = 0;
58
922
  float histo_cost_frac = 0.0f;
59
922
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  HybridUintConfig config;
63
  // Thresholds on the neighbour spread (max_diff) used to pick a context.
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  // One histogram more than there are cutoffs.
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    const intptr_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        // Neighbour selection with border fallbacks; the row above is
        // addressed as r - onerow. At x == 0 `left` falls back to the pixel
        // above (or 0 on the first row); missing top / top-left fall back to
        // `left`.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        // ctx = number of cutoffs strictly greater than max_diff.
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        // `bits` is filled in by Encode() but not used afterwards.
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    // Per channel: add the integer part of each histogram's entropy to
    // histo_cost and the fractional remainder to histo_cost_frac, then reset
    // the histogram for the next channel.
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
922
  JxlMemoryManager* memory_manager = img.memory_manager();
100
922
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
922
  const HWY_FULL(int32_t) di;
102
922
  const HWY_FULL(uint32_t) du;
103
922
  const HWY_FULL(float) df;
104
922
  const auto kOne = Set(du, 1);
105
922
  // Packed residuals below kSplit are used directly as the token.
  const auto kSplit = Set(du, 16);
106
922
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
922
  const auto kTokenBias = Set(du, 8);
108
922
  const auto kTokenMul = Set(du, 4);
109
922
  const auto kMsbMask = Set(du, 3);
110
922
  // Largest value that may be used as an index into ctx_map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
922
  const auto kLanes = Set(du, Lanes(du));
112
922
  const auto kIota = Iota(du, 0);
113
922
  // Values above 2^22 - 1 are shifted right by kLargeShiftVal before the
  // int -> float conversion and the shift is added back to the bit count.
  // NOTE(review): presumably this keeps the conversion from rounding up into
  // the next exponent - confirm.
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
922
  constexpr size_t kLargeShiftVal = 10;
115
922
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
922
  // Scratch row width: the widest channel with non-zero height, rounded up to
  // a whole number of vectors and never less than two vectors.
  size_t max_w = 0;
118
922
  for (const Channel& ch : img.channel) {
119
922
    if (ch.h == 0) continue;
120
922
    max_w = std::max(max_w, ch.w);
121
922
  }
122
922
  max_w = RoundUpTo(max_w, Lanes(du));
123
922
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
922
  // Two rows of max_w 32-bit values.
  JXL_ASSIGN_OR_RETURN(
126
922
      AlignedMemory buffer,
127
922
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
922
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
922
  uint32_t* token_row = max_diff_row + max_w;
130
922
  // primer / top_primer are int32_t views of the SAME memory as max_diff_row /
  // token_row. They are written at indices 0..Lanes (element 0 plus a StoreU
  // at +1), which fits because max_w >= 2 * Lanes. The loops below load from
  // them before storing to max_diff_row / token_row, so statement order
  // matters.
  int32_t* primer = buffer.address<int32_t>();
131
922
  int32_t* top_primer = primer + max_w;
132
133
922
  // NOTE(review): `config` is not referenced anywhere in this branch.
  HybridUintConfig config;
134
135
922
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
922
  // Per-lane running sum of extra bits; reduced once after all channels.
  auto extra_bits_lanes = Zero(du);
137
922
  for (const Channel& ch : img.channel) {
138
922
    if (ch.h == 0 || ch.w == 0) continue;
139
15.6k
    for (auto& h : histo) {
140
15.6k
      h.EnsureCapacity(32 * 4);
141
15.6k
    }
142
922
    // ---- First row: the prediction is simply the left neighbour. ----
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
922
    const pixel_type* JXL_RESTRICT last = primer;
144
922
    // primer = [0, r[0], r[1], ...]: an unaligned load from primer yields the
    // left neighbours of the first vector, with 0 to the left of r[0].
    primer[0] = 0;
145
922
    StoreU(Load(di, r), di, primer + 1);
146
922
    auto pos = kIota;
147
922
    const auto last_pos = Set(du, ch.w);
148
23.2k
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
22.3k
      const auto left = LoadU(di, last);
150
22.3k
      const auto central = Load(di, r + x);
151
22.3k
      const auto ures = BitCast(du, Sub(central, left));
152
22.3k
      // Sign folding: 2 * res for res >= 0, ~(2 * res) for res < 0.
      const auto packed =
153
22.3k
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
22.3k
      const auto is_large = Gt(packed, kLargeThreshold);
155
22.3k
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
22.3k
      const auto not_literal = Ge(packed, kSplit);
157
22.3k
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
22.3k
      // Reinterpret the float: bits 23 and up hold the biased exponent, bits
      // 21..22 the two highest mantissa bits.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
22.3k
      // Only meaningful for lanes with packed >= kSplit; the other lanes are
      // replaced via not_literal below.
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
22.3k
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
22.3k
      // token = 8 + 4 * eb + (two highest mantissa bits).
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
22.3k
                             And(ShiftRight<21>(v), kMsbMask));
163
22.3k
      // True for lanes whose pixel index is still inside the row.
      const auto tail_mask = Lt(pos, last_pos);
164
22.3k
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
22.3k
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
22.3k
      extra_bits_lanes =
167
22.3k
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
22.3k
      // Tokens of out-of-row lanes are stored too, but the histogram loop
      // below only reads indices x < ch.w.
      Store(token_fixed, du, token_row + x);
169
22.3k
      pos = Add(pos, kLanes);
170
22.3k
      // From now on the left neighbours come straight from the image row.
      last = r + x + Lanes(di) - 1;
171
22.3k
    }
172
176k
    // All first-row tokens are counted in histogram 0.
    for (size_t x = 0; x < ch.w; x++) {
173
175k
      histo[0].FastAdd(token_row[x]);
174
175k
    }
175
174k
    // ---- Remaining rows: clamped gradient of left / top / top-left. ----
    for (size_t y = 1; y < ch.h; y++) {
176
173k
      r = ch.Row(y);
177
173k
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
173k
      last = primer;
179
173k
      // For the first pixel of the row both the left and the top-left
      // neighbour are taken to be the pixel directly above (t[0]).
      primer[0] = t[0];
180
173k
      StoreU(Load(di, r), di, primer + 1);
181
173k
      top_primer[0] = t[0];
182
173k
      StoreU(Load(di, t), di, top_primer + 1);
183
173k
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
173k
      pos = kIota;
185
7.35M
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
7.17M
        const auto left = LoadU(di, last);
187
7.17M
        const auto central = Load(di, r + x);
188
7.17M
        const auto topleft = LoadU(di, top_last);
189
7.17M
        const auto top = Load(di, t + x);
190
7.17M
        // m / M: per-lane min / max of left and top.
        const auto l_ge_t = Ge(left, top);
191
7.17M
        const auto m = IfThenElse(l_ge_t, top, left);
192
7.17M
        const auto M = IfThenElse(l_ge_t, left, top);
193
7.17M
        const auto maxx = Max(topleft, M);
194
7.17M
        const auto minn = Min(topleft, m);
195
7.17M
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
7.17M
        // Capped so it can index ctx_map. For x == 0 this store overwrites
        // the primer data, which has already been loaded above.
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
7.17M
        const auto overshoot = Lt(topleft, m);
198
7.17M
        const auto undershoot = Gt(topleft, M);
199
7.17M
        // top + left - topleft, evaluated on unsigned lanes.
        const auto grad =
200
7.17M
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
7.17M
                            BitCast(du, topleft)));
202
7.17M
        // topleft > M -> m; topleft < m -> M; otherwise the gradient.
        const auto prediction =
203
7.17M
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
7.17M
        const auto ures = BitCast(du, Sub(central, prediction));
205
7.17M
        // Same residual -> (token, extra bits) mapping as in the first-row
        // loop above.
        const auto packed =
206
7.17M
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
7.17M
        const auto is_large = Gt(packed, kLargeThreshold);
208
7.17M
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
7.17M
        const auto not_literal = Ge(packed, kSplit);
210
7.17M
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
7.17M
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
7.17M
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
7.17M
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
7.17M
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
7.17M
                               And(ShiftRight<21>(v), kMsbMask));
216
7.17M
        const auto tail_mask = Lt(pos, last_pos);
217
7.17M
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
7.17M
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
7.17M
        extra_bits_lanes =
220
7.17M
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
7.17M
        // token_row shares memory with top_primer, which was loaded above.
        Store(token_fixed, du, token_row + x);
222
7.17M
        pos = Add(pos, kLanes);
223
7.17M
        last = r + x + Lanes(di) - 1;
224
7.17M
        top_last = t + x + Lanes(di) - 1;
225
7.17M
      }
226
57.0M
      // Scalar pass: look up the context of each pixel and count its token.
      for (size_t x = 0; x < ch.w; x++) {
227
56.8M
        size_t ctx = ctx_map[max_diff_row[x]];
228
56.8M
        histo[ctx].FastAdd(token_row[x]);
229
56.8M
      }
230
173k
    }
231
15.6k
    // Per channel: accumulate the entropy of every histogram (integer part
    // and fractional remainder separately), then reset it.
    // NOTE(review): Condition() is defined elsewhere; presumably it finalises
    // the histogram after the FastAdd() calls - confirm.
    for (auto& h : histo) {
232
15.6k
      h.Condition();
233
15.6k
      float f_cost = h.ShannonEntropy();
234
15.6k
      size_t i_cost = f_cost;
235
15.6k
      histo_cost += i_cost;
236
15.6k
      histo_cost_frac += f_cost - i_cost;
237
15.6k
      h.Clear();
238
15.6k
    }
239
922
  }
240
922
  // Horizontal sum of the 32-bit per-lane counters.
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
922
#endif
242
922
  // The accumulated fractional parts are truncated to an integer here.
  size_t total_cost =
243
922
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
922
  return total_cost;
245
922
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
HWY_EXPORT(EstimateCost);
256
257
922
// Public entry point in namespace jxl: forwards `img` to the per-target
// EstimateCost implementation chosen by HWY_DYNAMIC_DISPATCH (the targets are
// registered by the HWY_EXPORT(EstimateCost) line above) and returns its
// result unchanged.
StatusOr<float> EstimateCost(const Image& img) {
258
922
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
922
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
265
*/
266
922
// Returns a reference to a lookup table with one entry per max_diff value in
// [0, kLastThreshold). Entry i is the context index for a neighbour spread of
// i: the index of the last cutoff that is <= i (values 0..16). The vectorised
// EstimateCost caps max_diff at kLastThreshold - 1 before indexing this
// table. The table is a function-local static, so the same object is
// returned on every call.
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
922
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
922
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
922
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
922
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
922
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
922
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
922
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
922
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
922
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
922
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
922
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
922
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
922
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
922
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
922
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
922
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
922
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
922
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
922
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
922
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
922
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
922
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
922
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
922
  return kCtxMap;
297
922
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif