Coverage Report

Created: 2025-11-14 07:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
1.04k
// Estimates the cost of entropy-coding `img`.
//
// Every sample is predicted from its left / top / top-left neighbours, the
// residual is mapped to an unsigned value and split into a token plus raw
// "extra bits". Tokens are histogrammed per context, where the context is
// derived from the spread (max - min) of the three neighbours. For each
// channel the Shannon entropy of every histogram is added to the running
// total, and the number of extra bits is added on top.
//
// Returns the total as a float (presumably in bits; the unit is whatever
// Histogram::ShannonEntropy() reports). In the vector build a failure to
// allocate the scratch rows is propagated to the caller as an error status.
StatusOr<float> EstimateCost(const Image& img) {
  size_t histo_cost = 0;
  // Fractional parts of the per-histogram entropies, summed separately and
  // truncated once at the very end.
  float histo_cost_frac = 0.0f;
  size_t extra_bits = 0;

#if HWY_TARGET == HWY_SCALAR
  // Reference implementation, one sample at a time.
  HybridUintConfig config;
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
                        47, 63, 95, 127, 191, 255, 392, 500};
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
  Histogram histo[nc] = {};
  for (const Channel& ch : img.channel) {
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
    for (size_t y = 0; y < ch.h; y++) {
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
      for (size_t x = 0; x < ch.w; x++) {
        // Border handling: the first column falls back to the sample above
        // (or 0 on the first row); missing top / top-left fall back to `left`.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
        pixel_type_w top = (y ? *(r + x - onerow) : left);
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
        size_t max_diff =
            std::max({left, top, topleft}) - std::min({left, top, topleft});
        // Context = number of cutoffs strictly greater than max_diff.
        size_t ctx = 0;
        for (uint32_t c : cutoffs) {
          ctx += (max_diff < c) ? 1 : 0;
        }
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
        uint32_t token;
        uint32_t nbits;
        uint32_t bits;
        config.Encode(PackSigned(res), &token, &nbits, &bits);
        histo[ctx].Add(token);
        extra_bits += nbits;
      }
    }
    // Histograms are per channel: account for them, then reset.
    for (auto& h : histo) {
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
#else
  JxlMemoryManager* memory_manager = img.memory_manager();
  const auto& ctx_map = estimate_cost_detail::ContextMap();
  const HWY_FULL(int32_t) di;
  const HWY_FULL(uint32_t) du;
  const HWY_FULL(float) df;
  const auto kOne = Set(du, 1);
  // Packed residuals below kSplit are used directly as the token.
  const auto kSplit = Set(du, 16);
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
  const auto kTokenBias = Set(du, 8);
  const auto kTokenMul = Set(du, 4);
  const auto kMsbMask = Set(du, 3);
  // Largest valid index into ctx_map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
  const auto kLanes = Set(du, Lanes(du));
  const auto kIota = Iota(du, 0);
  // Packed residuals above this value are shifted right by kLargeShiftVal
  // before the int->float conversion; the shift is added back to the exponent
  // afterwards (presumably to keep the conversion exact for large values).
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
  constexpr size_t kLargeShiftVal = 10;
  const auto kLargeShift = Set(du, kLargeShiftVal);

  // Scratch rows are sized for the widest non-empty channel, rounded up to a
  // whole number of vectors. At least two vectors are needed because the
  // "primer" stores below write Lanes() values starting at index 1.
  size_t max_w = 0;
  for (const Channel& ch : img.channel) {
    if (ch.h == 0) continue;
    max_w = std::max(max_w, ch.w);
  }
  max_w = RoundUpTo(max_w, Lanes(du));
  max_w = std::max(max_w, 2 * Lanes(du));

  JXL_ASSIGN_OR_RETURN(
      AlignedMemory buffer,
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
  uint32_t* max_diff_row = buffer.address<uint32_t>();
  uint32_t* token_row = max_diff_row + max_w;
  // The primers deliberately share storage with max_diff_row / token_row:
  // they are only read by the first vector iteration of a row, before that
  // iteration's Store() calls overwrite them.
  int32_t* primer = buffer.address<int32_t>();
  int32_t* top_primer = primer + max_w;

  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
  // Extra bits are accumulated per 32-bit lane and reduced once at the end.
  // NOTE(review): this can wrap for extremely large inputs - confirm that
  // callers bound the image size.
  auto extra_bits_lanes = Zero(du);
  for (const Channel& ch : img.channel) {
    if (ch.h == 0 || ch.w == 0) continue;
    // The largest token produced below is 8 + 4 * 29 + 3 = 127.
    for (auto& h : histo) {
      h.EnsureCapacity(32 * 4);
    }

    // ---- First row: only the left neighbour is available. ----
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
    // `last` points at the "left neighbour" vector: for the first iteration
    // that is the primer (0 followed by the start of the row), afterwards it
    // is simply the row shifted by one sample.
    const pixel_type* JXL_RESTRICT last = primer;
    primer[0] = 0;
    StoreU(Load(di, r), di, primer + 1);
    auto pos = kIota;
    const auto last_pos = Set(du, ch.w);
    // Whole vectors are loaded even when ch.w is not a multiple of Lanes();
    // assumes rows are padded accordingly - TODO confirm.
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
      const auto left = LoadU(di, last);
      const auto central = Load(di, r + x);
      const auto ures = BitCast(du, Sub(central, left));
      // Signed -> unsigned packing: r >= 0 becomes 2r, r < 0 becomes -2r - 1.
      const auto packed =
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
      const auto is_large = Gt(packed, kLargeThreshold);
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
      const auto not_literal = Ge(packed, kSplit);
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
      // Use the float representation to get floor(log2(value)): the biased
      // exponent sits in bits 23+, the two mantissa bits below it are the
      // two value bits following the leading one.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                             And(ShiftRight<21>(v), kMsbMask));
      // Lanes at or beyond ch.w must not contribute extra bits.
      const auto tail_mask = Lt(pos, last_pos);
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
      const auto token_fixed = IfThenElse(not_literal, token, packed);
      extra_bits_lanes =
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
      Store(token_fixed, du, token_row + x);
      pos = Add(pos, kLanes);
      last = r + x + Lanes(di) - 1;
    }
    // All first-row tokens go to context 0.
    for (size_t x = 0; x < ch.w; x++) {
      histo[0].FastAdd(token_row[x]);
    }

    // ---- Remaining rows: clamped-gradient prediction. ----
    for (size_t y = 1; y < ch.h; y++) {
      r = ch.Row(y);
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
      // For the first sample of the row both "left" and "top-left" are the
      // sample directly above it.
      last = primer;
      primer[0] = t[0];
      StoreU(Load(di, r), di, primer + 1);
      top_primer[0] = t[0];
      StoreU(Load(di, t), di, top_primer + 1);
      const pixel_type* JXL_RESTRICT top_last = top_primer;
      pos = kIota;
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
        const auto left = LoadU(di, last);
        const auto central = Load(di, r + x);
        const auto topleft = LoadU(di, top_last);
        const auto top = Load(di, t + x);
        // m / M: min / max of left and top.
        const auto l_ge_t = Ge(left, top);
        const auto m = IfThenElse(l_ge_t, top, left);
        const auto M = IfThenElse(l_ge_t, left, top);
        // Spread of the three neighbours, capped so it indexes ctx_map.
        const auto maxx = Max(topleft, M);
        const auto minn = Min(topleft, m);
        const auto max_diff = BitCast(du, Sub(maxx, minn));
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
        const auto overshoot = Lt(topleft, m);
        const auto undershoot = Gt(topleft, M);
        // top + left - topleft, computed in unsigned lanes so that
        // intermediate overflow wraps.
        const auto grad =
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
                            BitCast(du, topleft)));
        // Gradient clamped to [m, M].
        const auto prediction =
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
        const auto ures = BitCast(du, Sub(central, prediction));
        // Same packing / tokenization as for the first row.
        const auto packed =
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
        const auto is_large = Gt(packed, kLargeThreshold);
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
        const auto not_literal = Ge(packed, kSplit);
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                               And(ShiftRight<21>(v), kMsbMask));
        const auto tail_mask = Lt(pos, last_pos);
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
        const auto token_fixed = IfThenElse(not_literal, token, packed);
        extra_bits_lanes =
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
        Store(token_fixed, du, token_row + x);
        pos = Add(pos, kLanes);
        last = r + x + Lanes(di) - 1;
        top_last = t + x + Lanes(di) - 1;
      }
      // Scalar pass: route every token of this row to its context histogram.
      for (size_t x = 0; x < ch.w; x++) {
        size_t ctx = ctx_map[max_diff_row[x]];
        histo[ctx].FastAdd(token_row[x]);
      }
    }
    // Histograms are per channel: account for them, then reset.
    for (auto& h : histo) {
      h.Condition();
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
#endif
  size_t total_cost =
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
  return total_cost;
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
1.04k
// Estimates the cost of entropy-coding `img`.
//
// Every sample is predicted from its left / top / top-left neighbours, the
// residual is mapped to an unsigned value and split into a token plus raw
// "extra bits". Tokens are histogrammed per context, where the context is
// derived from the spread (max - min) of the three neighbours. For each
// channel the Shannon entropy of every histogram is added to the running
// total, and the number of extra bits is added on top.
//
// Returns the total as a float (presumably in bits; the unit is whatever
// Histogram::ShannonEntropy() reports). In the vector build a failure to
// allocate the scratch rows is propagated to the caller as an error status.
StatusOr<float> EstimateCost(const Image& img) {
  size_t histo_cost = 0;
  // Fractional parts of the per-histogram entropies, summed separately and
  // truncated once at the very end.
  float histo_cost_frac = 0.0f;
  size_t extra_bits = 0;

#if HWY_TARGET == HWY_SCALAR
  // Reference implementation, one sample at a time.
  HybridUintConfig config;
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
                        47, 63, 95, 127, 191, 255, 392, 500};
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
  Histogram histo[nc] = {};
  for (const Channel& ch : img.channel) {
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
    for (size_t y = 0; y < ch.h; y++) {
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
      for (size_t x = 0; x < ch.w; x++) {
        // Border handling: the first column falls back to the sample above
        // (or 0 on the first row); missing top / top-left fall back to `left`.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
        pixel_type_w top = (y ? *(r + x - onerow) : left);
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
        size_t max_diff =
            std::max({left, top, topleft}) - std::min({left, top, topleft});
        // Context = number of cutoffs strictly greater than max_diff.
        size_t ctx = 0;
        for (uint32_t c : cutoffs) {
          ctx += (max_diff < c) ? 1 : 0;
        }
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
        uint32_t token;
        uint32_t nbits;
        uint32_t bits;
        config.Encode(PackSigned(res), &token, &nbits, &bits);
        histo[ctx].Add(token);
        extra_bits += nbits;
      }
    }
    // Histograms are per channel: account for them, then reset.
    for (auto& h : histo) {
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
#else
  JxlMemoryManager* memory_manager = img.memory_manager();
  const auto& ctx_map = estimate_cost_detail::ContextMap();
  const HWY_FULL(int32_t) di;
  const HWY_FULL(uint32_t) du;
  const HWY_FULL(float) df;
  const auto kOne = Set(du, 1);
  // Packed residuals below kSplit are used directly as the token.
  const auto kSplit = Set(du, 16);
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
  const auto kTokenBias = Set(du, 8);
  const auto kTokenMul = Set(du, 4);
  const auto kMsbMask = Set(du, 3);
  // Largest valid index into ctx_map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
  const auto kLanes = Set(du, Lanes(du));
  const auto kIota = Iota(du, 0);
  // Packed residuals above this value are shifted right by kLargeShiftVal
  // before the int->float conversion; the shift is added back to the exponent
  // afterwards (presumably to keep the conversion exact for large values).
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
  constexpr size_t kLargeShiftVal = 10;
  const auto kLargeShift = Set(du, kLargeShiftVal);

  // Scratch rows are sized for the widest non-empty channel, rounded up to a
  // whole number of vectors. At least two vectors are needed because the
  // "primer" stores below write Lanes() values starting at index 1.
  size_t max_w = 0;
  for (const Channel& ch : img.channel) {
    if (ch.h == 0) continue;
    max_w = std::max(max_w, ch.w);
  }
  max_w = RoundUpTo(max_w, Lanes(du));
  max_w = std::max(max_w, 2 * Lanes(du));

  JXL_ASSIGN_OR_RETURN(
      AlignedMemory buffer,
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
  uint32_t* max_diff_row = buffer.address<uint32_t>();
  uint32_t* token_row = max_diff_row + max_w;
  // The primers deliberately share storage with max_diff_row / token_row:
  // they are only read by the first vector iteration of a row, before that
  // iteration's Store() calls overwrite them.
  int32_t* primer = buffer.address<int32_t>();
  int32_t* top_primer = primer + max_w;

  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
  // Extra bits are accumulated per 32-bit lane and reduced once at the end.
  // NOTE(review): this can wrap for extremely large inputs - confirm that
  // callers bound the image size.
  auto extra_bits_lanes = Zero(du);
  for (const Channel& ch : img.channel) {
    if (ch.h == 0 || ch.w == 0) continue;
    // The largest token produced below is 8 + 4 * 29 + 3 = 127.
    for (auto& h : histo) {
      h.EnsureCapacity(32 * 4);
    }

    // ---- First row: only the left neighbour is available. ----
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
    // `last` points at the "left neighbour" vector: for the first iteration
    // that is the primer (0 followed by the start of the row), afterwards it
    // is simply the row shifted by one sample.
    const pixel_type* JXL_RESTRICT last = primer;
    primer[0] = 0;
    StoreU(Load(di, r), di, primer + 1);
    auto pos = kIota;
    const auto last_pos = Set(du, ch.w);
    // Whole vectors are loaded even when ch.w is not a multiple of Lanes();
    // assumes rows are padded accordingly - TODO confirm.
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
      const auto left = LoadU(di, last);
      const auto central = Load(di, r + x);
      const auto ures = BitCast(du, Sub(central, left));
      // Signed -> unsigned packing: r >= 0 becomes 2r, r < 0 becomes -2r - 1.
      const auto packed =
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
      const auto is_large = Gt(packed, kLargeThreshold);
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
      const auto not_literal = Ge(packed, kSplit);
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
      // Use the float representation to get floor(log2(value)): the biased
      // exponent sits in bits 23+, the two mantissa bits below it are the
      // two value bits following the leading one.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                             And(ShiftRight<21>(v), kMsbMask));
      // Lanes at or beyond ch.w must not contribute extra bits.
      const auto tail_mask = Lt(pos, last_pos);
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
      const auto token_fixed = IfThenElse(not_literal, token, packed);
      extra_bits_lanes =
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
      Store(token_fixed, du, token_row + x);
      pos = Add(pos, kLanes);
      last = r + x + Lanes(di) - 1;
    }
    // All first-row tokens go to context 0.
    for (size_t x = 0; x < ch.w; x++) {
      histo[0].FastAdd(token_row[x]);
    }

    // ---- Remaining rows: clamped-gradient prediction. ----
    for (size_t y = 1; y < ch.h; y++) {
      r = ch.Row(y);
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
      // For the first sample of the row both "left" and "top-left" are the
      // sample directly above it.
      last = primer;
      primer[0] = t[0];
      StoreU(Load(di, r), di, primer + 1);
      top_primer[0] = t[0];
      StoreU(Load(di, t), di, top_primer + 1);
      const pixel_type* JXL_RESTRICT top_last = top_primer;
      pos = kIota;
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
        const auto left = LoadU(di, last);
        const auto central = Load(di, r + x);
        const auto topleft = LoadU(di, top_last);
        const auto top = Load(di, t + x);
        // m / M: min / max of left and top.
        const auto l_ge_t = Ge(left, top);
        const auto m = IfThenElse(l_ge_t, top, left);
        const auto M = IfThenElse(l_ge_t, left, top);
        // Spread of the three neighbours, capped so it indexes ctx_map.
        const auto maxx = Max(topleft, M);
        const auto minn = Min(topleft, m);
        const auto max_diff = BitCast(du, Sub(maxx, minn));
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
        const auto overshoot = Lt(topleft, m);
        const auto undershoot = Gt(topleft, M);
        // top + left - topleft, computed in unsigned lanes so that
        // intermediate overflow wraps.
        const auto grad =
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
                            BitCast(du, topleft)));
        // Gradient clamped to [m, M].
        const auto prediction =
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
        const auto ures = BitCast(du, Sub(central, prediction));
        // Same packing / tokenization as for the first row.
        const auto packed =
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
        const auto is_large = Gt(packed, kLargeThreshold);
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
        const auto not_literal = Ge(packed, kSplit);
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
                               And(ShiftRight<21>(v), kMsbMask));
        const auto tail_mask = Lt(pos, last_pos);
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
        const auto token_fixed = IfThenElse(not_literal, token, packed);
        extra_bits_lanes =
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
        Store(token_fixed, du, token_row + x);
        pos = Add(pos, kLanes);
        last = r + x + Lanes(di) - 1;
        top_last = t + x + Lanes(di) - 1;
      }
      // Scalar pass: route every token of this row to its context histogram.
      for (size_t x = 0; x < ch.w; x++) {
        size_t ctx = ctx_map[max_diff_row[x]];
        histo[ctx].FastAdd(token_row[x]);
      }
    }
    // Histograms are per channel: account for them, then reset.
    for (auto& h : histo) {
      h.Condition();
      float f_cost = h.ShannonEntropy();
      size_t i_cost = f_cost;
      histo_cost += i_cost;
      histo_cost_frac += f_cost - i_cost;
      h.Clear();
    }
  }
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
#endif
  size_t total_cost =
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
  return total_cost;
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
HWY_EXPORT(EstimateCost);
256
257
1.04k
StatusOr<float> EstimateCost(const Image& img) {
258
1.04k
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
1.04k
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
265
*/
266
1.04k
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
1.04k
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
1.04k
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
1.04k
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
1.04k
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
1.04k
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
1.04k
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
1.04k
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
1.04k
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
1.04k
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
1.04k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
1.04k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
1.04k
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
1.04k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
1.04k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
1.04k
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
1.04k
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
1.04k
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
1.04k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
1.04k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
1.04k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
1.04k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
1.04k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
1.04k
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
1.04k
  return kCtxMap;
297
1.04k
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif