Coverage Report

Created: 2025-08-12 07:37

/src/libjxl/lib/jxl/enc_modular_simd.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular_simd.h"
7
8
#include <cstdint>
9
10
#include "lib/jxl/base/common.h"
11
#include "lib/jxl/base/status.h"
12
#include "lib/jxl/dec_ans.h"
13
#include "lib/jxl/enc_ans_params.h"
14
#include "lib/jxl/memory_manager_internal.h"
15
#include "lib/jxl/modular/modular_image.h"
16
17
#undef HWY_TARGET_INCLUDE
18
#define HWY_TARGET_INCLUDE "lib/jxl/enc_modular_simd.cc"
19
#include <hwy/foreach_target.h>
20
#include <hwy/highway.h>
21
22
#if HWY_TARGET == HWY_SCALAR
23
#include "lib/jxl/modular/encoding/context_predict.h"
24
#include "lib/jxl/pack_signed.h"
25
#endif
26
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::Add;
33
using hwy::HWY_NAMESPACE::And;
34
using hwy::HWY_NAMESPACE::Ge;
35
using hwy::HWY_NAMESPACE::GetLane;
36
using hwy::HWY_NAMESPACE::Gt;
37
using hwy::HWY_NAMESPACE::IfThenElse;
38
using hwy::HWY_NAMESPACE::IfThenElseZero;
39
using hwy::HWY_NAMESPACE::Iota;
40
using hwy::HWY_NAMESPACE::Load;
41
using hwy::HWY_NAMESPACE::LoadU;
42
using hwy::HWY_NAMESPACE::Lt;
43
using hwy::HWY_NAMESPACE::Max;
44
using hwy::HWY_NAMESPACE::Min;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::Not;
47
using hwy::HWY_NAMESPACE::Set;
48
using hwy::HWY_NAMESPACE::ShiftLeft;
49
using hwy::HWY_NAMESPACE::ShiftRight;
50
using hwy::HWY_NAMESPACE::Store;
51
using hwy::HWY_NAMESPACE::StoreU;
52
using hwy::HWY_NAMESPACE::Sub;
53
using hwy::HWY_NAMESPACE::Xor;
54
using hwy::HWY_NAMESPACE::Zero;
55
56
11
// Estimates how expensive it is to entropy-code all channels of `img`.
// Each sample is predicted from its left/top/topleft neighbours with a
// gradient clamped to [min(left, top), max(left, top)]; the sign-packed
// residual is split into a token plus raw extra bits, and tokens are counted
// in histograms selected by the neighbour spread (max - min of the three
// neighbours). The result is the sum of Histogram::ShannonEntropy() over all
// histograms of all channels plus the total number of extra bits.
// The scalar target uses a plain per-pixel loop; every other target uses the
// vectorized loops in the #else branch.
// NOTE(review): JXL_ASSIGN_OR_RETURN presumably propagates a failed scratch
// allocation as the returned status - confirm against base/status.h.
StatusOr<float> EstimateCost(const Image& img) {
57
11
  // Integer part of the summed histogram costs.
  size_t histo_cost = 0;
58
11
  // Fractional parts are accumulated separately and truncated once at the end.
  float histo_cost_frac = 0.0f;
59
11
  // Total number of raw (non-token) bits over all residuals.
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  // Scalar path: one pixel at a time.
  HybridUintConfig config;
63
  // Thresholds on the neighbour spread; they select the histogram below.
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  // One more histogram than there are cutoffs.
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    // Pointer distance (in samples) used below to reach the row above.
    const intptr_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        // Missing neighbours fall back: left -> top (or 0 on the first row),
        // top -> left, topleft -> left.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        // ctx = number of cutoffs strictly greater than max_diff.
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        // `bits` is written by Encode but not used for the estimate.
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    // Fold this channel's histograms into the running cost (integer and
    // fractional parts kept apart), then reset them for the next channel.
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
11
  // Vectorized path: processes Lanes(di) samples per iteration.
  JxlMemoryManager* memory_manager = img.memory_manager();
100
11
  // Lookup table: capped neighbour spread -> histogram index.
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
11
  const HWY_FULL(int32_t) di;
102
11
  const HWY_FULL(uint32_t) du;
103
11
  const HWY_FULL(float) df;
104
11
  const auto kOne = Set(du, 1);
105
11
  // Packed residuals below 16 are used directly as their own token and carry
  // no extra bits (see not_literal below).
  const auto kSplit = Set(du, 16);
106
11
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
11
  // token = kTokenBias + kTokenMul * extra_bit_count + (two mantissa bits).
  const auto kTokenBias = Set(du, 8);
108
11
  const auto kTokenMul = Set(du, 4);
109
11
  const auto kMsbMask = Set(du, 3);
110
11
  // Largest value that is still a valid index into ctx_map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
11
  const auto kLanes = Set(du, Lanes(du));
112
11
  const auto kIota = Iota(du, 0);
113
11
  // Packed values above this threshold are shifted right by kLargeShiftVal
  // before the float conversion; the shift is added back to the bit count.
  // NOTE(review): presumably this keeps the int->float conversion from
  // rounding up into the next exponent - confirm.
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
11
  constexpr size_t kLargeShiftVal = 10;
115
11
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
11
  // Scratch row width: widest channel with non-zero height, rounded up to a
  // whole number of vectors and never less than two vectors.
  size_t max_w = 0;
118
11
  for (const Channel& ch : img.channel) {
119
11
    if (ch.h == 0) continue;
120
11
    max_w = std::max(max_w, ch.w);
121
11
  }
122
11
  max_w = RoundUpTo(max_w, Lanes(du));
123
11
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
11
  // Two rows of max_w 32-bit values. The same storage is viewed twice:
  //  - max_diff_row / token_row: per-row outputs of the vector loops;
  //  - primer / top_primer: [previous sample, first vector of the row], so an
  //    unaligned load yields the left (or topleft) neighbours of the first
  //    vector.
  // The loops therefore load from the primers before storing outputs at the
  // same offset; keep that statement order.
  JXL_ASSIGN_OR_RETURN(
126
11
      AlignedMemory buffer,
127
11
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
11
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
11
  uint32_t* token_row = max_diff_row + max_w;
130
11
  int32_t* primer = buffer.address<int32_t>();
131
11
  int32_t* top_primer = primer + max_w;
132
133
11
  // NOTE(review): `config` is not referenced anywhere in this branch; the
  // constants above hard-code the token layout instead.
  HybridUintConfig config;
134
135
11
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
11
  // Per-lane running sums of extra bits; reduced once after all channels.
  auto extra_bits_lanes = Zero(du);
137
11
  for (const Channel& ch : img.channel) {
138
11
    if (ch.h == 0 || ch.w == 0) continue;
139
187
    // NOTE(review): presumably pre-sizes the histograms so that FastAdd can
    // skip growth checks - confirm against Histogram.
    for (auto& h : histo) {
140
187
      h.EnsureCapacity(32 * 4);
141
187
    }
142
11
    // First row: only the left neighbour exists, so it is the prediction.
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
11
    const pixel_type* JXL_RESTRICT last = primer;
144
11
    // The left neighbour of the very first sample is 0.
    primer[0] = 0;
145
11
    StoreU(Load(di, r), di, primer + 1);
146
11
    // Column index of every lane, used to mask lanes past the row end.
    auto pos = kIota;
147
11
    const auto last_pos = Set(du, ch.w);
148
122
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
111
      const auto left = LoadU(di, last);
150
111
      const auto central = Load(di, r + x);
151
111
      const auto ures = BitCast(du, Sub(central, left));
152
111
      // Sign packing: residual v >= 0 becomes 2 * v, v < 0 becomes -2 * v - 1.
      const auto packed =
153
111
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
111
      const auto is_large = Gt(packed, kLargeThreshold);
155
111
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
111
      const auto not_literal = Ge(packed, kSplit);
157
111
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
111
      // Read floor(log2) and the leading mantissa bits off the float encoding.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
111
      // Biased exponent minus (127 + 2): number of extra bits for this value.
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
111
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
111
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
111
                             And(ShiftRight<21>(v), kMsbMask));
163
111
      // True for lanes that are inside the row.
      const auto tail_mask = Lt(pos, last_pos);
164
111
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
111
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
111
      extra_bits_lanes =
167
111
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
111
      Store(token_fixed, du, token_row + x);
169
111
      pos = Add(pos, kLanes);
170
111
      // From the second vector on, left neighbours come straight from the row.
      last = r + x + Lanes(di) - 1;
171
111
    }
172
867
    // All first-row tokens go into histogram 0; lanes past ch.w are ignored.
    for (size_t x = 0; x < ch.w; x++) {
173
856
      histo[0].FastAdd(token_row[x]);
174
856
    }
175
515
    // Remaining rows: full left/top/topleft neighbourhood.
    for (size_t y = 1; y < ch.h; y++) {
176
504
      r = ch.Row(y);
177
504
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
504
      last = primer;
179
504
      // At x == 0 both "left" and "topleft" are the sample directly above.
      primer[0] = t[0];
180
504
      StoreU(Load(di, r), di, primer + 1);
181
504
      top_primer[0] = t[0];
182
504
      StoreU(Load(di, t), di, top_primer + 1);
183
504
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
504
      pos = kIota;
185
12.7k
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
12.2k
        const auto left = LoadU(di, last);
187
12.2k
        const auto central = Load(di, r + x);
188
12.2k
        const auto topleft = LoadU(di, top_last);
189
12.2k
        const auto top = Load(di, t + x);
190
12.2k
        // m = min(left, top), M = max(left, top).
        const auto l_ge_t = Ge(left, top);
191
12.2k
        const auto m = IfThenElse(l_ge_t, top, left);
192
12.2k
        const auto M = IfThenElse(l_ge_t, left, top);
193
12.2k
        // Spread of the three neighbours, capped so it can index ctx_map.
        const auto maxx = Max(topleft, M);
194
12.2k
        const auto minn = Min(topleft, m);
195
12.2k
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
12.2k
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
12.2k
        // Clamped gradient: topleft < m selects M, topleft > M selects m,
        // otherwise top + left - topleft (computed with unsigned wraparound).
        const auto overshoot = Lt(topleft, m);
198
12.2k
        const auto undershoot = Gt(topleft, M);
199
12.2k
        const auto grad =
200
12.2k
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
12.2k
                            BitCast(du, topleft)));
202
12.2k
        const auto prediction =
203
12.2k
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
12.2k
        const auto ures = BitCast(du, Sub(central, prediction));
205
12.2k
        // Same sign packing and tokenization as in the first-row loop.
        const auto packed =
206
12.2k
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
12.2k
        const auto is_large = Gt(packed, kLargeThreshold);
208
12.2k
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
12.2k
        const auto not_literal = Ge(packed, kSplit);
210
12.2k
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
12.2k
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
12.2k
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
12.2k
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
12.2k
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
12.2k
                               And(ShiftRight<21>(v), kMsbMask));
216
12.2k
        const auto tail_mask = Lt(pos, last_pos);
217
12.2k
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
12.2k
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
12.2k
        extra_bits_lanes =
220
12.2k
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
12.2k
        Store(token_fixed, du, token_row + x);
222
12.2k
        pos = Add(pos, kLanes);
223
12.2k
        last = r + x + Lanes(di) - 1;
224
12.2k
        top_last = t + x + Lanes(di) - 1;
225
12.2k
      }
226
96.0k
      // Scalar pass: route each token to the histogram chosen by its spread.
      for (size_t x = 0; x < ch.w; x++) {
227
95.5k
        size_t ctx = ctx_map[max_diff_row[x]];
228
95.5k
        histo[ctx].FastAdd(token_row[x]);
229
95.5k
      }
230
504
    }
231
187
    // Fold this channel's histograms into the running cost, then reset them.
    for (auto& h : histo) {
232
187
      // NOTE(review): presumably finalizes state skipped by FastAdd - confirm.
      h.Condition();
233
187
      float f_cost = h.ShannonEntropy();
234
187
      size_t i_cost = f_cost;
235
187
      histo_cost += i_cost;
236
187
      histo_cost_frac += f_cost - i_cost;
237
187
      h.Clear();
238
187
    }
239
11
  }
240
11
  // Horizontal sum of the per-lane extra-bit counters.
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
11
#endif
242
11
  // The accumulated fractional parts are truncated to a whole number here.
  size_t total_cost =
243
11
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
11
  return total_cost;
245
11
}
Unexecuted instantiation: jxl::N_SSE4::EstimateCost(jxl::Image const&)
jxl::N_AVX2::EstimateCost(jxl::Image const&)
Line
Count
Source
56
11
// Estimates how expensive it is to entropy-code all channels of `img`.
// Each sample is predicted from its left/top/topleft neighbours with a
// gradient clamped to [min(left, top), max(left, top)]; the sign-packed
// residual is split into a token plus raw extra bits, and tokens are counted
// in histograms selected by the neighbour spread (max - min of the three
// neighbours). The result is the sum of Histogram::ShannonEntropy() over all
// histograms of all channels plus the total number of extra bits.
// The scalar target uses a plain per-pixel loop; every other target uses the
// vectorized loops in the #else branch.
// NOTE(review): JXL_ASSIGN_OR_RETURN presumably propagates a failed scratch
// allocation as the returned status - confirm against base/status.h.
StatusOr<float> EstimateCost(const Image& img) {
57
11
  // Integer part of the summed histogram costs.
  size_t histo_cost = 0;
58
11
  // Fractional parts are accumulated separately and truncated once at the end.
  float histo_cost_frac = 0.0f;
59
11
  // Total number of raw (non-token) bits over all residuals.
  size_t extra_bits = 0;
60
61
#if HWY_TARGET == HWY_SCALAR
62
  // Scalar path: one pixel at a time.
  HybridUintConfig config;
63
  // Thresholds on the neighbour spread; they select the histogram below.
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
64
                        47, 63, 95, 127, 191, 255, 392, 500};
65
  // One more histogram than there are cutoffs.
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
66
  Histogram histo[nc] = {};
67
  for (const Channel& ch : img.channel) {
68
    // Pointer distance (in samples) used below to reach the row above.
    const intptr_t onerow = ch.plane.PixelsPerRow();
69
    for (size_t y = 0; y < ch.h; y++) {
70
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
71
      for (size_t x = 0; x < ch.w; x++) {
72
        // Missing neighbours fall back: left -> top (or 0 on the first row),
        // top -> left, topleft -> left.
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
73
        pixel_type_w top = (y ? *(r + x - onerow) : left);
74
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
75
        size_t max_diff =
76
            std::max({left, top, topleft}) - std::min({left, top, topleft});
77
        // ctx = number of cutoffs strictly greater than max_diff.
        size_t ctx = 0;
78
        for (uint32_t c : cutoffs) {
79
          ctx += (max_diff < c) ? 1 : 0;
80
        }
81
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
82
        uint32_t token;
83
        uint32_t nbits;
84
        // `bits` is written by Encode but not used for the estimate.
        uint32_t bits;
85
        config.Encode(PackSigned(res), &token, &nbits, &bits);
86
        histo[ctx].Add(token);
87
        extra_bits += nbits;
88
      }
89
    }
90
    // Fold this channel's histograms into the running cost (integer and
    // fractional parts kept apart), then reset them for the next channel.
    for (auto& h : histo) {
91
      float f_cost = h.ShannonEntropy();
92
      size_t i_cost = f_cost;
93
      histo_cost += i_cost;
94
      histo_cost_frac += f_cost - i_cost;
95
      h.Clear();
96
    }
97
  }
98
#else
99
11
  // Vectorized path: processes Lanes(di) samples per iteration.
  JxlMemoryManager* memory_manager = img.memory_manager();
100
11
  // Lookup table: capped neighbour spread -> histogram index.
  const auto& ctx_map = estimate_cost_detail::ContextMap();
101
11
  const HWY_FULL(int32_t) di;
102
11
  const HWY_FULL(uint32_t) du;
103
11
  const HWY_FULL(float) df;
104
11
  const auto kOne = Set(du, 1);
105
11
  // Packed residuals below 16 are used directly as their own token and carry
  // no extra bits (see not_literal below).
  const auto kSplit = Set(du, 16);
106
11
  const auto kExpOffset2 = Set(du, 129);  // 127 + 2
107
11
  // token = kTokenBias + kTokenMul * extra_bit_count + (two mantissa bits).
  const auto kTokenBias = Set(du, 8);
108
11
  const auto kTokenMul = Set(du, 4);
109
11
  const auto kMsbMask = Set(du, 3);
110
11
  // Largest value that is still a valid index into ctx_map.
  const auto kMaxDiffCap = Set(du, estimate_cost_detail::kLastThreshold - 1);
111
11
  const auto kLanes = Set(du, Lanes(du));
112
11
  const auto kIota = Iota(du, 0);
113
11
  // Packed values above this threshold are shifted right by kLargeShiftVal
  // before the float conversion; the shift is added back to the bit count.
  // NOTE(review): presumably this keeps the int->float conversion from
  // rounding up into the next exponent - confirm.
  const auto kLargeThreshold = Set(du, (1 << 22) - 1);
114
11
  constexpr size_t kLargeShiftVal = 10;
115
11
  const auto kLargeShift = Set(du, kLargeShiftVal);
116
117
11
  // Scratch row width: widest channel with non-zero height, rounded up to a
  // whole number of vectors and never less than two vectors.
  size_t max_w = 0;
118
11
  for (const Channel& ch : img.channel) {
119
11
    if (ch.h == 0) continue;
120
11
    max_w = std::max(max_w, ch.w);
121
11
  }
122
11
  max_w = RoundUpTo(max_w, Lanes(du));
123
11
  max_w = std::max(max_w, 2 * Lanes(du));
124
125
11
  // Two rows of max_w 32-bit values. The same storage is viewed twice:
  //  - max_diff_row / token_row: per-row outputs of the vector loops;
  //  - primer / top_primer: [previous sample, first vector of the row], so an
  //    unaligned load yields the left (or topleft) neighbours of the first
  //    vector.
  // The loops therefore load from the primers before storing outputs at the
  // same offset; keep that statement order.
  JXL_ASSIGN_OR_RETURN(
126
11
      AlignedMemory buffer,
127
11
      AlignedMemory::Create(memory_manager, max_w * 2 * sizeof(uint32_t)));
128
11
  uint32_t* max_diff_row = buffer.address<uint32_t>();
129
11
  uint32_t* token_row = max_diff_row + max_w;
130
11
  int32_t* primer = buffer.address<int32_t>();
131
11
  int32_t* top_primer = primer + max_w;
132
133
11
  // NOTE(review): `config` is not referenced anywhere in this branch; the
  // constants above hard-code the token layout instead.
  HybridUintConfig config;
134
135
11
  Histogram histo[estimate_cost_detail::kLastCtx + 1] = {};
136
11
  // Per-lane running sums of extra bits; reduced once after all channels.
  auto extra_bits_lanes = Zero(du);
137
11
  for (const Channel& ch : img.channel) {
138
11
    if (ch.h == 0 || ch.w == 0) continue;
139
187
    // NOTE(review): presumably pre-sizes the histograms so that FastAdd can
    // skip growth checks - confirm against Histogram.
    for (auto& h : histo) {
140
187
      h.EnsureCapacity(32 * 4);
141
187
    }
142
11
    // First row: only the left neighbour exists, so it is the prediction.
    const pixel_type* JXL_RESTRICT r = ch.Row(0);
143
11
    const pixel_type* JXL_RESTRICT last = primer;
144
11
    // The left neighbour of the very first sample is 0.
    primer[0] = 0;
145
11
    StoreU(Load(di, r), di, primer + 1);
146
11
    // Column index of every lane, used to mask lanes past the row end.
    auto pos = kIota;
147
11
    const auto last_pos = Set(du, ch.w);
148
122
    for (size_t x = 0; x < ch.w; x += Lanes(di)) {
149
111
      const auto left = LoadU(di, last);
150
111
      const auto central = Load(di, r + x);
151
111
      const auto ures = BitCast(du, Sub(central, left));
152
111
      // Sign packing: residual v >= 0 becomes 2 * v, v < 0 becomes -2 * v - 1.
      const auto packed =
153
111
          Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
154
111
      const auto is_large = Gt(packed, kLargeThreshold);
155
111
      const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
156
111
      const auto not_literal = Ge(packed, kSplit);
157
111
      const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
158
111
      // Read floor(log2) and the leading mantissa bits off the float encoding.
      const auto v = BitCast(du, ConvertTo(df, packed_fixed));
159
111
      // Biased exponent minus (127 + 2): number of extra bits for this value.
      const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
160
111
      const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
161
111
      const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
162
111
                             And(ShiftRight<21>(v), kMsbMask));
163
111
      // True for lanes that are inside the row.
      const auto tail_mask = Lt(pos, last_pos);
164
111
      const auto eb_fixed = IfThenElseZero(not_literal, eb);
165
111
      const auto token_fixed = IfThenElse(not_literal, token, packed);
166
111
      extra_bits_lanes =
167
111
          Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
168
111
      Store(token_fixed, du, token_row + x);
169
111
      pos = Add(pos, kLanes);
170
111
      // From the second vector on, left neighbours come straight from the row.
      last = r + x + Lanes(di) - 1;
171
111
    }
172
867
    // All first-row tokens go into histogram 0; lanes past ch.w are ignored.
    for (size_t x = 0; x < ch.w; x++) {
173
856
      histo[0].FastAdd(token_row[x]);
174
856
    }
175
515
    // Remaining rows: full left/top/topleft neighbourhood.
    for (size_t y = 1; y < ch.h; y++) {
176
504
      r = ch.Row(y);
177
504
      const pixel_type* JXL_RESTRICT t = ch.Row(y - 1);
178
504
      last = primer;
179
504
      // At x == 0 both "left" and "topleft" are the sample directly above.
      primer[0] = t[0];
180
504
      StoreU(Load(di, r), di, primer + 1);
181
504
      top_primer[0] = t[0];
182
504
      StoreU(Load(di, t), di, top_primer + 1);
183
504
      const pixel_type* JXL_RESTRICT top_last = top_primer;
184
504
      pos = kIota;
185
12.7k
      for (size_t x = 0; x < ch.w; x += Lanes(di)) {
186
12.2k
        const auto left = LoadU(di, last);
187
12.2k
        const auto central = Load(di, r + x);
188
12.2k
        const auto topleft = LoadU(di, top_last);
189
12.2k
        const auto top = Load(di, t + x);
190
12.2k
        // m = min(left, top), M = max(left, top).
        const auto l_ge_t = Ge(left, top);
191
12.2k
        const auto m = IfThenElse(l_ge_t, top, left);
192
12.2k
        const auto M = IfThenElse(l_ge_t, left, top);
193
12.2k
        // Spread of the three neighbours, capped so it can index ctx_map.
        const auto maxx = Max(topleft, M);
194
12.2k
        const auto minn = Min(topleft, m);
195
12.2k
        const auto max_diff = BitCast(du, Sub(maxx, minn));
196
12.2k
        Store(Min(max_diff, kMaxDiffCap), du, max_diff_row + x);
197
12.2k
        // Clamped gradient: topleft < m selects M, topleft > M selects m,
        // otherwise top + left - topleft (computed with unsigned wraparound).
        const auto overshoot = Lt(topleft, m);
198
12.2k
        const auto undershoot = Gt(topleft, M);
199
12.2k
        const auto grad =
200
12.2k
            BitCast(di, Sub(Add(BitCast(du, top), BitCast(du, left)),
201
12.2k
                            BitCast(du, topleft)));
202
12.2k
        const auto prediction =
203
12.2k
            IfThenElse(undershoot, m, IfThenElse(overshoot, M, grad));
204
12.2k
        const auto ures = BitCast(du, Sub(central, prediction));
205
12.2k
        // Same sign packing and tokenization as in the first-row loop.
        const auto packed =
206
12.2k
            Xor(ShiftLeft<1>(ures), Sub(ShiftRight<31>(Not(ures)), kOne));
207
12.2k
        const auto is_large = Gt(packed, kLargeThreshold);
208
12.2k
        const auto packed_shifted = ShiftRight<kLargeShiftVal>(packed);
209
12.2k
        const auto not_literal = Ge(packed, kSplit);
210
12.2k
        const auto packed_fixed = IfThenElse(is_large, packed_shifted, packed);
211
12.2k
        const auto v = BitCast(du, ConvertTo(df, packed_fixed));
212
12.2k
        const auto eb_raw = Sub(ShiftRight<23>(v), kExpOffset2);
213
12.2k
        const auto eb = IfThenElse(is_large, Add(eb_raw, kLargeShift), eb_raw);
214
12.2k
        const auto token = Add(Add(kTokenBias, Mul(eb, kTokenMul)),
215
12.2k
                               And(ShiftRight<21>(v), kMsbMask));
216
12.2k
        const auto tail_mask = Lt(pos, last_pos);
217
12.2k
        const auto eb_fixed = IfThenElseZero(not_literal, eb);
218
12.2k
        const auto token_fixed = IfThenElse(not_literal, token, packed);
219
12.2k
        extra_bits_lanes =
220
12.2k
            Add(extra_bits_lanes, IfThenElseZero(tail_mask, eb_fixed));
221
12.2k
        Store(token_fixed, du, token_row + x);
222
12.2k
        pos = Add(pos, kLanes);
223
12.2k
        last = r + x + Lanes(di) - 1;
224
12.2k
        top_last = t + x + Lanes(di) - 1;
225
12.2k
      }
226
96.0k
      // Scalar pass: route each token to the histogram chosen by its spread.
      for (size_t x = 0; x < ch.w; x++) {
227
95.5k
        size_t ctx = ctx_map[max_diff_row[x]];
228
95.5k
        histo[ctx].FastAdd(token_row[x]);
229
95.5k
      }
230
504
    }
231
187
    // Fold this channel's histograms into the running cost, then reset them.
    for (auto& h : histo) {
232
187
      // NOTE(review): presumably finalizes state skipped by FastAdd - confirm.
      h.Condition();
233
187
      float f_cost = h.ShannonEntropy();
234
187
      size_t i_cost = f_cost;
235
187
      histo_cost += i_cost;
236
187
      histo_cost_frac += f_cost - i_cost;
237
187
      h.Clear();
238
187
    }
239
11
  }
240
11
  // Horizontal sum of the per-lane extra-bit counters.
  extra_bits = GetLane(SumOfLanes(du, extra_bits_lanes));
241
11
#endif
242
11
  // The accumulated fractional parts are truncated to a whole number here.
  size_t total_cost =
243
11
      extra_bits + histo_cost + static_cast<size_t>(histo_cost_frac);
244
11
  return total_cost;
245
11
}
Unexecuted instantiation: jxl::N_SSE2::EstimateCost(jxl::Image const&)
246
247
// NOLINTNEXTLINE(google-readability-namespace-comments)
248
}  // namespace HWY_NAMESPACE
249
}  // namespace jxl
250
HWY_AFTER_NAMESPACE();
251
252
#if HWY_ONCE
253
namespace jxl {
254
255
// Highway macro that makes the per-target EstimateCost variants (compiled
// through foreach_target.h above) available to HWY_DYNAMIC_DISPATCH.
HWY_EXPORT(EstimateCost);
256
257
11
// Public entry point: forwards `img` to the EstimateCost variant selected by
// HWY_DYNAMIC_DISPATCH and returns that variant's result unchanged.
StatusOr<float> EstimateCost(const Image& img) {
258
11
  return HWY_DYNAMIC_DISPATCH(EstimateCost)(img);
259
11
}
260
261
namespace estimate_cost_detail {
262
/*
263
cutoffs = [0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500]
264
ctx_map = [[c for c,v in enumerate(cutoffs) if v <= i][-1] for i in range(501)]
265
*/
266
11
// Returns a reference to a function-local static lookup table with
// kLastThreshold entries. Entry i is the index of the last cutoff in
// {0, 1, 3, 5, 7, 11, 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500} that
// is <= i, so the values run from 0 (i == 0) up to 16 (the final entry).
// The vectorized EstimateCost indexes it with the neighbour spread capped at
// kLastThreshold - 1 to pick a histogram.
const std::array<uint8_t, kLastThreshold>& ContextMap() {
267
11
  static const std::array<uint8_t, kLastThreshold> kCtxMap = {
268
11
      0,  1,  1,  2,  2,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,
269
11
      6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
270
11
      8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,
271
11
      9,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10,
272
11
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
273
11
      10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
274
11
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
275
11
      11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
276
11
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
277
11
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
278
11
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
279
11
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
280
11
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
281
11
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
282
11
      13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
283
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
284
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
285
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
286
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
287
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
288
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
289
11
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15,
290
11
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
291
11
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292
11
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293
11
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
294
11
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
295
11
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16};
296
11
  return kCtxMap;
297
11
}
298
}  // namespace estimate_cost_detail
299
300
}  // namespace jxl
301
#endif