Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_cluster.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_cluster.h"
7
8
#include <algorithm>
9
#include <cstddef>
10
#include <cstdint>
11
#include <limits>
12
#include <map>
13
#include <numeric>
14
#include <queue>
15
#include <tuple>
16
#include <vector>
17
18
#include "lib/jxl/base/status.h"
19
#include "lib/jxl/enc_ans_params.h"
20
21
#undef HWY_TARGET_INCLUDE
22
#define HWY_TARGET_INCLUDE "lib/jxl/enc_cluster.cc"
23
#include <hwy/foreach_target.h>
24
#include <hwy/highway.h>
25
26
#include "lib/jxl/base/fast_math-inl.h"
27
HWY_BEFORE_NAMESPACE();
28
namespace jxl {
29
namespace HWY_NAMESPACE {
30
31
// These templates are not found via ADL.
32
using hwy::HWY_NAMESPACE::AllTrue;
33
using hwy::HWY_NAMESPACE::Eq;
34
using hwy::HWY_NAMESPACE::GetLane;
35
using hwy::HWY_NAMESPACE::IfThenZeroElse;
36
using hwy::HWY_NAMESPACE::SumOfLanes;
37
using hwy::HWY_NAMESPACE::Zero;
38
39
template <class V>
40
668M
V Entropy(V count, V inv_total, V total) {
41
668M
  const HWY_CAPPED(float, Histogram::kRounding) d;
42
668M
  const auto zero = Set(d, 0.0f);
43
  // TODO(eustas): why (0 - x) instead of Neg(x)?
44
668M
  return IfThenZeroElse(
45
668M
      Eq(count, total),
46
668M
      Sub(zero, Mul(count, FastLog2f(d, Mul(inv_total, count)))));
47
668M
}
Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::Entropy<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
hwy::N_AVX2::Vec256<float> jxl::N_AVX2::Entropy<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Line
Count
Source
40
668M
V Entropy(V count, V inv_total, V total) {
41
668M
  const HWY_CAPPED(float, Histogram::kRounding) d;
42
668M
  const auto zero = Set(d, 0.0f);
43
  // TODO(eustas): why (0 - x) instead of Neg(x)?
44
668M
  return IfThenZeroElse(
45
668M
      Eq(count, total),
46
668M
      Sub(zero, Mul(count, FastLog2f(d, Mul(inv_total, count)))));
47
668M
}
Unexecuted instantiation: hwy::N_AVX3::Vec256<float> jxl::N_AVX3::Entropy<hwy::N_AVX3::Vec256<float> >(hwy::N_AVX3::Vec256<float>, hwy::N_AVX3::Vec256<float>, hwy::N_AVX3::Vec256<float>)
Unexecuted instantiation: hwy::N_AVX3_ZEN4::Vec256<float> jxl::N_AVX3_ZEN4::Entropy<hwy::N_AVX3_ZEN4::Vec256<float> >(hwy::N_AVX3_ZEN4::Vec256<float>, hwy::N_AVX3_ZEN4::Vec256<float>, hwy::N_AVX3_ZEN4::Vec256<float>)
Unexecuted instantiation: hwy::N_AVX3_SPR::Vec256<float> jxl::N_AVX3_SPR::Entropy<hwy::N_AVX3_SPR::Vec256<float> >(hwy::N_AVX3_SPR::Vec256<float>, hwy::N_AVX3_SPR::Vec256<float>, hwy::N_AVX3_SPR::Vec256<float>)
Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::Entropy<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
48
49
831k
void HistogramCondition(Histogram& a) {
50
831k
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
51
831k
  const auto kZero = Zero(di);
52
831k
  auto total = kZero;
53
831k
  int nz_pos = -static_cast<int>(Lanes(di));
54
4.63M
  for (size_t i = 0; i < a.counts.size(); i += Lanes(di)) {
55
3.79M
    const auto counts = LoadU(di, &a.counts[i]);
56
3.79M
    const bool nz = !AllTrue(di, Eq(counts, kZero));
57
3.79M
    total = Add(total, counts);
58
3.79M
    if (nz) nz_pos = i;
59
3.79M
  }
60
831k
  a.counts.resize(nz_pos + Lanes(di));
61
831k
  a.total_count = GetLane(SumOfLanes(di, total));
62
831k
}
Unexecuted instantiation: jxl::N_SSE4::HistogramCondition(jxl::Histogram&)
jxl::N_AVX2::HistogramCondition(jxl::Histogram&)
Line
Count
Source
49
831k
void HistogramCondition(Histogram& a) {
50
831k
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
51
831k
  const auto kZero = Zero(di);
52
831k
  auto total = kZero;
53
831k
  int nz_pos = -static_cast<int>(Lanes(di));
54
4.63M
  for (size_t i = 0; i < a.counts.size(); i += Lanes(di)) {
55
3.79M
    const auto counts = LoadU(di, &a.counts[i]);
56
3.79M
    const bool nz = !AllTrue(di, Eq(counts, kZero));
57
3.79M
    total = Add(total, counts);
58
3.79M
    if (nz) nz_pos = i;
59
3.79M
  }
60
831k
  a.counts.resize(nz_pos + Lanes(di));
61
831k
  a.total_count = GetLane(SumOfLanes(di, total));
62
831k
}
Unexecuted instantiation: jxl::N_AVX3::HistogramCondition(jxl::Histogram&)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::HistogramCondition(jxl::Histogram&)
Unexecuted instantiation: jxl::N_AVX3_SPR::HistogramCondition(jxl::Histogram&)
Unexecuted instantiation: jxl::N_SSE2::HistogramCondition(jxl::Histogram&)
63
64
31.8M
void HistogramEntropy(const Histogram& a) {
65
31.8M
  a.entropy = 0.0f;
66
31.8M
  if (a.total_count == 0) return;
67
68
12.9M
  const HWY_CAPPED(float, Histogram::kRounding) df;
69
12.9M
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
70
71
12.9M
  const auto inv_tot = Set(df, 1.0f / a.total_count);
72
12.9M
  auto entropy_lanes = Zero(df);
73
12.9M
  auto total = Set(df, a.total_count);
74
75
37.5M
  for (size_t i = 0; i < a.counts.size(); i += Lanes(di)) {
76
24.6M
    const auto counts = LoadU(di, &a.counts[i]);
77
24.6M
    entropy_lanes =
78
24.6M
        Add(entropy_lanes, Entropy(ConvertTo(df, counts), inv_tot, total));
79
24.6M
  }
80
12.9M
  a.entropy += GetLane(SumOfLanes(df, entropy_lanes));
81
12.9M
}
Unexecuted instantiation: jxl::N_SSE4::HistogramEntropy(jxl::Histogram const&)
jxl::N_AVX2::HistogramEntropy(jxl::Histogram const&)
Line
Count
Source
64
31.8M
void HistogramEntropy(const Histogram& a) {
65
31.8M
  a.entropy = 0.0f;
66
31.8M
  if (a.total_count == 0) return;
67
68
12.9M
  const HWY_CAPPED(float, Histogram::kRounding) df;
69
12.9M
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
70
71
12.9M
  const auto inv_tot = Set(df, 1.0f / a.total_count);
72
12.9M
  auto entropy_lanes = Zero(df);
73
12.9M
  auto total = Set(df, a.total_count);
74
75
37.5M
  for (size_t i = 0; i < a.counts.size(); i += Lanes(di)) {
76
24.6M
    const auto counts = LoadU(di, &a.counts[i]);
77
24.6M
    entropy_lanes =
78
24.6M
        Add(entropy_lanes, Entropy(ConvertTo(df, counts), inv_tot, total));
79
24.6M
  }
80
12.9M
  a.entropy += GetLane(SumOfLanes(df, entropy_lanes));
81
12.9M
}
Unexecuted instantiation: jxl::N_AVX3::HistogramEntropy(jxl::Histogram const&)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::HistogramEntropy(jxl::Histogram const&)
Unexecuted instantiation: jxl::N_AVX3_SPR::HistogramEntropy(jxl::Histogram const&)
Unexecuted instantiation: jxl::N_SSE2::HistogramEntropy(jxl::Histogram const&)
82
83
222M
float HistogramDistance(const Histogram& a, const Histogram& b) {
84
222M
  if (a.total_count == 0 || b.total_count == 0) return 0;
85
86
222M
  const HWY_CAPPED(float, Histogram::kRounding) df;
87
222M
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
88
89
222M
  const auto inv_tot = Set(df, 1.0f / (a.total_count + b.total_count));
90
222M
  auto distance_lanes = Zero(df);
91
222M
  auto total = Set(df, a.total_count + b.total_count);
92
93
866M
  for (size_t i = 0; i < std::max(a.counts.size(), b.counts.size());
94
643M
       i += Lanes(di)) {
95
643M
    const auto a_counts =
96
643M
        a.counts.size() > i ? LoadU(di, &a.counts[i]) : Zero(di);
97
643M
    const auto b_counts =
98
643M
        b.counts.size() > i ? LoadU(di, &b.counts[i]) : Zero(di);
99
643M
    const auto counts = ConvertTo(df, Add(a_counts, b_counts));
100
643M
    distance_lanes = Add(distance_lanes, Entropy(counts, inv_tot, total));
101
643M
  }
102
222M
  const float total_distance = GetLane(SumOfLanes(df, distance_lanes));
103
222M
  return total_distance - a.entropy - b.entropy;
104
222M
}
Unexecuted instantiation: jxl::N_SSE4::HistogramDistance(jxl::Histogram const&, jxl::Histogram const&)
jxl::N_AVX2::HistogramDistance(jxl::Histogram const&, jxl::Histogram const&)
Line
Count
Source
83
222M
float HistogramDistance(const Histogram& a, const Histogram& b) {
84
222M
  if (a.total_count == 0 || b.total_count == 0) return 0;
85
86
222M
  const HWY_CAPPED(float, Histogram::kRounding) df;
87
222M
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
88
89
222M
  const auto inv_tot = Set(df, 1.0f / (a.total_count + b.total_count));
90
222M
  auto distance_lanes = Zero(df);
91
222M
  auto total = Set(df, a.total_count + b.total_count);
92
93
866M
  for (size_t i = 0; i < std::max(a.counts.size(), b.counts.size());
94
643M
       i += Lanes(di)) {
95
643M
    const auto a_counts =
96
643M
        a.counts.size() > i ? LoadU(di, &a.counts[i]) : Zero(di);
97
643M
    const auto b_counts =
98
643M
        b.counts.size() > i ? LoadU(di, &b.counts[i]) : Zero(di);
99
643M
    const auto counts = ConvertTo(df, Add(a_counts, b_counts));
100
643M
    distance_lanes = Add(distance_lanes, Entropy(counts, inv_tot, total));
101
643M
  }
102
222M
  const float total_distance = GetLane(SumOfLanes(df, distance_lanes));
103
222M
  return total_distance - a.entropy - b.entropy;
104
222M
}
Unexecuted instantiation: jxl::N_AVX3::HistogramDistance(jxl::Histogram const&, jxl::Histogram const&)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::HistogramDistance(jxl::Histogram const&, jxl::Histogram const&)
Unexecuted instantiation: jxl::N_AVX3_SPR::HistogramDistance(jxl::Histogram const&, jxl::Histogram const&)
Unexecuted instantiation: jxl::N_SSE2::HistogramDistance(jxl::Histogram const&, jxl::Histogram const&)
105
106
constexpr const float kInfinity = std::numeric_limits<float>::infinity();
107
108
10.8k
float HistogramKLDivergence(const Histogram& actual, const Histogram& coding) {
109
10.8k
  if (actual.total_count == 0) return 0;
110
10.8k
  if (coding.total_count == 0) return kInfinity;
111
112
10.8k
  const HWY_CAPPED(float, Histogram::kRounding) df;
113
10.8k
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
114
115
10.8k
  const auto coding_inv = Set(df, 1.0f / coding.total_count);
116
10.8k
  auto cost_lanes = Zero(df);
117
118
27.7k
  for (size_t i = 0; i < actual.counts.size(); i += Lanes(di)) {
119
16.8k
    const auto counts = LoadU(di, &actual.counts[i]);
120
16.8k
    const auto coding_counts =
121
16.8k
        coding.counts.size() > i ? LoadU(di, &coding.counts[i]) : Zero(di);
122
16.8k
    const auto coding_probs = Mul(ConvertTo(df, coding_counts), coding_inv);
123
16.8k
    const auto neg_coding_cost = BitCast(
124
16.8k
        df,
125
16.8k
        IfThenZeroElse(Eq(counts, Zero(di)),
126
16.8k
                       IfThenElse(Eq(coding_counts, Zero(di)),
127
16.8k
                                  BitCast(di, Set(df, -kInfinity)),
128
16.8k
                                  BitCast(di, FastLog2f(df, coding_probs)))));
129
16.8k
    cost_lanes = NegMulAdd(ConvertTo(df, counts), neg_coding_cost, cost_lanes);
130
16.8k
  }
131
10.8k
  const float total_cost = GetLane(SumOfLanes(df, cost_lanes));
132
10.8k
  return total_cost - actual.entropy;
133
10.8k
}
Unexecuted instantiation: jxl::N_SSE4::HistogramKLDivergence(jxl::Histogram const&, jxl::Histogram const&)
jxl::N_AVX2::HistogramKLDivergence(jxl::Histogram const&, jxl::Histogram const&)
Line
Count
Source
108
10.8k
float HistogramKLDivergence(const Histogram& actual, const Histogram& coding) {
109
10.8k
  if (actual.total_count == 0) return 0;
110
10.8k
  if (coding.total_count == 0) return kInfinity;
111
112
10.8k
  const HWY_CAPPED(float, Histogram::kRounding) df;
113
10.8k
  const HWY_CAPPED(int32_t, Histogram::kRounding) di;
114
115
10.8k
  const auto coding_inv = Set(df, 1.0f / coding.total_count);
116
10.8k
  auto cost_lanes = Zero(df);
117
118
27.7k
  for (size_t i = 0; i < actual.counts.size(); i += Lanes(di)) {
119
16.8k
    const auto counts = LoadU(di, &actual.counts[i]);
120
16.8k
    const auto coding_counts =
121
16.8k
        coding.counts.size() > i ? LoadU(di, &coding.counts[i]) : Zero(di);
122
16.8k
    const auto coding_probs = Mul(ConvertTo(df, coding_counts), coding_inv);
123
16.8k
    const auto neg_coding_cost = BitCast(
124
16.8k
        df,
125
16.8k
        IfThenZeroElse(Eq(counts, Zero(di)),
126
16.8k
                       IfThenElse(Eq(coding_counts, Zero(di)),
127
16.8k
                                  BitCast(di, Set(df, -kInfinity)),
128
16.8k
                                  BitCast(di, FastLog2f(df, coding_probs)))));
129
16.8k
    cost_lanes = NegMulAdd(ConvertTo(df, counts), neg_coding_cost, cost_lanes);
130
16.8k
  }
131
10.8k
  const float total_cost = GetLane(SumOfLanes(df, cost_lanes));
132
10.8k
  return total_cost - actual.entropy;
133
10.8k
}
Unexecuted instantiation: jxl::N_AVX3::HistogramKLDivergence(jxl::Histogram const&, jxl::Histogram const&)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::HistogramKLDivergence(jxl::Histogram const&, jxl::Histogram const&)
Unexecuted instantiation: jxl::N_AVX3_SPR::HistogramKLDivergence(jxl::Histogram const&, jxl::Histogram const&)
Unexecuted instantiation: jxl::N_SSE2::HistogramKLDivergence(jxl::Histogram const&, jxl::Histogram const&)
134
135
// First step of a k-means clustering with a fancy distance metric.
136
Status FastClusterHistograms(const std::vector<Histogram>& in,
137
                             size_t max_histograms, std::vector<Histogram>* out,
138
28.1k
                             std::vector<uint32_t>* histogram_symbols) {
139
28.1k
  const size_t prev_histograms = out->size();
140
28.1k
  out->reserve(max_histograms);
141
28.1k
  histogram_symbols->clear();
142
28.1k
  histogram_symbols->resize(in.size(), max_histograms);
143
144
28.1k
  std::vector<float> dists(in.size(), std::numeric_limits<float>::max());
145
28.1k
  size_t largest_idx = 0;
146
26.9M
  for (size_t i = 0; i < in.size(); i++) {
147
26.9M
    if (in[i].total_count == 0) {
148
22.5M
      (*histogram_symbols)[i] = 0;
149
22.5M
      dists[i] = 0.0f;
150
22.5M
      continue;
151
22.5M
    }
152
4.39M
    HistogramEntropy(in[i]);
153
4.39M
    if (in[i].total_count > in[largest_idx].total_count) {
154
53.3k
      largest_idx = i;
155
53.3k
    }
156
4.39M
  }
157
158
28.1k
  if (prev_histograms > 0) {
159
78
    for (size_t j = 0; j < prev_histograms; ++j) {
160
39
      HistogramEntropy((*out)[j]);
161
39
    }
162
87.6k
    for (size_t i = 0; i < in.size(); i++) {
163
87.6k
      if (dists[i] == 0.0f) continue;
164
11.0k
      for (size_t j = 0; j < prev_histograms; ++j) {
165
5.54k
        dists[i] = std::min(HistogramKLDivergence(in[i], (*out)[j]), dists[i]);
166
5.54k
      }
167
5.54k
    }
168
39
    auto max_dist = std::max_element(dists.begin(), dists.end());
169
39
    if (*max_dist > 0.0f) {
170
39
      largest_idx = max_dist - dists.begin();
171
39
    }
172
39
  }
173
174
28.1k
  constexpr float kMinDistanceForDistinct = 48.0f;
175
192k
  while (out->size() < max_histograms) {
176
192k
    (*histogram_symbols)[largest_idx] = out->size();
177
192k
    out->push_back(in[largest_idx]);
178
192k
    dists[largest_idx] = 0.0f;
179
192k
    largest_idx = 0;
180
393M
    for (size_t i = 0; i < in.size(); i++) {
181
393M
      if (dists[i] == 0.0f) continue;
182
113M
      dists[i] = std::min(HistogramDistance(in[i], out->back()), dists[i]);
183
113M
      if (dists[i] > dists[largest_idx]) largest_idx = i;
184
113M
    }
185
192k
    if (dists[largest_idx] < kMinDistanceForDistinct) break;
186
192k
  }
187
188
26.9M
  for (size_t i = 0; i < in.size(); i++) {
189
26.9M
    if ((*histogram_symbols)[i] != max_histograms) continue;
190
4.19M
    size_t best = 0;
191
4.19M
    float best_dist = std::numeric_limits<float>::max();
192
114M
    for (size_t j = 0; j < out->size(); j++) {
193
109M
      float dist = j < prev_histograms ? HistogramKLDivergence(in[i], (*out)[j])
194
109M
                                       : HistogramDistance(in[i], (*out)[j]);
195
109M
      if (dist < best_dist) {
196
11.4M
        best = j;
197
11.4M
        best_dist = dist;
198
11.4M
      }
199
109M
    }
200
4.19M
    JXL_ENSURE(best_dist < std::numeric_limits<float>::max());
201
4.19M
    if (best >= prev_histograms) {
202
4.19M
      (*out)[best].AddHistogram(in[i]);
203
4.19M
      HistogramEntropy((*out)[best]);
204
4.19M
    }
205
4.19M
    (*histogram_symbols)[i] = best;
206
4.19M
  }
207
28.1k
  return true;
208
28.1k
}
Unexecuted instantiation: jxl::N_SSE4::FastClusterHistograms(std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> > const&, unsigned long, std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> >*, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >*)
jxl::N_AVX2::FastClusterHistograms(std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> > const&, unsigned long, std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> >*, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >*)
Line
Count
Source
138
28.1k
                             std::vector<uint32_t>* histogram_symbols) {
139
28.1k
  const size_t prev_histograms = out->size();
140
28.1k
  out->reserve(max_histograms);
141
28.1k
  histogram_symbols->clear();
142
28.1k
  histogram_symbols->resize(in.size(), max_histograms);
143
144
28.1k
  std::vector<float> dists(in.size(), std::numeric_limits<float>::max());
145
28.1k
  size_t largest_idx = 0;
146
26.9M
  for (size_t i = 0; i < in.size(); i++) {
147
26.9M
    if (in[i].total_count == 0) {
148
22.5M
      (*histogram_symbols)[i] = 0;
149
22.5M
      dists[i] = 0.0f;
150
22.5M
      continue;
151
22.5M
    }
152
4.39M
    HistogramEntropy(in[i]);
153
4.39M
    if (in[i].total_count > in[largest_idx].total_count) {
154
53.3k
      largest_idx = i;
155
53.3k
    }
156
4.39M
  }
157
158
28.1k
  if (prev_histograms > 0) {
159
78
    for (size_t j = 0; j < prev_histograms; ++j) {
160
39
      HistogramEntropy((*out)[j]);
161
39
    }
162
87.6k
    for (size_t i = 0; i < in.size(); i++) {
163
87.6k
      if (dists[i] == 0.0f) continue;
164
11.0k
      for (size_t j = 0; j < prev_histograms; ++j) {
165
5.54k
        dists[i] = std::min(HistogramKLDivergence(in[i], (*out)[j]), dists[i]);
166
5.54k
      }
167
5.54k
    }
168
39
    auto max_dist = std::max_element(dists.begin(), dists.end());
169
39
    if (*max_dist > 0.0f) {
170
39
      largest_idx = max_dist - dists.begin();
171
39
    }
172
39
  }
173
174
28.1k
  constexpr float kMinDistanceForDistinct = 48.0f;
175
192k
  while (out->size() < max_histograms) {
176
192k
    (*histogram_symbols)[largest_idx] = out->size();
177
192k
    out->push_back(in[largest_idx]);
178
192k
    dists[largest_idx] = 0.0f;
179
192k
    largest_idx = 0;
180
393M
    for (size_t i = 0; i < in.size(); i++) {
181
393M
      if (dists[i] == 0.0f) continue;
182
113M
      dists[i] = std::min(HistogramDistance(in[i], out->back()), dists[i]);
183
113M
      if (dists[i] > dists[largest_idx]) largest_idx = i;
184
113M
    }
185
192k
    if (dists[largest_idx] < kMinDistanceForDistinct) break;
186
192k
  }
187
188
26.9M
  for (size_t i = 0; i < in.size(); i++) {
189
26.9M
    if ((*histogram_symbols)[i] != max_histograms) continue;
190
4.19M
    size_t best = 0;
191
4.19M
    float best_dist = std::numeric_limits<float>::max();
192
114M
    for (size_t j = 0; j < out->size(); j++) {
193
109M
      float dist = j < prev_histograms ? HistogramKLDivergence(in[i], (*out)[j])
194
109M
                                       : HistogramDistance(in[i], (*out)[j]);
195
109M
      if (dist < best_dist) {
196
11.4M
        best = j;
197
11.4M
        best_dist = dist;
198
11.4M
      }
199
109M
    }
200
4.19M
    JXL_ENSURE(best_dist < std::numeric_limits<float>::max());
201
4.19M
    if (best >= prev_histograms) {
202
4.19M
      (*out)[best].AddHistogram(in[i]);
203
4.19M
      HistogramEntropy((*out)[best]);
204
4.19M
    }
205
4.19M
    (*histogram_symbols)[i] = best;
206
4.19M
  }
207
28.1k
  return true;
208
28.1k
}
Unexecuted instantiation: jxl::N_AVX3::FastClusterHistograms(std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> > const&, unsigned long, std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> >*, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >*)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::FastClusterHistograms(std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> > const&, unsigned long, std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> >*, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >*)
Unexecuted instantiation: jxl::N_AVX3_SPR::FastClusterHistograms(std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> > const&, unsigned long, std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> >*, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >*)
Unexecuted instantiation: jxl::N_SSE2::FastClusterHistograms(std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> > const&, unsigned long, std::__1::vector<jxl::Histogram, std::__1::allocator<jxl::Histogram> >*, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >*)
209
210
// NOLINTNEXTLINE(google-readability-namespace-comments)
211
}  // namespace HWY_NAMESPACE
212
}  // namespace jxl
213
HWY_AFTER_NAMESPACE();
214
215
#if HWY_ONCE
216
namespace jxl {
217
HWY_EXPORT(FastClusterHistograms);  // Local function
218
HWY_EXPORT(HistogramEntropy);       // Local function
219
HWY_EXPORT(HistogramCondition);     // Local function
220
221
831k
void Histogram::Condition() { HWY_DYNAMIC_DISPATCH(HistogramCondition)(*this); }
222
223
23.2M
float Histogram::ShannonEntropy() const {
224
23.2M
  HWY_DYNAMIC_DISPATCH(HistogramEntropy)(*this);
225
23.2M
  return entropy;
226
23.2M
}
227
228
namespace {
229
// -----------------------------------------------------------------------------
230
// Histogram refinement
231
232
// Reorder histograms in *out so that the new symbols in *symbols come in
233
// increasing order.
234
void HistogramReindex(std::vector<Histogram>* out, size_t prev_histograms,
235
28.1k
                      std::vector<uint32_t>* symbols) {
236
28.1k
  std::vector<Histogram> tmp(*out);
237
28.1k
  std::map<int, int> new_index;
238
28.1k
  for (size_t i = 0; i < prev_histograms; ++i) {
239
39
    new_index[i] = i;
240
39
  }
241
28.1k
  int next_index = prev_histograms;
242
26.9M
  for (uint32_t symbol : *symbols) {
243
26.9M
    if (new_index.find(symbol) == new_index.end()) {
244
190k
      new_index[symbol] = next_index;
245
190k
      (*out)[next_index] = tmp[symbol];
246
190k
      ++next_index;
247
190k
    }
248
26.9M
  }
249
28.1k
  out->resize(next_index);
250
26.9M
  for (uint32_t& symbol : *symbols) {
251
26.9M
    symbol = new_index[symbol];
252
26.9M
  }
253
28.1k
}
254
255
}  // namespace
256
257
// Clusters similar histograms in 'in' together, the selected histograms are
258
// placed in 'out', and for each index in 'in', *histogram_symbols will
259
// indicate which of the 'out' histograms is the best approximation.
260
Status ClusterHistograms(const HistogramParams& params,
261
                         const std::vector<Histogram>& in,
262
                         size_t max_histograms, std::vector<Histogram>* out,
263
28.1k
                         std::vector<uint32_t>* histogram_symbols) {
264
28.1k
  size_t prev_histograms = out->size();
265
28.1k
  max_histograms = std::min(max_histograms, params.max_histograms);
266
28.1k
  max_histograms = std::min(max_histograms, in.size());
267
28.1k
  if (params.clustering == HistogramParams::ClusteringType::kFastest) {
268
0
    max_histograms = std::min(max_histograms, static_cast<size_t>(4));
269
0
  }
270
271
28.1k
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(FastClusterHistograms)(
272
28.1k
      in, prev_histograms + max_histograms, out, histogram_symbols));
273
274
28.1k
  if (prev_histograms == 0 &&
275
28.0k
      params.clustering == HistogramParams::ClusteringType::kBest) {
276
30.5k
    for (auto& histo : *out) {
277
30.5k
      JXL_ASSIGN_OR_RETURN(histo.entropy, histo.ANSPopulationCost());
278
30.5k
    }
279
10.6k
    uint32_t next_version = 2;
280
10.6k
    std::vector<uint32_t> version(out->size(), 1);
281
10.6k
    std::vector<uint32_t> renumbering(out->size());
282
10.6k
    std::iota(renumbering.begin(), renumbering.end(), 0);
283
284
    // Try to pair up clusters if doing so reduces the total cost.
285
286
10.6k
    struct HistogramPair {
287
      // validity of a pair: p.version == max(version[i], version[j])
288
10.6k
      float cost;
289
10.6k
      uint32_t first;
290
10.6k
      uint32_t second;
291
10.6k
      uint32_t version;
292
      // We use > because priority queues sort in *decreasing* order, but we
293
      // want lower cost elements to appear first.
294
10.6k
      bool operator<(const HistogramPair& other) const {
295
6.08k
        return std::make_tuple(cost, first, second, version) >
296
6.08k
               std::make_tuple(other.cost, other.first, other.second,
297
6.08k
                               other.version);
298
6.08k
      }
299
10.6k
    };
300
301
    // Create list of all pairs by increasing merging cost.
302
10.6k
    std::priority_queue<HistogramPair> pairs_to_merge;
303
41.1k
    for (uint32_t i = 0; i < out->size(); i++) {
304
80.0k
      for (uint32_t j = i + 1; j < out->size(); j++) {
305
49.5k
        Histogram histo;
306
49.5k
        histo.AddHistogram((*out)[i]);
307
49.5k
        histo.AddHistogram((*out)[j]);
308
49.5k
        JXL_ASSIGN_OR_RETURN(float cost, histo.ANSPopulationCost());
309
49.5k
        cost -= (*out)[i].entropy + (*out)[j].entropy;
310
        // Avoid enqueueing pairs that are not advantageous to merge.
311
49.5k
        if (cost >= 0) continue;
312
3.08k
        pairs_to_merge.push(
313
3.08k
            HistogramPair{cost, i, j, std::max(version[i], version[j])});
314
3.08k
      }
315
30.5k
    }
316
317
    // Merge the best pair to merge, add new pairs that get formed as a
318
    // consequence.
319
14.1k
    while (!pairs_to_merge.empty()) {
320
3.45k
      uint32_t first = pairs_to_merge.top().first;
321
3.45k
      uint32_t second = pairs_to_merge.top().second;
322
3.45k
      uint32_t ver = pairs_to_merge.top().version;
323
3.45k
      pairs_to_merge.pop();
324
3.45k
      if (ver != std::max(version[first], version[second]) ||
325
2.51k
          version[first] == 0 || version[second] == 0) {
326
1.70k
        continue;
327
1.70k
      }
328
1.75k
      (*out)[first].AddHistogram((*out)[second]);
329
1.75k
      JXL_ASSIGN_OR_RETURN((*out)[first].entropy,
330
1.75k
                           (*out)[first].ANSPopulationCost());
331
8.90k
      for (uint32_t& item : renumbering) {
332
8.90k
        if (item == second) {
333
1.83k
          item = first;
334
1.83k
        }
335
8.90k
      }
336
1.75k
      version[second] = 0;
337
1.75k
      version[first] = next_version++;
338
10.6k
      for (uint32_t j = 0; j < out->size(); j++) {
339
8.90k
        if (j == first) continue;
340
7.15k
        if (version[j] == 0) continue;
341
4.82k
        Histogram histo;
342
4.82k
        histo.AddHistogram((*out)[first]);
343
4.82k
        histo.AddHistogram((*out)[j]);
344
4.82k
        JXL_ASSIGN_OR_RETURN(float merge_cost, histo.ANSPopulationCost());
345
4.82k
        merge_cost -= (*out)[first].entropy + (*out)[j].entropy;
346
        // Avoid enqueueing pairs that are not advantageous to merge.
347
4.82k
        if (merge_cost >= 0) continue;
348
374
        pairs_to_merge.push(
349
374
            HistogramPair{merge_cost, std::min(first, j), std::max(first, j),
350
374
                          std::max(version[first], version[j])});
351
374
      }
352
1.75k
    }
353
10.6k
    std::vector<uint32_t> reverse_renumbering(out->size(), -1);
354
10.6k
    size_t num_alive = 0;
355
41.1k
    for (size_t i = 0; i < out->size(); i++) {
356
30.5k
      if (version[i] == 0) continue;
357
28.7k
      (*out)[num_alive++] = (*out)[i];
358
28.7k
      reverse_renumbering[i] = num_alive - 1;
359
28.7k
    }
360
10.6k
    out->resize(num_alive);
361
60.7k
    for (uint32_t& item : *histogram_symbols) {
362
60.7k
      item = reverse_renumbering[renumbering[item]];
363
60.7k
    }
364
10.6k
  }
365
366
  // Convert the context map to a canonical form.
367
28.1k
  HistogramReindex(out, prev_histograms, histogram_symbols);
368
28.1k
  return true;
369
28.1k
}
370
371
}  // namespace jxl
372
#endif  // HWY_ONCE