/src/libjxl/lib/jxl/quant_weights.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | #include "lib/jxl/quant_weights.h" |
6 | | |
7 | | #include <jxl/memory_manager.h> |
8 | | |
9 | | #include <cmath> |
10 | | #include <cstdint> |
11 | | #include <cstdio> |
12 | | #include <cstdlib> |
13 | | #include <vector> |
14 | | |
15 | | #include "lib/jxl/ac_strategy.h" |
16 | | #include "lib/jxl/base/compiler_specific.h" |
17 | | #include "lib/jxl/base/status.h" |
18 | | #include "lib/jxl/coeff_order_fwd.h" |
19 | | #include "lib/jxl/dct_scales.h" |
20 | | #include "lib/jxl/dec_bit_reader.h" |
21 | | #include "lib/jxl/dec_modular.h" |
22 | | #include "lib/jxl/fields.h" |
23 | | #include "lib/jxl/frame_dimensions.h" |
24 | | #include "lib/jxl/memory_manager_internal.h" |
25 | | |
26 | | #undef HWY_TARGET_INCLUDE |
27 | | #define HWY_TARGET_INCLUDE "lib/jxl/quant_weights.cc" |
28 | | #include <hwy/foreach_target.h> |
29 | | #include <hwy/highway.h> |
30 | | |
31 | | #include "lib/jxl/base/fast_math-inl.h" |
32 | | |
33 | | HWY_BEFORE_NAMESPACE(); |
34 | | namespace jxl { |
35 | | namespace HWY_NAMESPACE { |
36 | | |
37 | | // These templates are not found via ADL. |
38 | | using hwy::HWY_NAMESPACE::Lt; |
39 | | using hwy::HWY_NAMESPACE::MulAdd; |
40 | | using hwy::HWY_NAMESPACE::Sqrt; |
41 | | |
42 | | // kQuantWeights[N * N * c + N * y + x] is the relative weight of the (x, y) |
43 | | // coefficient in component c. Higher weights correspond to finer quantization |
44 | | // intervals and more bits spent in encoding. |
45 | | |
46 | | static constexpr const float kAlmostZero = 1e-8f; |
47 | | |
48 | | void GetQuantWeightsDCT2(const QuantEncoding::DCT2Weights& dct2weights, |
49 | 219 | float* weights) { |
50 | 876 | for (size_t c = 0; c < 3; c++) { |
51 | 657 | size_t start = c * 64; |
52 | 657 | weights[start] = 0xBAD; |
53 | 657 | weights[start + 1] = weights[start + 8] = dct2weights[c][0]; |
54 | 657 | weights[start + 9] = dct2weights[c][1]; |
55 | 1.97k | for (size_t y = 0; y < 2; y++) { |
56 | 3.94k | for (size_t x = 0; x < 2; x++) { |
57 | 2.62k | weights[start + y * 8 + x + 2] = dct2weights[c][2]; |
58 | 2.62k | weights[start + (y + 2) * 8 + x] = dct2weights[c][2]; |
59 | 2.62k | } |
60 | 1.31k | } |
61 | 1.97k | for (size_t y = 0; y < 2; y++) { |
62 | 3.94k | for (size_t x = 0; x < 2; x++) { |
63 | 2.62k | weights[start + (y + 2) * 8 + x + 2] = dct2weights[c][3]; |
64 | 2.62k | } |
65 | 1.31k | } |
66 | 3.28k | for (size_t y = 0; y < 4; y++) { |
67 | 13.1k | for (size_t x = 0; x < 4; x++) { |
68 | 10.5k | weights[start + y * 8 + x + 4] = dct2weights[c][4]; |
69 | 10.5k | weights[start + (y + 4) * 8 + x] = dct2weights[c][4]; |
70 | 10.5k | } |
71 | 2.62k | } |
72 | 3.28k | for (size_t y = 0; y < 4; y++) { |
73 | 13.1k | for (size_t x = 0; x < 4; x++) { |
74 | 10.5k | weights[start + (y + 4) * 8 + x + 4] = dct2weights[c][5]; |
75 | 10.5k | } |
76 | 2.62k | } |
77 | 657 | } |
78 | 219 | } Unexecuted instantiation: jxl::N_SSE4::GetQuantWeightsDCT2(std::__1::array<std::__1::array<float, 6ul>, 3ul> const&, float*) jxl::N_AVX2::GetQuantWeightsDCT2(std::__1::array<std::__1::array<float, 6ul>, 3ul> const&, float*) Line | Count | Source | 49 | 219 | float* weights) { | 50 | 876 | for (size_t c = 0; c < 3; c++) { | 51 | 657 | size_t start = c * 64; | 52 | 657 | weights[start] = 0xBAD; | 53 | 657 | weights[start + 1] = weights[start + 8] = dct2weights[c][0]; | 54 | 657 | weights[start + 9] = dct2weights[c][1]; | 55 | 1.97k | for (size_t y = 0; y < 2; y++) { | 56 | 3.94k | for (size_t x = 0; x < 2; x++) { | 57 | 2.62k | weights[start + y * 8 + x + 2] = dct2weights[c][2]; | 58 | 2.62k | weights[start + (y + 2) * 8 + x] = dct2weights[c][2]; | 59 | 2.62k | } | 60 | 1.31k | } | 61 | 1.97k | for (size_t y = 0; y < 2; y++) { | 62 | 3.94k | for (size_t x = 0; x < 2; x++) { | 63 | 2.62k | weights[start + (y + 2) * 8 + x + 2] = dct2weights[c][3]; | 64 | 2.62k | } | 65 | 1.31k | } | 66 | 3.28k | for (size_t y = 0; y < 4; y++) { | 67 | 13.1k | for (size_t x = 0; x < 4; x++) { | 68 | 10.5k | weights[start + y * 8 + x + 4] = dct2weights[c][4]; | 69 | 10.5k | weights[start + (y + 4) * 8 + x] = dct2weights[c][4]; | 70 | 10.5k | } | 71 | 2.62k | } | 72 | 3.28k | for (size_t y = 0; y < 4; y++) { | 73 | 13.1k | for (size_t x = 0; x < 4; x++) { | 74 | 10.5k | weights[start + (y + 4) * 8 + x + 4] = dct2weights[c][5]; | 75 | 10.5k | } | 76 | 2.62k | } | 77 | 657 | } | 78 | 219 | } |
Unexecuted instantiation: jxl::N_SSE2::GetQuantWeightsDCT2(std::__1::array<std::__1::array<float, 6ul>, 3ul> const&, float*) |
79 | | |
80 | | void GetQuantWeightsIdentity(const QuantEncoding::IdWeights& idweights, |
81 | 274 | float* weights) { |
82 | 1.09k | for (size_t c = 0; c < 3; c++) { |
83 | 53.4k | for (int i = 0; i < 64; i++) { |
84 | 52.6k | weights[64 * c + i] = idweights[c][0]; |
85 | 52.6k | } |
86 | 822 | weights[64 * c + 1] = idweights[c][1]; |
87 | 822 | weights[64 * c + 8] = idweights[c][1]; |
88 | 822 | weights[64 * c + 9] = idweights[c][2]; |
89 | 822 | } |
90 | 274 | } Unexecuted instantiation: jxl::N_SSE4::GetQuantWeightsIdentity(std::__1::array<std::__1::array<float, 3ul>, 3ul> const&, float*) jxl::N_AVX2::GetQuantWeightsIdentity(std::__1::array<std::__1::array<float, 3ul>, 3ul> const&, float*) Line | Count | Source | 81 | 274 | float* weights) { | 82 | 1.09k | for (size_t c = 0; c < 3; c++) { | 83 | 53.4k | for (int i = 0; i < 64; i++) { | 84 | 52.6k | weights[64 * c + i] = idweights[c][0]; | 85 | 52.6k | } | 86 | 822 | weights[64 * c + 1] = idweights[c][1]; | 87 | 822 | weights[64 * c + 8] = idweights[c][1]; | 88 | 822 | weights[64 * c + 9] = idweights[c][2]; | 89 | 822 | } | 90 | 274 | } |
Unexecuted instantiation: jxl::N_SSE2::GetQuantWeightsIdentity(std::__1::array<std::__1::array<float, 3ul>, 3ul> const&, float*) |
91 | | |
92 | | StatusOr<float> Interpolate(float pos, float max, const float* array, |
93 | 7.23k | size_t len) { |
94 | 7.23k | float scaled_pos = pos * (len - 1) / max; |
95 | 7.23k | size_t idx = scaled_pos; |
96 | 7.23k | JXL_ENSURE(idx + 1 < len); |
97 | 7.23k | float a = array[idx]; |
98 | 7.23k | float b = array[idx + 1]; |
99 | 7.23k | return a * FastPowf(b / a, scaled_pos - idx); |
100 | 7.23k | } Unexecuted instantiation: jxl::N_SSE4::Interpolate(float, float, float const*, unsigned long) jxl::N_AVX2::Interpolate(float, float, float const*, unsigned long) Line | Count | Source | 93 | 7.23k | size_t len) { | 94 | 7.23k | float scaled_pos = pos * (len - 1) / max; | 95 | 7.23k | size_t idx = scaled_pos; | 96 | 7.23k | JXL_ENSURE(idx + 1 < len); | 97 | 7.23k | float a = array[idx]; | 98 | 7.23k | float b = array[idx + 1]; | 99 | 7.23k | return a * FastPowf(b / a, scaled_pos - idx); | 100 | 7.23k | } |
Unexecuted instantiation: jxl::N_SSE2::Interpolate(float, float, float const*, unsigned long) |
101 | | |
// Maps a signed parameter to a positive multiplier:
//   v > 0  ->  1 + v
//   v <= 0 ->  1 / (1 - v)
float Mult(float v) {
  const bool is_positive = v > 0.0f;
  return is_positive ? 1.0f + v : 1.0f / (1.0f - v);
}
Unexecuted instantiation: jxl::N_SSE2::Mult(float) |
106 | | |
107 | | using DF4 = HWY_CAPPED(float, 4); |
108 | | |
109 | | hwy::HWY_NAMESPACE::Vec<DF4> InterpolateVec( |
110 | 1.28M | hwy::HWY_NAMESPACE::Vec<DF4> scaled_pos, const float* array) { |
111 | 1.28M | HWY_CAPPED(int32_t, 4) di; |
112 | | |
113 | 1.28M | auto idx = ConvertTo(di, scaled_pos); |
114 | | |
115 | 1.28M | auto frac = Sub(scaled_pos, ConvertTo(DF4(), idx)); |
116 | | |
117 | | // TODO(veluca): in theory, this could be done with 8 TableLookupBytes, but |
118 | | // it's probably slower. |
119 | 1.28M | auto a = GatherIndex(DF4(), array, idx); |
120 | 1.28M | auto b = GatherIndex(DF4(), array + 1, idx); |
121 | | |
122 | 1.28M | return Mul(a, FastPowf(DF4(), Div(b, a), frac)); |
123 | 1.28M | } Unexecuted instantiation: jxl::N_SSE4::InterpolateVec(hwy::N_SSE4::Vec128<float, 4ul>, float const*) jxl::N_AVX2::InterpolateVec(hwy::N_AVX2::Vec128<float, 4ul>, float const*) Line | Count | Source | 110 | 1.28M | hwy::HWY_NAMESPACE::Vec<DF4> scaled_pos, const float* array) { | 111 | 1.28M | HWY_CAPPED(int32_t, 4) di; | 112 | | | 113 | 1.28M | auto idx = ConvertTo(di, scaled_pos); | 114 | | | 115 | 1.28M | auto frac = Sub(scaled_pos, ConvertTo(DF4(), idx)); | 116 | | | 117 | | // TODO(veluca): in theory, this could be done with 8 TableLookupBytes, but | 118 | | // it's probably slower. | 119 | 1.28M | auto a = GatherIndex(DF4(), array, idx); | 120 | 1.28M | auto b = GatherIndex(DF4(), array + 1, idx); | 121 | | | 122 | 1.28M | return Mul(a, FastPowf(DF4(), Div(b, a), frac)); | 123 | 1.28M | } |
Unexecuted instantiation: jxl::N_SSE2::InterpolateVec(hwy::N_SSE2::Vec128<float, 4ul>, float const*) |
124 | | |
125 | | // Computes quant weights for a COLS*ROWS-sized transform, using num_bands |
126 | | // eccentricity bands and num_ebands eccentricity bands. If print_mode is 1, |
127 | | // prints the resulting matrix; if print_mode is 2, prints the matrix in a |
128 | | // format suitable for a 3d plot with gnuplot. |
129 | | Status GetQuantWeights( |
130 | | size_t ROWS, size_t COLS, |
131 | | const DctQuantWeightParams::DistanceBandsArray& distance_bands, |
132 | 4.12k | size_t num_bands, float* out) { |
133 | 16.4k | for (size_t c = 0; c < 3; c++) { |
134 | 12.3k | float bands[DctQuantWeightParams::kMaxDistanceBands] = { |
135 | 12.3k | distance_bands[c][0]}; |
136 | 12.3k | if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); |
137 | 76.3k | for (size_t i = 1; i < num_bands; i++) { |
138 | 63.9k | bands[i] = bands[i - 1] * Mult(distance_bands[c][i]); |
139 | 63.9k | if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); |
140 | 63.9k | } |
141 | 12.3k | float scale = (num_bands - 1) / (kSqrt2 + 1e-6f); |
142 | 12.3k | float rcpcol = scale / (COLS - 1); |
143 | 12.3k | float rcprow = scale / (ROWS - 1); |
144 | 12.3k | JXL_ENSURE(COLS >= Lanes(DF4())); |
145 | 12.3k | HWY_ALIGN float l0123[4] = {0, 1, 2, 3}; |
146 | 169k | for (uint32_t y = 0; y < ROWS; y++) { |
147 | 157k | float dy = y * rcprow; |
148 | 157k | float dy2 = dy * dy; |
149 | 1.44M | for (uint32_t x = 0; x < COLS; x += Lanes(DF4())) { |
150 | 1.28M | auto dx = |
151 | 1.28M | Mul(Add(Set(DF4(), x), Load(DF4(), l0123)), Set(DF4(), rcpcol)); |
152 | 1.28M | auto scaled_distance = Sqrt(MulAdd(dx, dx, Set(DF4(), dy2))); |
153 | 1.28M | auto weight = num_bands == 1 ? Set(DF4(), bands[0]) |
154 | 1.28M | : InterpolateVec(scaled_distance, bands); |
155 | 1.28M | StoreU(weight, DF4(), out + c * COLS * ROWS + y * COLS + x); |
156 | 1.28M | } |
157 | 157k | } |
158 | 12.3k | } |
159 | 4.11k | return true; |
160 | 4.12k | } Unexecuted instantiation: jxl::N_SSE4::GetQuantWeights(unsigned long, unsigned long, std::__1::array<std::__1::array<float, 17ul>, 3ul> const&, unsigned long, float*) jxl::N_AVX2::GetQuantWeights(unsigned long, unsigned long, std::__1::array<std::__1::array<float, 17ul>, 3ul> const&, unsigned long, float*) Line | Count | Source | 132 | 4.12k | size_t num_bands, float* out) { | 133 | 16.4k | for (size_t c = 0; c < 3; c++) { | 134 | 12.3k | float bands[DctQuantWeightParams::kMaxDistanceBands] = { | 135 | 12.3k | distance_bands[c][0]}; | 136 | 12.3k | if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); | 137 | 76.3k | for (size_t i = 1; i < num_bands; i++) { | 138 | 63.9k | bands[i] = bands[i - 1] * Mult(distance_bands[c][i]); | 139 | 63.9k | if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); | 140 | 63.9k | } | 141 | 12.3k | float scale = (num_bands - 1) / (kSqrt2 + 1e-6f); | 142 | 12.3k | float rcpcol = scale / (COLS - 1); | 143 | 12.3k | float rcprow = scale / (ROWS - 1); | 144 | 12.3k | JXL_ENSURE(COLS >= Lanes(DF4())); | 145 | 12.3k | HWY_ALIGN float l0123[4] = {0, 1, 2, 3}; | 146 | 169k | for (uint32_t y = 0; y < ROWS; y++) { | 147 | 157k | float dy = y * rcprow; | 148 | 157k | float dy2 = dy * dy; | 149 | 1.44M | for (uint32_t x = 0; x < COLS; x += Lanes(DF4())) { | 150 | 1.28M | auto dx = | 151 | 1.28M | Mul(Add(Set(DF4(), x), Load(DF4(), l0123)), Set(DF4(), rcpcol)); | 152 | 1.28M | auto scaled_distance = Sqrt(MulAdd(dx, dx, Set(DF4(), dy2))); | 153 | 1.28M | auto weight = num_bands == 1 ? Set(DF4(), bands[0]) | 154 | 1.28M | : InterpolateVec(scaled_distance, bands); | 155 | 1.28M | StoreU(weight, DF4(), out + c * COLS * ROWS + y * COLS + x); | 156 | 1.28M | } | 157 | 157k | } | 158 | 12.3k | } | 159 | 4.11k | return true; | 160 | 4.12k | } |
Unexecuted instantiation: jxl::N_SSE2::GetQuantWeights(unsigned long, unsigned long, std::__1::array<std::__1::array<float, 17ul>, 3ul> const&, unsigned long, float*) |
161 | | |
162 | | // TODO(veluca): SIMD-fy. With 256x256, this is actually slow. |
163 | | Status ComputeQuantTable(const QuantEncoding& encoding, |
164 | | float* JXL_RESTRICT table, |
165 | | float* JXL_RESTRICT inv_table, size_t table_num, |
166 | 4.41k | QuantTable kind, size_t* pos) { |
167 | 4.41k | constexpr size_t N = kBlockDim; |
168 | 4.41k | size_t quant_table_idx = static_cast<size_t>(kind); |
169 | 4.41k | size_t wrows = 8 * DequantMatrices::required_size_x[quant_table_idx]; |
170 | 4.41k | size_t wcols = 8 * DequantMatrices::required_size_y[quant_table_idx]; |
171 | 4.41k | size_t num = wrows * wcols; |
172 | | |
173 | 4.41k | std::vector<float> weights(3 * num); |
174 | | |
175 | 4.41k | switch (encoding.mode) { |
176 | 0 | case QuantEncoding::kQuantModeLibrary: { |
177 | | // Library and copy quant encoding should get replaced by the actual |
178 | | // parameters by the caller. |
179 | 0 | JXL_ENSURE(false); |
180 | 0 | break; |
181 | 0 | } |
182 | 274 | case QuantEncoding::kQuantModeID: { |
183 | 274 | JXL_ENSURE(num == kDCTBlockSize); |
184 | 274 | GetQuantWeightsIdentity(encoding.idweights, weights.data()); |
185 | 274 | break; |
186 | 274 | } |
187 | 219 | case QuantEncoding::kQuantModeDCT2: { |
188 | 219 | JXL_ENSURE(num == kDCTBlockSize); |
189 | 219 | GetQuantWeightsDCT2(encoding.dct2weights, weights.data()); |
190 | 219 | break; |
191 | 219 | } |
192 | 214 | case QuantEncoding::kQuantModeDCT4: { |
193 | 214 | JXL_ENSURE(num == kDCTBlockSize); |
194 | 214 | float weights4x4[3 * 4 * 4]; |
195 | | // Always use 4x4 GetQuantWeights for DCT4 quantization tables. |
196 | 214 | JXL_RETURN_IF_ERROR( |
197 | 214 | GetQuantWeights(4, 4, encoding.dct_params.distance_bands, |
198 | 214 | encoding.dct_params.num_distance_bands, weights4x4)); |
199 | 848 | for (size_t c = 0; c < 3; c++) { |
200 | 5.72k | for (size_t y = 0; y < kBlockDim; y++) { |
201 | 45.7k | for (size_t x = 0; x < kBlockDim; x++) { |
202 | 40.7k | weights[c * num + y * kBlockDim + x] = |
203 | 40.7k | weights4x4[c * 16 + (y / 2) * 4 + (x / 2)]; |
204 | 40.7k | } |
205 | 5.08k | } |
206 | 636 | weights[c * num + 1] /= encoding.dct4multipliers[c][0]; |
207 | 636 | weights[c * num + N] /= encoding.dct4multipliers[c][0]; |
208 | 636 | weights[c * num + N + 1] /= encoding.dct4multipliers[c][1]; |
209 | 636 | } |
210 | 212 | break; |
211 | 214 | } |
212 | 202 | case QuantEncoding::kQuantModeDCT4X8: { |
213 | 202 | JXL_ENSURE(num == kDCTBlockSize); |
214 | 202 | float weights4x8[3 * 4 * 8]; |
215 | | // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables. |
216 | 202 | JXL_RETURN_IF_ERROR( |
217 | 202 | GetQuantWeights(4, 8, encoding.dct_params.distance_bands, |
218 | 202 | encoding.dct_params.num_distance_bands, weights4x8)); |
219 | 800 | for (size_t c = 0; c < 3; c++) { |
220 | 5.40k | for (size_t y = 0; y < kBlockDim; y++) { |
221 | 43.2k | for (size_t x = 0; x < kBlockDim; x++) { |
222 | 38.4k | weights[c * num + y * kBlockDim + x] = |
223 | 38.4k | weights4x8[c * 32 + (y / 2) * 8 + x]; |
224 | 38.4k | } |
225 | 4.80k | } |
226 | 600 | weights[c * num + N] /= encoding.dct4x8multipliers[c]; |
227 | 600 | } |
228 | 200 | break; |
229 | 202 | } |
230 | 3.30k | case QuantEncoding::kQuantModeDCT: { |
231 | 3.30k | JXL_RETURN_IF_ERROR(GetQuantWeights( |
232 | 3.30k | wrows, wcols, encoding.dct_params.distance_bands, |
233 | 3.30k | encoding.dct_params.num_distance_bands, weights.data())); |
234 | 3.30k | break; |
235 | 3.30k | } |
236 | 3.30k | case QuantEncoding::kQuantModeRAW: { |
237 | 0 | if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) { |
238 | 0 | return JXL_FAILURE("Invalid table encoding"); |
239 | 0 | } |
240 | 0 | int* qtable = encoding.qraw.qtable->data(); |
241 | 0 | for (size_t i = 0; i < 3 * num; i++) { |
242 | 0 | weights[i] = 1.f / (encoding.qraw.qtable_den * qtable[i]); |
243 | 0 | } |
244 | 0 | break; |
245 | 0 | } |
246 | 203 | case QuantEncoding::kQuantModeAFV: { |
247 | 203 | constexpr float kFreqs[] = { |
248 | 203 | 0xBAD, |
249 | 203 | 0xBAD, |
250 | 203 | 0.8517778890324296, |
251 | 203 | 5.37778436506804, |
252 | 203 | 0xBAD, |
253 | 203 | 0xBAD, |
254 | 203 | 4.734747904497923, |
255 | 203 | 5.449245381693219, |
256 | 203 | 1.6598270267479331, |
257 | 203 | 4, |
258 | 203 | 7.275749096817861, |
259 | 203 | 10.423227632456525, |
260 | 203 | 2.662932286148962, |
261 | 203 | 7.630657783650829, |
262 | 203 | 8.962388608184032, |
263 | 203 | 12.97166202570235, |
264 | 203 | }; |
265 | | |
266 | 203 | float weights4x8[3 * 4 * 8]; |
267 | 203 | JXL_RETURN_IF_ERROR(( |
268 | 203 | GetQuantWeights(4, 8, encoding.dct_params.distance_bands, |
269 | 203 | encoding.dct_params.num_distance_bands, weights4x8))); |
270 | 203 | float weights4x4[3 * 4 * 4]; |
271 | 203 | JXL_RETURN_IF_ERROR((GetQuantWeights( |
272 | 203 | 4, 4, encoding.dct_params_afv_4x4.distance_bands, |
273 | 203 | encoding.dct_params_afv_4x4.num_distance_bands, weights4x4))); |
274 | | |
275 | 203 | constexpr float lo = 0.8517778890324296; |
276 | 203 | constexpr float hi = 12.97166202570235f - lo + 1e-6f; |
277 | 806 | for (size_t c = 0; c < 3; c++) { |
278 | 606 | float bands[4]; |
279 | 606 | bands[0] = encoding.afv_weights[c][5]; |
280 | 606 | if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); |
281 | 2.41k | for (size_t i = 1; i < 4; i++) { |
282 | 1.80k | bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i + 5]); |
283 | 1.80k | if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); |
284 | 1.80k | } |
285 | 603 | size_t start = c * 64; |
286 | 10.2k | auto set_weight = [&start, &weights](size_t x, size_t y, float val) { |
287 | 10.2k | weights[start + y * 8 + x] = val; |
288 | 10.2k | }; Unexecuted instantiation: quant_weights.cc:jxl::N_SSE4::ComputeQuantTable(jxl::QuantEncoding const&, float*, float*, unsigned long, jxl::QuantTable, unsigned long*)::$_0::operator()(unsigned long, unsigned long, float) const quant_weights.cc:jxl::N_AVX2::ComputeQuantTable(jxl::QuantEncoding const&, float*, float*, unsigned long, jxl::QuantTable, unsigned long*)::$_0::operator()(unsigned long, unsigned long, float) const Line | Count | Source | 286 | 10.2k | auto set_weight = [&start, &weights](size_t x, size_t y, float val) { | 287 | 10.2k | weights[start + y * 8 + x] = val; | 288 | 10.2k | }; |
Unexecuted instantiation: quant_weights.cc:jxl::N_SSE2::ComputeQuantTable(jxl::QuantEncoding const&, float*, float*, unsigned long, jxl::QuantTable, unsigned long*)::$_0::operator()(unsigned long, unsigned long, float) const |
289 | 603 | weights[start] = 1; // Not used, but causes MSAN error otherwise. |
290 | | // Weights for (0, 1) and (1, 0). |
291 | 603 | set_weight(0, 1, encoding.afv_weights[c][0]); |
292 | 603 | set_weight(1, 0, encoding.afv_weights[c][1]); |
293 | | // AFV special weights for 3-pixel corner. |
294 | 603 | set_weight(0, 2, encoding.afv_weights[c][2]); |
295 | 603 | set_weight(2, 0, encoding.afv_weights[c][3]); |
296 | 603 | set_weight(2, 2, encoding.afv_weights[c][4]); |
297 | | |
298 | | // All other AFV weights. |
299 | 3.01k | for (size_t y = 0; y < 4; y++) { |
300 | 12.0k | for (size_t x = 0; x < 4; x++) { |
301 | 9.64k | if (x < 2 && y < 2) continue; |
302 | 14.4k | JXL_ASSIGN_OR_RETURN( |
303 | 14.4k | float val, Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4)); |
304 | 14.4k | set_weight(2 * x, 2 * y, val); |
305 | 14.4k | } |
306 | 2.41k | } |
307 | | |
308 | | // Put 4x8 weights in odd rows, except (1, 0). |
309 | 3.01k | for (size_t y = 0; y < kBlockDim / 2; y++) { |
310 | 21.7k | for (size_t x = 0; x < kBlockDim; x++) { |
311 | 19.2k | if (x == 0 && y == 0) continue; |
312 | 18.6k | weights[c * num + (2 * y + 1) * kBlockDim + x] = |
313 | 18.6k | weights4x8[c * 32 + y * 8 + x]; |
314 | 18.6k | } |
315 | 2.41k | } |
316 | | // Put 4x4 weights in even rows / odd columns, except (0, 1). |
317 | 3.01k | for (size_t y = 0; y < kBlockDim / 2; y++) { |
318 | 12.0k | for (size_t x = 0; x < kBlockDim / 2; x++) { |
319 | 9.64k | if (x == 0 && y == 0) continue; |
320 | 9.04k | weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] = |
321 | 9.04k | weights4x4[c * 16 + y * 4 + x]; |
322 | 9.04k | } |
323 | 2.41k | } |
324 | 603 | } |
325 | 200 | break; |
326 | 203 | } |
327 | 4.41k | } |
328 | 4.40k | size_t prev_pos = *pos; |
329 | 4.40k | HWY_CAPPED(float, 64) d; |
330 | 667k | for (size_t i = 0; i < num * 3; i += Lanes(d)) { |
331 | 663k | auto inv_val = LoadU(d, weights.data() + i); |
332 | 663k | if (JXL_UNLIKELY(!AllFalse(d, Ge(inv_val, Set(d, 1.0f / kAlmostZero))) || |
333 | 663k | !AllFalse(d, Lt(inv_val, Set(d, kAlmostZero))))) { |
334 | 21 | return JXL_FAILURE("Invalid quantization table"); |
335 | 21 | } |
336 | 663k | auto val = Div(Set(d, 1.0f), inv_val); |
337 | 663k | StoreU(val, d, table + *pos + i); |
338 | 663k | StoreU(inv_val, d, inv_table + *pos + i); |
339 | 663k | } |
340 | 4.38k | (*pos) += 3 * num; |
341 | | |
342 | | // Ensure that the lowest frequencies have a 0 inverse table. |
343 | | // This does not affect en/decoding, but allows AC strategy selection to be |
344 | | // slightly simpler. |
345 | 4.38k | size_t xs = DequantMatrices::required_size_x[quant_table_idx]; |
346 | 4.38k | size_t ys = DequantMatrices::required_size_y[quant_table_idx]; |
347 | 4.38k | CoefficientLayout(&ys, &xs); |
348 | 17.5k | for (size_t c = 0; c < 3; c++) { |
349 | 34.8k | for (size_t y = 0; y < ys; y++) { |
350 | 104k | for (size_t x = 0; x < xs; x++) { |
351 | 82.8k | inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs + |
352 | 82.8k | x] = 0; |
353 | 82.8k | } |
354 | 21.6k | } |
355 | 13.1k | } |
356 | 4.38k | return true; |
357 | 4.40k | } Unexecuted instantiation: jxl::N_SSE4::ComputeQuantTable(jxl::QuantEncoding const&, float*, float*, unsigned long, jxl::QuantTable, unsigned long*) jxl::N_AVX2::ComputeQuantTable(jxl::QuantEncoding const&, float*, float*, unsigned long, jxl::QuantTable, unsigned long*) Line | Count | Source | 166 | 4.41k | QuantTable kind, size_t* pos) { | 167 | 4.41k | constexpr size_t N = kBlockDim; | 168 | 4.41k | size_t quant_table_idx = static_cast<size_t>(kind); | 169 | 4.41k | size_t wrows = 8 * DequantMatrices::required_size_x[quant_table_idx]; | 170 | 4.41k | size_t wcols = 8 * DequantMatrices::required_size_y[quant_table_idx]; | 171 | 4.41k | size_t num = wrows * wcols; | 172 | | | 173 | 4.41k | std::vector<float> weights(3 * num); | 174 | | | 175 | 4.41k | switch (encoding.mode) { | 176 | 0 | case QuantEncoding::kQuantModeLibrary: { | 177 | | // Library and copy quant encoding should get replaced by the actual | 178 | | // parameters by the caller. | 179 | 0 | JXL_ENSURE(false); | 180 | 0 | break; | 181 | 0 | } | 182 | 274 | case QuantEncoding::kQuantModeID: { | 183 | 274 | JXL_ENSURE(num == kDCTBlockSize); | 184 | 274 | GetQuantWeightsIdentity(encoding.idweights, weights.data()); | 185 | 274 | break; | 186 | 274 | } | 187 | 219 | case QuantEncoding::kQuantModeDCT2: { | 188 | 219 | JXL_ENSURE(num == kDCTBlockSize); | 189 | 219 | GetQuantWeightsDCT2(encoding.dct2weights, weights.data()); | 190 | 219 | break; | 191 | 219 | } | 192 | 214 | case QuantEncoding::kQuantModeDCT4: { | 193 | 214 | JXL_ENSURE(num == kDCTBlockSize); | 194 | 214 | float weights4x4[3 * 4 * 4]; | 195 | | // Always use 4x4 GetQuantWeights for DCT4 quantization tables. 
| 196 | 214 | JXL_RETURN_IF_ERROR( | 197 | 214 | GetQuantWeights(4, 4, encoding.dct_params.distance_bands, | 198 | 214 | encoding.dct_params.num_distance_bands, weights4x4)); | 199 | 848 | for (size_t c = 0; c < 3; c++) { | 200 | 5.72k | for (size_t y = 0; y < kBlockDim; y++) { | 201 | 45.7k | for (size_t x = 0; x < kBlockDim; x++) { | 202 | 40.7k | weights[c * num + y * kBlockDim + x] = | 203 | 40.7k | weights4x4[c * 16 + (y / 2) * 4 + (x / 2)]; | 204 | 40.7k | } | 205 | 5.08k | } | 206 | 636 | weights[c * num + 1] /= encoding.dct4multipliers[c][0]; | 207 | 636 | weights[c * num + N] /= encoding.dct4multipliers[c][0]; | 208 | 636 | weights[c * num + N + 1] /= encoding.dct4multipliers[c][1]; | 209 | 636 | } | 210 | 212 | break; | 211 | 214 | } | 212 | 202 | case QuantEncoding::kQuantModeDCT4X8: { | 213 | 202 | JXL_ENSURE(num == kDCTBlockSize); | 214 | 202 | float weights4x8[3 * 4 * 8]; | 215 | | // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables. | 216 | 202 | JXL_RETURN_IF_ERROR( | 217 | 202 | GetQuantWeights(4, 8, encoding.dct_params.distance_bands, | 218 | 202 | encoding.dct_params.num_distance_bands, weights4x8)); | 219 | 800 | for (size_t c = 0; c < 3; c++) { | 220 | 5.40k | for (size_t y = 0; y < kBlockDim; y++) { | 221 | 43.2k | for (size_t x = 0; x < kBlockDim; x++) { | 222 | 38.4k | weights[c * num + y * kBlockDim + x] = | 223 | 38.4k | weights4x8[c * 32 + (y / 2) * 8 + x]; | 224 | 38.4k | } | 225 | 4.80k | } | 226 | 600 | weights[c * num + N] /= encoding.dct4x8multipliers[c]; | 227 | 600 | } | 228 | 200 | break; | 229 | 202 | } | 230 | 3.30k | case QuantEncoding::kQuantModeDCT: { | 231 | 3.30k | JXL_RETURN_IF_ERROR(GetQuantWeights( | 232 | 3.30k | wrows, wcols, encoding.dct_params.distance_bands, | 233 | 3.30k | encoding.dct_params.num_distance_bands, weights.data())); | 234 | 3.30k | break; | 235 | 3.30k | } | 236 | 3.30k | case QuantEncoding::kQuantModeRAW: { | 237 | 0 | if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) 
{ | 238 | 0 | return JXL_FAILURE("Invalid table encoding"); | 239 | 0 | } | 240 | 0 | int* qtable = encoding.qraw.qtable->data(); | 241 | 0 | for (size_t i = 0; i < 3 * num; i++) { | 242 | 0 | weights[i] = 1.f / (encoding.qraw.qtable_den * qtable[i]); | 243 | 0 | } | 244 | 0 | break; | 245 | 0 | } | 246 | 203 | case QuantEncoding::kQuantModeAFV: { | 247 | 203 | constexpr float kFreqs[] = { | 248 | 203 | 0xBAD, | 249 | 203 | 0xBAD, | 250 | 203 | 0.8517778890324296, | 251 | 203 | 5.37778436506804, | 252 | 203 | 0xBAD, | 253 | 203 | 0xBAD, | 254 | 203 | 4.734747904497923, | 255 | 203 | 5.449245381693219, | 256 | 203 | 1.6598270267479331, | 257 | 203 | 4, | 258 | 203 | 7.275749096817861, | 259 | 203 | 10.423227632456525, | 260 | 203 | 2.662932286148962, | 261 | 203 | 7.630657783650829, | 262 | 203 | 8.962388608184032, | 263 | 203 | 12.97166202570235, | 264 | 203 | }; | 265 | | | 266 | 203 | float weights4x8[3 * 4 * 8]; | 267 | 203 | JXL_RETURN_IF_ERROR(( | 268 | 203 | GetQuantWeights(4, 8, encoding.dct_params.distance_bands, | 269 | 203 | encoding.dct_params.num_distance_bands, weights4x8))); | 270 | 203 | float weights4x4[3 * 4 * 4]; | 271 | 203 | JXL_RETURN_IF_ERROR((GetQuantWeights( | 272 | 203 | 4, 4, encoding.dct_params_afv_4x4.distance_bands, | 273 | 203 | encoding.dct_params_afv_4x4.num_distance_bands, weights4x4))); | 274 | | | 275 | 203 | constexpr float lo = 0.8517778890324296; | 276 | 203 | constexpr float hi = 12.97166202570235f - lo + 1e-6f; | 277 | 806 | for (size_t c = 0; c < 3; c++) { | 278 | 606 | float bands[4]; | 279 | 606 | bands[0] = encoding.afv_weights[c][5]; | 280 | 606 | if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); | 281 | 2.41k | for (size_t i = 1; i < 4; i++) { | 282 | 1.80k | bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i + 5]); | 283 | 1.80k | if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); | 284 | 1.80k | } | 285 | 603 | size_t start = c * 64; | 286 | 603 | auto set_weight = [&start, 
&weights](size_t x, size_t y, float val) { | 287 | 603 | weights[start + y * 8 + x] = val; | 288 | 603 | }; | 289 | 603 | weights[start] = 1; // Not used, but causes MSAN error otherwise. | 290 | | // Weights for (0, 1) and (1, 0). | 291 | 603 | set_weight(0, 1, encoding.afv_weights[c][0]); | 292 | 603 | set_weight(1, 0, encoding.afv_weights[c][1]); | 293 | | // AFV special weights for 3-pixel corner. | 294 | 603 | set_weight(0, 2, encoding.afv_weights[c][2]); | 295 | 603 | set_weight(2, 0, encoding.afv_weights[c][3]); | 296 | 603 | set_weight(2, 2, encoding.afv_weights[c][4]); | 297 | | | 298 | | // All other AFV weights. | 299 | 3.01k | for (size_t y = 0; y < 4; y++) { | 300 | 12.0k | for (size_t x = 0; x < 4; x++) { | 301 | 9.64k | if (x < 2 && y < 2) continue; | 302 | 14.4k | JXL_ASSIGN_OR_RETURN( | 303 | 14.4k | float val, Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4)); | 304 | 14.4k | set_weight(2 * x, 2 * y, val); | 305 | 14.4k | } | 306 | 2.41k | } | 307 | | | 308 | | // Put 4x8 weights in odd rows, except (1, 0). | 309 | 3.01k | for (size_t y = 0; y < kBlockDim / 2; y++) { | 310 | 21.7k | for (size_t x = 0; x < kBlockDim; x++) { | 311 | 19.2k | if (x == 0 && y == 0) continue; | 312 | 18.6k | weights[c * num + (2 * y + 1) * kBlockDim + x] = | 313 | 18.6k | weights4x8[c * 32 + y * 8 + x]; | 314 | 18.6k | } | 315 | 2.41k | } | 316 | | // Put 4x4 weights in even rows / odd columns, except (0, 1). 
| 317 | 3.01k | for (size_t y = 0; y < kBlockDim / 2; y++) { | 318 | 12.0k | for (size_t x = 0; x < kBlockDim / 2; x++) { | 319 | 9.64k | if (x == 0 && y == 0) continue; | 320 | 9.04k | weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] = | 321 | 9.04k | weights4x4[c * 16 + y * 4 + x]; | 322 | 9.04k | } | 323 | 2.41k | } | 324 | 603 | } | 325 | 200 | break; | 326 | 203 | } | 327 | 4.41k | } | 328 | 4.40k | size_t prev_pos = *pos; | 329 | 4.40k | HWY_CAPPED(float, 64) d; | 330 | 667k | for (size_t i = 0; i < num * 3; i += Lanes(d)) { | 331 | 663k | auto inv_val = LoadU(d, weights.data() + i); | 332 | 663k | if (JXL_UNLIKELY(!AllFalse(d, Ge(inv_val, Set(d, 1.0f / kAlmostZero))) || | 333 | 663k | !AllFalse(d, Lt(inv_val, Set(d, kAlmostZero))))) { | 334 | 21 | return JXL_FAILURE("Invalid quantization table"); | 335 | 21 | } | 336 | 663k | auto val = Div(Set(d, 1.0f), inv_val); | 337 | 663k | StoreU(val, d, table + *pos + i); | 338 | 663k | StoreU(inv_val, d, inv_table + *pos + i); | 339 | 663k | } | 340 | 4.38k | (*pos) += 3 * num; | 341 | | | 342 | | // Ensure that the lowest frequencies have a 0 inverse table. | 343 | | // This does not affect en/decoding, but allows AC strategy selection to be | 344 | | // slightly simpler. | 345 | 4.38k | size_t xs = DequantMatrices::required_size_x[quant_table_idx]; | 346 | 4.38k | size_t ys = DequantMatrices::required_size_y[quant_table_idx]; | 347 | 4.38k | CoefficientLayout(&ys, &xs); | 348 | 17.5k | for (size_t c = 0; c < 3; c++) { | 349 | 34.8k | for (size_t y = 0; y < ys; y++) { | 350 | 104k | for (size_t x = 0; x < xs; x++) { | 351 | 82.8k | inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs + | 352 | 82.8k | x] = 0; | 353 | 82.8k | } | 354 | 21.6k | } | 355 | 13.1k | } | 356 | 4.38k | return true; | 357 | 4.40k | } |
Unexecuted instantiation: jxl::N_SSE2::ComputeQuantTable(jxl::QuantEncoding const&, float*, float*, unsigned long, jxl::QuantTable, unsigned long*) |
358 | | |
359 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
360 | | } // namespace HWY_NAMESPACE |
361 | | } // namespace jxl |
362 | | HWY_AFTER_NAMESPACE(); |
363 | | |
364 | | #if HWY_ONCE |
365 | | |
366 | | namespace jxl { |
367 | | namespace { |
368 | | |
369 | | HWY_EXPORT(ComputeQuantTable); |
370 | | |
// Lower bound used by the decoding helpers below: decoded quantization
// parameters smaller than this are rejected as invalid. It has the same value
// as the per-target kAlmostZero declared inside HWY_NAMESPACE earlier in this
// file.
constexpr const float kAlmostZero = 1e-8f;
372 | | |
373 | 119 | Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) { |
374 | 119 | params->num_distance_bands = |
375 | 119 | br->ReadFixedBits<DctQuantWeightParams::kLog2MaxDistanceBands>() + 1; |
376 | 374 | for (size_t c = 0; c < 3; c++) { |
377 | 1.81k | for (size_t i = 0; i < params->num_distance_bands; i++) { |
378 | 1.51k | JXL_RETURN_IF_ERROR(F16Coder::Read(br, ¶ms->distance_bands[c][i])); |
379 | 1.51k | } |
380 | 296 | if (params->distance_bands[c][0] < kAlmostZero) { |
381 | 41 | return JXL_FAILURE("Distance band seed is too small"); |
382 | 41 | } |
383 | 255 | params->distance_bands[c][0] *= 64.0f; |
384 | 255 | } |
385 | 73 | return true; |
386 | 119 | } |
387 | | |
388 | | Status Decode(JxlMemoryManager* memory_manager, BitReader* br, |
389 | | QuantEncoding* encoding, size_t required_size_x, |
390 | | size_t required_size_y, size_t idx, |
391 | 2.51k | ModularFrameDecoder* modular_frame_decoder) { |
392 | 2.51k | size_t required_size = required_size_x * required_size_y; |
393 | 2.51k | required_size_x *= kBlockDim; |
394 | 2.51k | required_size_y *= kBlockDim; |
395 | 2.51k | int mode = br->ReadFixedBits<kLog2NumQuantModes>(); |
396 | 2.51k | switch (mode) { |
397 | 2.16k | case QuantEncoding::kQuantModeLibrary: { |
398 | 2.16k | encoding->predefined = br->ReadFixedBits<kCeilLog2NumPredefinedTables>(); |
399 | 2.16k | if (encoding->predefined >= kNumPredefinedTables) { |
400 | 0 | return JXL_FAILURE("Invalid predefined table"); |
401 | 0 | } |
402 | 2.16k | break; |
403 | 2.16k | } |
404 | 2.16k | case QuantEncoding::kQuantModeID: { |
405 | 48 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
406 | 139 | for (size_t c = 0; c < 3; c++) { |
407 | 415 | for (size_t i = 0; i < 3; i++) { |
408 | 321 | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->idweights[c][i])); |
409 | 312 | if (std::abs(encoding->idweights[c][i]) < kAlmostZero) { |
410 | 11 | return JXL_FAILURE("ID Quantizer is too small"); |
411 | 11 | } |
412 | 301 | encoding->idweights[c][i] *= 64; |
413 | 301 | } |
414 | 114 | } |
415 | 25 | break; |
416 | 45 | } |
417 | 68 | case QuantEncoding::kQuantModeDCT2: { |
418 | 68 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
419 | 166 | for (size_t c = 0; c < 3; c++) { |
420 | 855 | for (size_t i = 0; i < 6; i++) { |
421 | 755 | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->dct2weights[c][i])); |
422 | 735 | if (std::abs(encoding->dct2weights[c][i]) < kAlmostZero) { |
423 | 28 | return JXL_FAILURE("Quantizer is too small"); |
424 | 28 | } |
425 | 707 | encoding->dct2weights[c][i] *= 64; |
426 | 707 | } |
427 | 148 | } |
428 | 18 | break; |
429 | 66 | } |
430 | 44 | case QuantEncoding::kQuantModeDCT4X8: { |
431 | 44 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
432 | 142 | for (size_t c = 0; c < 3; c++) { |
433 | 111 | JXL_RETURN_IF_ERROR( |
434 | 111 | F16Coder::Read(br, &encoding->dct4x8multipliers[c])); |
435 | 106 | if (std::abs(encoding->dct4x8multipliers[c]) < kAlmostZero) { |
436 | 5 | return JXL_FAILURE("DCT4X8 multiplier is too small"); |
437 | 5 | } |
438 | 106 | } |
439 | 31 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
440 | 18 | break; |
441 | 31 | } |
442 | 31 | case QuantEncoding::kQuantModeDCT4: { |
443 | 31 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
444 | 90 | for (size_t c = 0; c < 3; c++) { |
445 | 203 | for (size_t i = 0; i < 2; i++) { |
446 | 142 | JXL_RETURN_IF_ERROR( |
447 | 142 | F16Coder::Read(br, &encoding->dct4multipliers[c][i])); |
448 | 136 | if (std::abs(encoding->dct4multipliers[c][i]) < kAlmostZero) { |
449 | 7 | return JXL_FAILURE("DCT4 multiplier is too small"); |
450 | 7 | } |
451 | 136 | } |
452 | 74 | } |
453 | 16 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
454 | 13 | break; |
455 | 16 | } |
456 | 47 | case QuantEncoding::kQuantModeAFV: { |
457 | 47 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
458 | 129 | for (size_t c = 0; c < 3; c++) { |
459 | 966 | for (size_t i = 0; i < 9; i++) { |
460 | 883 | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->afv_weights[c][i])); |
461 | 883 | } |
462 | 581 | for (size_t i = 0; i < 6; i++) { |
463 | 498 | encoding->afv_weights[c][i] *= 64; |
464 | 498 | } |
465 | 83 | } |
466 | 18 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
467 | 11 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params_afv_4x4)); |
468 | 10 | break; |
469 | 11 | } |
470 | 43 | case QuantEncoding::kQuantModeDCT: { |
471 | 43 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
472 | 21 | break; |
473 | 43 | } |
474 | 68 | case QuantEncoding::kQuantModeRAW: { |
475 | | // Set mode early, to avoid mem-leak. |
476 | 68 | encoding->mode = QuantEncoding::kQuantModeRAW; |
477 | 68 | JXL_RETURN_IF_ERROR(ModularFrameDecoder::DecodeQuantTable( |
478 | 68 | memory_manager, required_size_x, required_size_y, br, encoding, idx, |
479 | 68 | modular_frame_decoder)); |
480 | 10 | break; |
481 | 68 | } |
482 | 10 | default: |
483 | 0 | return JXL_FAILURE("Invalid quantization table encoding"); |
484 | 2.51k | } |
485 | 2.28k | encoding->mode = static_cast<QuantEncoding::Mode>(mode); |
486 | 2.28k | return true; |
487 | 2.51k | } |
488 | | |
489 | | } // namespace |
490 | | |
// Before C++17, constexpr static data members are not implicitly inline, so
// those that are odr-used need an out-of-class definition in one translation
// unit. These lines provide that definition for DequantMatrices and are
// compiled out for C++17 and later.
#if JXL_CXX_LANG < JXL_CXX_17
constexpr const std::array<int, 17> DequantMatrices::required_size_x;
constexpr const std::array<int, 17> DequantMatrices::required_size_y;
constexpr const size_t DequantMatrices::kSumRequiredXy;
#endif
496 | | |
497 | | Status DequantMatrices::Decode(JxlMemoryManager* memory_manager, BitReader* br, |
498 | 2.05k | ModularFrameDecoder* modular_frame_decoder) { |
499 | 2.05k | size_t all_default = br->ReadBits(1); |
500 | 2.05k | size_t num_tables = all_default ? 0 : static_cast<size_t>(kNumQuantTables); |
501 | 2.05k | encodings_.clear(); |
502 | 2.05k | encodings_.resize(kNumQuantTables, QuantEncoding::Library<0>()); |
503 | 4.33k | for (size_t i = 0; i < num_tables; i++) { |
504 | 2.51k | JXL_RETURN_IF_ERROR(jxl::Decode(memory_manager, br, &encodings_[i], |
505 | 2.51k | required_size_x[i % kNumQuantTables], |
506 | 2.51k | required_size_y[i % kNumQuantTables], i, |
507 | 2.51k | modular_frame_decoder)); |
508 | 2.51k | } |
509 | 1.81k | computed_mask_ = 0; |
510 | 1.81k | return true; |
511 | 2.05k | } |
512 | | |
513 | 25.7k | Status DequantMatrices::DecodeDC(BitReader* br) { |
514 | 25.7k | bool all_default = static_cast<bool>(br->ReadBits(1)); |
515 | 25.7k | if (!br->AllReadsWithinBounds()) return JXL_FAILURE("EOS during DecodeDC"); |
516 | 25.6k | if (!all_default) { |
517 | 28.1k | for (size_t c = 0; c < 3; c++) { |
518 | 21.0k | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &dc_quant_[c])); |
519 | 21.0k | dc_quant_[c] *= 1.0f / 128.0f; |
520 | | // Negative values and nearly zero are invalid values. |
521 | 21.0k | if (dc_quant_[c] < kAlmostZero) { |
522 | 35 | return JXL_FAILURE("Invalid dc_quant: coefficient is too small."); |
523 | 35 | } |
524 | 21.0k | inv_dc_quant_[c] = 1.0f / dc_quant_[c]; |
525 | 21.0k | } |
526 | 7.04k | } |
527 | 25.5k | return true; |
528 | 25.6k | } |
529 | | |
// Narrows a numeric literal to float; the conversion happens when the argument
// is bound to the float parameter, so the value is returned unchanged.
constexpr float V(float v) { return v; }
531 | | |
532 | | namespace { |
533 | | struct DequantMatricesLibraryDef { |
534 | | // DCT8 |
535 | 2 | static constexpr QuantEncodingInternal DCT() { |
536 | 2 | return QuantEncodingInternal::DCT(DctQuantWeightParams({{{{ |
537 | 2 | V(3150.0), |
538 | 2 | V(0.0), |
539 | 2 | V(-0.4), |
540 | 2 | V(-0.4), |
541 | 2 | V(-0.4), |
542 | 2 | V(-2.0), |
543 | 2 | }}, |
544 | 2 | {{ |
545 | 2 | V(560.0), |
546 | 2 | V(0.0), |
547 | 2 | V(-0.3), |
548 | 2 | V(-0.3), |
549 | 2 | V(-0.3), |
550 | 2 | V(-0.3), |
551 | 2 | }}, |
552 | 2 | {{ |
553 | 2 | V(512.0), |
554 | 2 | V(-2.0), |
555 | 2 | V(-1.0), |
556 | 2 | V(0.0), |
557 | 2 | V(-1.0), |
558 | 2 | V(-2.0), |
559 | 2 | }}}}, |
560 | 2 | 6)); |
561 | 2 | } |
562 | | |
563 | | // Identity |
564 | 2 | static constexpr QuantEncodingInternal IDENTITY() { |
565 | 2 | return QuantEncodingInternal::Identity({{{{ |
566 | 2 | V(280.0), |
567 | 2 | V(3160.0), |
568 | 2 | V(3160.0), |
569 | 2 | }}, |
570 | 2 | {{ |
571 | 2 | V(60.0), |
572 | 2 | V(864.0), |
573 | 2 | V(864.0), |
574 | 2 | }}, |
575 | 2 | {{ |
576 | 2 | V(18.0), |
577 | 2 | V(200.0), |
578 | 2 | V(200.0), |
579 | 2 | }}}}); |
580 | 2 | } |
581 | | |
582 | | // DCT2 |
583 | 2 | static constexpr QuantEncodingInternal DCT2X2() { |
584 | 2 | return QuantEncodingInternal::DCT2({{{{ |
585 | 2 | V(3840.0), |
586 | 2 | V(2560.0), |
587 | 2 | V(1280.0), |
588 | 2 | V(640.0), |
589 | 2 | V(480.0), |
590 | 2 | V(300.0), |
591 | 2 | }}, |
592 | 2 | {{ |
593 | 2 | V(960.0), |
594 | 2 | V(640.0), |
595 | 2 | V(320.0), |
596 | 2 | V(180.0), |
597 | 2 | V(140.0), |
598 | 2 | V(120.0), |
599 | 2 | }}, |
600 | 2 | {{ |
601 | 2 | V(640.0), |
602 | 2 | V(320.0), |
603 | 2 | V(128.0), |
604 | 2 | V(64.0), |
605 | 2 | V(32.0), |
606 | 2 | V(16.0), |
607 | 2 | }}}}); |
608 | 2 | } |
609 | | |
610 | | // DCT4 (quant_kind 3) |
611 | 4 | static constexpr QuantEncodingInternal DCT4X4() { |
612 | 4 | return QuantEncodingInternal::DCT4(DctQuantWeightParams({{{{ |
613 | 4 | V(2200.0), |
614 | 4 | V(0.0), |
615 | 4 | V(0.0), |
616 | 4 | V(0.0), |
617 | 4 | }}, |
618 | 4 | {{ |
619 | 4 | V(392.0), |
620 | 4 | V(0.0), |
621 | 4 | V(0.0), |
622 | 4 | V(0.0), |
623 | 4 | }}, |
624 | 4 | {{ |
625 | 4 | V(112.0), |
626 | 4 | V(-0.25), |
627 | 4 | V(-0.25), |
628 | 4 | V(-0.5), |
629 | 4 | }}}}, |
630 | 4 | 4), |
631 | | /* kMul */ |
632 | 4 | {{{{ |
633 | 4 | V(1.0), |
634 | 4 | V(1.0), |
635 | 4 | }}, |
636 | 4 | {{ |
637 | 4 | V(1.0), |
638 | 4 | V(1.0), |
639 | 4 | }}, |
640 | 4 | {{ |
641 | 4 | V(1.0), |
642 | 4 | V(1.0), |
643 | 4 | }}}}); |
644 | 4 | } |
645 | | |
646 | | // DCT16 |
647 | 2 | static constexpr QuantEncodingInternal DCT16X16() { |
648 | 2 | return QuantEncodingInternal::DCT( |
649 | 2 | DctQuantWeightParams({{{{ |
650 | 2 | V(8996.8725711814115328), |
651 | 2 | V(-1.3000777393353804), |
652 | 2 | V(-0.49424529824571225), |
653 | 2 | V(-0.439093774457103443), |
654 | 2 | V(-0.6350101832695744), |
655 | 2 | V(-0.90177264050827612), |
656 | 2 | V(-1.6162099239887414), |
657 | 2 | }}, |
658 | 2 | {{ |
659 | 2 | V(3191.48366296844234752), |
660 | 2 | V(-0.67424582104194355), |
661 | 2 | V(-0.80745813428471001), |
662 | 2 | V(-0.44925837484843441), |
663 | 2 | V(-0.35865440981033403), |
664 | 2 | V(-0.31322389111877305), |
665 | 2 | V(-0.37615025315725483), |
666 | 2 | }}, |
667 | 2 | {{ |
668 | 2 | V(1157.50408145487200256), |
669 | 2 | V(-2.0531423165804414), |
670 | 2 | V(-1.4), |
671 | 2 | V(-0.50687130033378396), |
672 | 2 | V(-0.42708730624733904), |
673 | 2 | V(-1.4856834539296244), |
674 | 2 | V(-4.9209142884401604), |
675 | 2 | }}}}, |
676 | 2 | 7)); |
677 | 2 | } |
678 | | |
679 | | // DCT32 |
680 | 2 | static constexpr QuantEncodingInternal DCT32X32() { |
681 | 2 | return QuantEncodingInternal::DCT( |
682 | 2 | DctQuantWeightParams({{{{ |
683 | 2 | V(15718.40830982518931456), |
684 | 2 | V(-1.025), |
685 | 2 | V(-0.98), |
686 | 2 | V(-0.9012), |
687 | 2 | V(-0.4), |
688 | 2 | V(-0.48819395464), |
689 | 2 | V(-0.421064), |
690 | 2 | V(-0.27), |
691 | 2 | }}, |
692 | 2 | {{ |
693 | 2 | V(7305.7636810695983104), |
694 | 2 | V(-0.8041958212306401), |
695 | 2 | V(-0.7633036457487539), |
696 | 2 | V(-0.55660379990111464), |
697 | 2 | V(-0.49785304658857626), |
698 | 2 | V(-0.43699592683512467), |
699 | 2 | V(-0.40180866526242109), |
700 | 2 | V(-0.27321683125358037), |
701 | 2 | }}, |
702 | 2 | {{ |
703 | 2 | V(3803.53173721215041536), |
704 | 2 | V(-3.060733579805728), |
705 | 2 | V(-2.0413270132490346), |
706 | 2 | V(-2.0235650159727417), |
707 | 2 | V(-0.5495389509954993), |
708 | 2 | V(-0.4), |
709 | 2 | V(-0.4), |
710 | 2 | V(-0.3), |
711 | 2 | }}}}, |
712 | 2 | 8)); |
713 | 2 | } |
714 | | |
715 | | // DCT16X8 |
716 | 2 | static constexpr QuantEncodingInternal DCT8X16() { |
717 | 2 | return QuantEncodingInternal::DCT( |
718 | 2 | DctQuantWeightParams({{{{ |
719 | 2 | V(7240.7734393502), |
720 | 2 | V(-0.7), |
721 | 2 | V(-0.7), |
722 | 2 | V(-0.2), |
723 | 2 | V(-0.2), |
724 | 2 | V(-0.2), |
725 | 2 | V(-0.5), |
726 | 2 | }}, |
727 | 2 | {{ |
728 | 2 | V(1448.15468787004), |
729 | 2 | V(-0.5), |
730 | 2 | V(-0.5), |
731 | 2 | V(-0.5), |
732 | 2 | V(-0.2), |
733 | 2 | V(-0.2), |
734 | 2 | V(-0.2), |
735 | 2 | }}, |
736 | 2 | {{ |
737 | 2 | V(506.854140754517), |
738 | 2 | V(-1.4), |
739 | 2 | V(-0.2), |
740 | 2 | V(-0.5), |
741 | 2 | V(-0.5), |
742 | 2 | V(-1.5), |
743 | 2 | V(-3.6), |
744 | 2 | }}}}, |
745 | 2 | 7)); |
746 | 2 | } |
747 | | |
748 | | // DCT32X8 |
749 | 2 | static constexpr QuantEncodingInternal DCT8X32() { |
750 | 2 | return QuantEncodingInternal::DCT( |
751 | 2 | DctQuantWeightParams({{{{ |
752 | 2 | V(16283.2494710648897), |
753 | 2 | V(-1.7812845336559429), |
754 | 2 | V(-1.6309059012653515), |
755 | 2 | V(-1.0382179034313539), |
756 | 2 | V(-0.85), |
757 | 2 | V(-0.7), |
758 | 2 | V(-0.9), |
759 | 2 | V(-1.2360638576849587), |
760 | 2 | }}, |
761 | 2 | {{ |
762 | 2 | V(5089.15750884921511936), |
763 | 2 | V(-0.320049391452786891), |
764 | 2 | V(-0.35362849922161446), |
765 | 2 | V(-0.30340000000000003), |
766 | 2 | V(-0.61), |
767 | 2 | V(-0.5), |
768 | 2 | V(-0.5), |
769 | 2 | V(-0.6), |
770 | 2 | }}, |
771 | 2 | {{ |
772 | 2 | V(3397.77603275308720128), |
773 | 2 | V(-0.321327362693153371), |
774 | 2 | V(-0.34507619223117997), |
775 | 2 | V(-0.70340000000000003), |
776 | 2 | V(-0.9), |
777 | 2 | V(-1.0), |
778 | 2 | V(-1.0), |
779 | 2 | V(-1.1754605576265209), |
780 | 2 | }}}}, |
781 | 2 | 8)); |
782 | 2 | } |
783 | | |
784 | | // DCT32X16 |
785 | 2 | static constexpr QuantEncodingInternal DCT16X32() { |
786 | 2 | return QuantEncodingInternal::DCT( |
787 | 2 | DctQuantWeightParams({{{{ |
788 | 2 | V(13844.97076442300573), |
789 | 2 | V(-0.97113799999999995), |
790 | 2 | V(-0.658), |
791 | 2 | V(-0.42026), |
792 | 2 | V(-0.22712), |
793 | 2 | V(-0.2206), |
794 | 2 | V(-0.226), |
795 | 2 | V(-0.6), |
796 | 2 | }}, |
797 | 2 | {{ |
798 | 2 | V(4798.964084220744293), |
799 | 2 | V(-0.61125308982767057), |
800 | 2 | V(-0.83770786552491361), |
801 | 2 | V(-0.79014862079498627), |
802 | 2 | V(-0.2692727459704829), |
803 | 2 | V(-0.38272769465388551), |
804 | 2 | V(-0.22924222653091453), |
805 | 2 | V(-0.20719098826199578), |
806 | 2 | }}, |
807 | 2 | {{ |
808 | 2 | V(1807.236946760964614), |
809 | 2 | V(-1.2), |
810 | 2 | V(-1.2), |
811 | 2 | V(-0.7), |
812 | 2 | V(-0.7), |
813 | 2 | V(-0.7), |
814 | 2 | V(-0.4), |
815 | 2 | V(-0.5), |
816 | 2 | }}}}, |
817 | 2 | 8)); |
818 | 2 | } |
819 | | |
820 | | // DCT4X8 and 8x4 |
821 | 4 | static constexpr QuantEncodingInternal DCT4X8() { |
822 | 4 | return QuantEncodingInternal::DCT4X8( |
823 | 4 | DctQuantWeightParams({{ |
824 | 4 | {{ |
825 | 4 | V(2198.050556016380522), |
826 | 4 | V(-0.96269623020744692), |
827 | 4 | V(-0.76194253026666783), |
828 | 4 | V(-0.6551140670773547), |
829 | 4 | }}, |
830 | 4 | {{ |
831 | 4 | V(764.3655248643528689), |
832 | 4 | V(-0.92630200888366945), |
833 | 4 | V(-0.9675229603596517), |
834 | 4 | V(-0.27845290869168118), |
835 | 4 | }}, |
836 | 4 | {{ |
837 | 4 | V(527.107573587542228), |
838 | 4 | V(-1.4594385811273854), |
839 | 4 | V(-1.450082094097871593), |
840 | 4 | V(-1.5843722511996204), |
841 | 4 | }}, |
842 | 4 | }}, |
843 | 4 | 4), |
844 | | /* kMuls */ |
845 | 4 | {{ |
846 | 4 | V(1.0), |
847 | 4 | V(1.0), |
848 | 4 | V(1.0), |
849 | 4 | }}); |
850 | 4 | } |
851 | | // AFV |
852 | 2 | static QuantEncodingInternal AFV0() { |
853 | 2 | return QuantEncodingInternal::AFV(DCT4X8().dct_params, DCT4X4().dct_params, |
854 | 2 | {{{{ |
855 | | // 4x4/4x8 DC tendency. |
856 | 2 | V(3072.0), |
857 | 2 | V(3072.0), |
858 | | // AFV corner. |
859 | 2 | V(256.0), |
860 | 2 | V(256.0), |
861 | 2 | V(256.0), |
862 | | // AFV high freqs. |
863 | 2 | V(414.0), |
864 | 2 | V(0.0), |
865 | 2 | V(0.0), |
866 | 2 | V(0.0), |
867 | 2 | }}, |
868 | 2 | {{ |
869 | | // 4x4/4x8 DC tendency. |
870 | 2 | V(1024.0), |
871 | 2 | V(1024.0), |
872 | | // AFV corner. |
873 | 2 | V(50), |
874 | 2 | V(50), |
875 | 2 | V(50), |
876 | | // AFV high freqs. |
877 | 2 | V(58.0), |
878 | 2 | V(0.0), |
879 | 2 | V(0.0), |
880 | 2 | V(0.0), |
881 | 2 | }}, |
882 | 2 | {{ |
883 | | // 4x4/4x8 DC tendency. |
884 | 2 | V(384.0), |
885 | 2 | V(384.0), |
886 | | // AFV corner. |
887 | 2 | V(12.0), |
888 | 2 | V(12.0), |
889 | 2 | V(12.0), |
890 | | // AFV high freqs. |
891 | 2 | V(22.0), |
892 | 2 | V(-0.25), |
893 | 2 | V(-0.25), |
894 | 2 | V(-0.25), |
895 | 2 | }}}}); |
896 | 2 | } |
897 | | |
898 | | // DCT64 |
899 | 2 | static QuantEncodingInternal DCT64X64() { |
900 | 2 | return QuantEncodingInternal::DCT( |
901 | 2 | DctQuantWeightParams({{{{ |
902 | 2 | V(0.9 * 26629.073922049845), |
903 | 2 | V(-1.025), |
904 | 2 | V(-0.78), |
905 | 2 | V(-0.65012), |
906 | 2 | V(-0.19041574084286472), |
907 | 2 | V(-0.20819395464), |
908 | 2 | V(-0.421064), |
909 | 2 | V(-0.32733845535848671), |
910 | 2 | }}, |
911 | 2 | {{ |
912 | 2 | V(0.9 * 9311.3238710010046), |
913 | 2 | V(-0.3041958212306401), |
914 | 2 | V(-0.3633036457487539), |
915 | 2 | V(-0.35660379990111464), |
916 | 2 | V(-0.3443074455424403), |
917 | 2 | V(-0.33699592683512467), |
918 | 2 | V(-0.30180866526242109), |
919 | 2 | V(-0.27321683125358037), |
920 | 2 | }}, |
921 | 2 | {{ |
922 | 2 | V(0.9 * 4992.2486445538634), |
923 | 2 | V(-1.2), |
924 | 2 | V(-1.2), |
925 | 2 | V(-0.8), |
926 | 2 | V(-0.7), |
927 | 2 | V(-0.7), |
928 | 2 | V(-0.4), |
929 | 2 | V(-0.5), |
930 | 2 | }}}}, |
931 | 2 | 8)); |
932 | 2 | } |
933 | | |
934 | | // DCT64X32 |
935 | 2 | static QuantEncodingInternal DCT32X64() { |
936 | 2 | return QuantEncodingInternal::DCT( |
937 | 2 | DctQuantWeightParams({{{{ |
938 | 2 | V(0.65 * 23629.073922049845), |
939 | 2 | V(-1.025), |
940 | 2 | V(-0.78), |
941 | 2 | V(-0.65012), |
942 | 2 | V(-0.19041574084286472), |
943 | 2 | V(-0.20819395464), |
944 | 2 | V(-0.421064), |
945 | 2 | V(-0.32733845535848671), |
946 | 2 | }}, |
947 | 2 | {{ |
948 | 2 | V(0.65 * 8611.3238710010046), |
949 | 2 | V(-0.3041958212306401), |
950 | 2 | V(-0.3633036457487539), |
951 | 2 | V(-0.35660379990111464), |
952 | 2 | V(-0.3443074455424403), |
953 | 2 | V(-0.33699592683512467), |
954 | 2 | V(-0.30180866526242109), |
955 | 2 | V(-0.27321683125358037), |
956 | 2 | }}, |
957 | 2 | {{ |
958 | 2 | V(0.65 * 4492.2486445538634), |
959 | 2 | V(-1.2), |
960 | 2 | V(-1.2), |
961 | 2 | V(-0.8), |
962 | 2 | V(-0.7), |
963 | 2 | V(-0.7), |
964 | 2 | V(-0.4), |
965 | 2 | V(-0.5), |
966 | 2 | }}}}, |
967 | 2 | 8)); |
968 | 2 | } |
969 | | // DCT128X128 |
970 | 2 | static QuantEncodingInternal DCT128X128() { |
971 | 2 | return QuantEncodingInternal::DCT( |
972 | 2 | DctQuantWeightParams({{{{ |
973 | 2 | V(1.8 * 26629.073922049845), |
974 | 2 | V(-1.025), |
975 | 2 | V(-0.78), |
976 | 2 | V(-0.65012), |
977 | 2 | V(-0.19041574084286472), |
978 | 2 | V(-0.20819395464), |
979 | 2 | V(-0.421064), |
980 | 2 | V(-0.32733845535848671), |
981 | 2 | }}, |
982 | 2 | {{ |
983 | 2 | V(1.8 * 9311.3238710010046), |
984 | 2 | V(-0.3041958212306401), |
985 | 2 | V(-0.3633036457487539), |
986 | 2 | V(-0.35660379990111464), |
987 | 2 | V(-0.3443074455424403), |
988 | 2 | V(-0.33699592683512467), |
989 | 2 | V(-0.30180866526242109), |
990 | 2 | V(-0.27321683125358037), |
991 | 2 | }}, |
992 | 2 | {{ |
993 | 2 | V(1.8 * 4992.2486445538634), |
994 | 2 | V(-1.2), |
995 | 2 | V(-1.2), |
996 | 2 | V(-0.8), |
997 | 2 | V(-0.7), |
998 | 2 | V(-0.7), |
999 | 2 | V(-0.4), |
1000 | 2 | V(-0.5), |
1001 | 2 | }}}}, |
1002 | 2 | 8)); |
1003 | 2 | } |
1004 | | |
1005 | | // DCT128X64 |
1006 | 2 | static QuantEncodingInternal DCT64X128() { |
1007 | 2 | return QuantEncodingInternal::DCT( |
1008 | 2 | DctQuantWeightParams({{{{ |
1009 | 2 | V(1.3 * 23629.073922049845), |
1010 | 2 | V(-1.025), |
1011 | 2 | V(-0.78), |
1012 | 2 | V(-0.65012), |
1013 | 2 | V(-0.19041574084286472), |
1014 | 2 | V(-0.20819395464), |
1015 | 2 | V(-0.421064), |
1016 | 2 | V(-0.32733845535848671), |
1017 | 2 | }}, |
1018 | 2 | {{ |
1019 | 2 | V(1.3 * 8611.3238710010046), |
1020 | 2 | V(-0.3041958212306401), |
1021 | 2 | V(-0.3633036457487539), |
1022 | 2 | V(-0.35660379990111464), |
1023 | 2 | V(-0.3443074455424403), |
1024 | 2 | V(-0.33699592683512467), |
1025 | 2 | V(-0.30180866526242109), |
1026 | 2 | V(-0.27321683125358037), |
1027 | 2 | }}, |
1028 | 2 | {{ |
1029 | 2 | V(1.3 * 4492.2486445538634), |
1030 | 2 | V(-1.2), |
1031 | 2 | V(-1.2), |
1032 | 2 | V(-0.8), |
1033 | 2 | V(-0.7), |
1034 | 2 | V(-0.7), |
1035 | 2 | V(-0.4), |
1036 | 2 | V(-0.5), |
1037 | 2 | }}}}, |
1038 | 2 | 8)); |
1039 | 2 | } |
1040 | | // DCT256X256 |
1041 | 2 | static QuantEncodingInternal DCT256X256() { |
1042 | 2 | return QuantEncodingInternal::DCT( |
1043 | 2 | DctQuantWeightParams({{{{ |
1044 | 2 | V(3.6 * 26629.073922049845), |
1045 | 2 | V(-1.025), |
1046 | 2 | V(-0.78), |
1047 | 2 | V(-0.65012), |
1048 | 2 | V(-0.19041574084286472), |
1049 | 2 | V(-0.20819395464), |
1050 | 2 | V(-0.421064), |
1051 | 2 | V(-0.32733845535848671), |
1052 | 2 | }}, |
1053 | 2 | {{ |
1054 | 2 | V(3.6 * 9311.3238710010046), |
1055 | 2 | V(-0.3041958212306401), |
1056 | 2 | V(-0.3633036457487539), |
1057 | 2 | V(-0.35660379990111464), |
1058 | 2 | V(-0.3443074455424403), |
1059 | 2 | V(-0.33699592683512467), |
1060 | 2 | V(-0.30180866526242109), |
1061 | 2 | V(-0.27321683125358037), |
1062 | 2 | }}, |
1063 | 2 | {{ |
1064 | 2 | V(3.6 * 4992.2486445538634), |
1065 | 2 | V(-1.2), |
1066 | 2 | V(-1.2), |
1067 | 2 | V(-0.8), |
1068 | 2 | V(-0.7), |
1069 | 2 | V(-0.7), |
1070 | 2 | V(-0.4), |
1071 | 2 | V(-0.5), |
1072 | 2 | }}}}, |
1073 | 2 | 8)); |
1074 | 2 | } |
1075 | | |
1076 | | // DCT256X128 |
1077 | 2 | static QuantEncodingInternal DCT128X256() { |
1078 | 2 | return QuantEncodingInternal::DCT( |
1079 | 2 | DctQuantWeightParams({{{{ |
1080 | 2 | V(2.6 * 23629.073922049845), |
1081 | 2 | V(-1.025), |
1082 | 2 | V(-0.78), |
1083 | 2 | V(-0.65012), |
1084 | 2 | V(-0.19041574084286472), |
1085 | 2 | V(-0.20819395464), |
1086 | 2 | V(-0.421064), |
1087 | 2 | V(-0.32733845535848671), |
1088 | 2 | }}, |
1089 | 2 | {{ |
1090 | 2 | V(2.6 * 8611.3238710010046), |
1091 | 2 | V(-0.3041958212306401), |
1092 | 2 | V(-0.3633036457487539), |
1093 | 2 | V(-0.35660379990111464), |
1094 | 2 | V(-0.3443074455424403), |
1095 | 2 | V(-0.33699592683512467), |
1096 | 2 | V(-0.30180866526242109), |
1097 | 2 | V(-0.27321683125358037), |
1098 | 2 | }}, |
1099 | 2 | {{ |
1100 | 2 | V(2.6 * 4492.2486445538634), |
1101 | 2 | V(-1.2), |
1102 | 2 | V(-1.2), |
1103 | 2 | V(-0.8), |
1104 | 2 | V(-0.7), |
1105 | 2 | V(-0.7), |
1106 | 2 | V(-0.4), |
1107 | 2 | V(-0.5), |
1108 | 2 | }}}}, |
1109 | 2 | 8)); |
1110 | 2 | } |
1111 | | }; |
1112 | | } // namespace |
1113 | | |
1114 | 2 | DequantMatrices::DequantLibraryInternal DequantMatrices::LibraryInit() { |
1115 | 2 | static_assert(kNumQuantTables == 17, |
1116 | 2 | "Update this function when adding new quantization kinds."); |
1117 | 2 | static_assert(kNumPredefinedTables == 1, |
1118 | 2 | "Update this function when adding new quantization matrices to " |
1119 | 2 | "the library."); |
1120 | | |
1121 | | // The library and the indices need to be kept in sync manually. |
1122 | 2 | static_assert(0 == static_cast<uint8_t>(QuantTable::DCT), |
1123 | 2 | "Update the DequantLibrary array below."); |
1124 | 2 | static_assert(1 == static_cast<uint8_t>(QuantTable::IDENTITY), |
1125 | 2 | "Update the DequantLibrary array below."); |
1126 | 2 | static_assert(2 == static_cast<uint8_t>(QuantTable::DCT2X2), |
1127 | 2 | "Update the DequantLibrary array below."); |
1128 | 2 | static_assert(3 == static_cast<uint8_t>(QuantTable::DCT4X4), |
1129 | 2 | "Update the DequantLibrary array below."); |
1130 | 2 | static_assert(4 == static_cast<uint8_t>(QuantTable::DCT16X16), |
1131 | 2 | "Update the DequantLibrary array below."); |
1132 | 2 | static_assert(5 == static_cast<uint8_t>(QuantTable::DCT32X32), |
1133 | 2 | "Update the DequantLibrary array below."); |
1134 | 2 | static_assert(6 == static_cast<uint8_t>(QuantTable::DCT8X16), |
1135 | 2 | "Update the DequantLibrary array below."); |
1136 | 2 | static_assert(7 == static_cast<uint8_t>(QuantTable::DCT8X32), |
1137 | 2 | "Update the DequantLibrary array below."); |
1138 | 2 | static_assert(8 == static_cast<uint8_t>(QuantTable::DCT16X32), |
1139 | 2 | "Update the DequantLibrary array below."); |
1140 | 2 | static_assert(9 == static_cast<uint8_t>(QuantTable::DCT4X8), |
1141 | 2 | "Update the DequantLibrary array below."); |
1142 | 2 | static_assert(10 == static_cast<uint8_t>(QuantTable::AFV0), |
1143 | 2 | "Update the DequantLibrary array below."); |
1144 | 2 | static_assert(11 == static_cast<uint8_t>(QuantTable::DCT64X64), |
1145 | 2 | "Update the DequantLibrary array below."); |
1146 | 2 | static_assert(12 == static_cast<uint8_t>(QuantTable::DCT32X64), |
1147 | 2 | "Update the DequantLibrary array below."); |
1148 | 2 | static_assert(13 == static_cast<uint8_t>(QuantTable::DCT128X128), |
1149 | 2 | "Update the DequantLibrary array below."); |
1150 | 2 | static_assert(14 == static_cast<uint8_t>(QuantTable::DCT64X128), |
1151 | 2 | "Update the DequantLibrary array below."); |
1152 | 2 | static_assert(15 == static_cast<uint8_t>(QuantTable::DCT256X256), |
1153 | 2 | "Update the DequantLibrary array below."); |
1154 | 2 | static_assert(16 == static_cast<uint8_t>(QuantTable::DCT128X256), |
1155 | 2 | "Update the DequantLibrary array below."); |
1156 | 2 | return DequantMatrices::DequantLibraryInternal{{ |
1157 | 2 | DequantMatricesLibraryDef::DCT(), |
1158 | 2 | DequantMatricesLibraryDef::IDENTITY(), |
1159 | 2 | DequantMatricesLibraryDef::DCT2X2(), |
1160 | 2 | DequantMatricesLibraryDef::DCT4X4(), |
1161 | 2 | DequantMatricesLibraryDef::DCT16X16(), |
1162 | 2 | DequantMatricesLibraryDef::DCT32X32(), |
1163 | 2 | DequantMatricesLibraryDef::DCT8X16(), |
1164 | 2 | DequantMatricesLibraryDef::DCT8X32(), |
1165 | 2 | DequantMatricesLibraryDef::DCT16X32(), |
1166 | 2 | DequantMatricesLibraryDef::DCT4X8(), |
1167 | 2 | DequantMatricesLibraryDef::AFV0(), |
1168 | 2 | DequantMatricesLibraryDef::DCT64X64(), |
1169 | 2 | DequantMatricesLibraryDef::DCT32X64(), |
1170 | | // Same default for large transforms (128+) as for 64x* transforms. |
1171 | 2 | DequantMatricesLibraryDef::DCT128X128(), |
1172 | 2 | DequantMatricesLibraryDef::DCT64X128(), |
1173 | 2 | DequantMatricesLibraryDef::DCT256X256(), |
1174 | 2 | DequantMatricesLibraryDef::DCT128X256(), |
1175 | 2 | }}; |
1176 | 2 | } |
1177 | | |
1178 | 2.00k | const QuantEncoding* DequantMatrices::Library() { |
1179 | 2.00k | static const DequantMatrices::DequantLibraryInternal kDequantLibrary = |
1180 | 2.00k | DequantMatrices::LibraryInit(); |
1181 | | // Downcast the result to a const QuantEncoding* from QuantEncodingInternal* |
1182 | | // since the subclass (QuantEncoding) doesn't add any new members and users |
1183 | | // will need to upcast to QuantEncodingInternal to access the members of that |
1184 | | // class. This allows to have kDequantLibrary as a constexpr value while still |
1185 | | // allowing to create QuantEncoding::RAW() instances that use std::vector in |
1186 | | // C++11. |
1187 | 2.00k | return reinterpret_cast<const QuantEncoding*>(kDequantLibrary.data()); |
1188 | 2.00k | } |
1189 | | |
1190 | 54.7k | DequantMatrices::DequantMatrices() { |
1191 | 54.7k | encodings_.resize(kNumQuantTables, QuantEncoding::Library<0>()); |
1192 | 54.7k | size_t pos = 0; |
1193 | 54.7k | size_t offsets[kNumQuantTables * 3]; |
1194 | 986k | for (size_t i = 0; i < static_cast<size_t>(kNumQuantTables); i++) { |
1195 | 931k | size_t num = required_size_x[i] * required_size_y[i] * kDCTBlockSize; |
1196 | 3.72M | for (size_t c = 0; c < 3; c++) { |
1197 | 2.79M | offsets[3 * i + c] = pos + c * num; |
1198 | 2.79M | } |
1199 | 931k | pos += 3 * num; |
1200 | 931k | } |
1201 | 1.53M | for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { |
1202 | 5.91M | for (size_t c = 0; c < 3; c++) { |
1203 | 4.43M | table_offsets_[i * 3 + c] = |
1204 | 4.43M | offsets[static_cast<size_t>(kAcStrategyToQuantTableMap[i]) * 3 + c]; |
1205 | 4.43M | } |
1206 | 1.47M | } |
1207 | 54.7k | } |
1208 | | |
1209 | | Status DequantMatrices::EnsureComputed(JxlMemoryManager* memory_manager, |
1210 | 2.00k | uint32_t acs_mask) { |
1211 | 2.00k | const QuantEncoding* library = Library(); |
1212 | | |
1213 | 2.00k | if (!table_storage_) { |
1214 | 2.00k | size_t table_storage_bytes = 2 * kTotalTableSize * sizeof(float); |
1215 | 2.00k | JXL_ASSIGN_OR_RETURN( |
1216 | 2.00k | table_storage_, |
1217 | 2.00k | AlignedMemory::Create(memory_manager, table_storage_bytes)); |
1218 | 2.00k | table_ = table_storage_.address<float>(); |
1219 | 2.00k | inv_table_ = table_ + kTotalTableSize; |
1220 | 2.00k | } |
1221 | | |
1222 | 2.00k | size_t offsets[kNumQuantTables * 3 + 1]; |
1223 | 2.00k | size_t pos = 0; |
1224 | 36.0k | for (size_t i = 0; i < kNumQuantTables; i++) { |
1225 | 34.0k | size_t num = required_size_x[i] * required_size_y[i] * kDCTBlockSize; |
1226 | 136k | for (size_t c = 0; c < 3; c++) { |
1227 | 102k | offsets[3 * i + c] = pos + c * num; |
1228 | 102k | } |
1229 | 34.0k | pos += 3 * num; |
1230 | 34.0k | } |
1231 | 2.00k | offsets[kNumQuantTables * 3] = pos; |
1232 | 2.00k | JXL_ENSURE(pos == kTotalTableSize); |
1233 | | |
1234 | 2.00k | uint32_t kind_mask = 0; |
1235 | 56.0k | for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { |
1236 | 54.0k | if (acs_mask & (1u << i)) { |
1237 | 5.90k | kind_mask |= 1u << static_cast<uint32_t>(kAcStrategyToQuantTableMap[i]); |
1238 | 5.90k | } |
1239 | 54.0k | } |
1240 | 2.00k | uint32_t computed_kind_mask = 0; |
1241 | 56.0k | for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { |
1242 | 54.0k | if (computed_mask_ & (1u << i)) { |
1243 | 0 | computed_kind_mask |= |
1244 | 0 | 1u << static_cast<uint32_t>(kAcStrategyToQuantTableMap[i]); |
1245 | 0 | } |
1246 | 54.0k | } |
1247 | 35.5k | for (size_t table = 0; table < kNumQuantTables; table++) { |
1248 | 33.6k | if ((1 << table) & computed_kind_mask) continue; |
1249 | 33.6k | if ((1 << table) & ~kind_mask) continue; |
1250 | 4.41k | size_t offset = offsets[table * 3]; |
1251 | 4.41k | float* mutable_table = table_storage_.address<float>(); |
1252 | 4.41k | if (encodings_[table].mode == QuantEncoding::kQuantModeLibrary) { |
1253 | 4.37k | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)( |
1254 | 4.37k | library[table], mutable_table, mutable_table + kTotalTableSize, table, |
1255 | 4.37k | QuantTable(table), &offset)); |
1256 | 4.37k | } else { |
1257 | 42 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)( |
1258 | 42 | encodings_[table], mutable_table, mutable_table + kTotalTableSize, |
1259 | 42 | table, QuantTable(table), &offset)); |
1260 | 42 | } |
1261 | 4.38k | JXL_ENSURE(offset == offsets[table * 3 + 3]); |
1262 | 4.38k | } |
1263 | 1.97k | computed_mask_ |= acs_mask; |
1264 | | |
1265 | 1.97k | return true; |
1266 | 2.00k | } |
1267 | | |
1268 | | } // namespace jxl |
1269 | | #endif |