/src/libjxl/lib/jxl/quant_weights.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | #include "lib/jxl/quant_weights.h" |
6 | | |
7 | | #include <jxl/memory_manager.h> |
8 | | |
9 | | #include <cmath> |
10 | | #include <cstdio> |
11 | | #include <cstdlib> |
12 | | |
13 | | #include "lib/jxl/base/compiler_specific.h" |
14 | | #include "lib/jxl/base/status.h" |
15 | | #include "lib/jxl/dct_scales.h" |
16 | | #include "lib/jxl/dec_modular.h" |
17 | | #include "lib/jxl/fields.h" |
18 | | #include "lib/jxl/memory_manager_internal.h" |
19 | | |
20 | | #undef HWY_TARGET_INCLUDE |
21 | | #define HWY_TARGET_INCLUDE "lib/jxl/quant_weights.cc" |
22 | | #include <hwy/foreach_target.h> |
23 | | #include <hwy/highway.h> |
24 | | |
25 | | #include "lib/jxl/base/fast_math-inl.h" |
26 | | |
27 | | HWY_BEFORE_NAMESPACE(); |
28 | | namespace jxl { |
29 | | namespace HWY_NAMESPACE { |
30 | | |
31 | | // These templates are not found via ADL. |
32 | | using hwy::HWY_NAMESPACE::Lt; |
33 | | using hwy::HWY_NAMESPACE::MulAdd; |
34 | | using hwy::HWY_NAMESPACE::Sqrt; |
35 | | |
36 | | // kQuantWeights[N * N * c + N * y + x] is the relative weight of the (x, y) |
37 | | // coefficient in component c. Higher weights correspond to finer quantization |
38 | | // intervals and more bits spent in encoding. |
39 | | // Lower bound for accepted weights and bands; smaller values are rejected. |
40 | | static constexpr const float kAlmostZero = 1e-8f; |
41 | | // Expands six DCT2 parameters per channel into three 64-entry (8x8) blocks: [0] -> (0,1),(1,0); [1] -> (1,1); [2],[3] -> rest of the top-left 4x4; [4],[5] -> rest of the 8x8. Entry 0 is the placeholder 0xBAD. |
42 | | void GetQuantWeightsDCT2(const QuantEncoding::DCT2Weights& dct2weights, |
43 | 597 | float* weights) { |
44 | 2.38k | for (size_t c = 0; c < 3; c++) { |
45 | 1.79k | size_t start = c * 64; |
46 | 1.79k | weights[start] = 0xBAD; |
47 | 1.79k | weights[start + 1] = weights[start + 8] = dct2weights[c][0]; |
48 | 1.79k | weights[start + 9] = dct2weights[c][1]; |
49 | 5.37k | for (size_t y = 0; y < 2; y++) { |
50 | 10.7k | for (size_t x = 0; x < 2; x++) { |
51 | 7.16k | weights[start + y * 8 + x + 2] = dct2weights[c][2]; |
52 | 7.16k | weights[start + (y + 2) * 8 + x] = dct2weights[c][2]; |
53 | 7.16k | } |
54 | 3.58k | } |
55 | 5.37k | for (size_t y = 0; y < 2; y++) { |
56 | 10.7k | for (size_t x = 0; x < 2; x++) { |
57 | 7.16k | weights[start + (y + 2) * 8 + x + 2] = dct2weights[c][3]; |
58 | 7.16k | } |
59 | 3.58k | } |
60 | 8.95k | for (size_t y = 0; y < 4; y++) { |
61 | 35.8k | for (size_t x = 0; x < 4; x++) { |
62 | 28.6k | weights[start + y * 8 + x + 4] = dct2weights[c][4]; |
63 | 28.6k | weights[start + (y + 4) * 8 + x] = dct2weights[c][4]; |
64 | 28.6k | } |
65 | 7.16k | } |
66 | 8.95k | for (size_t y = 0; y < 4; y++) { |
67 | 35.8k | for (size_t x = 0; x < 4; x++) { |
68 | 28.6k | weights[start + (y + 4) * 8 + x + 4] = dct2weights[c][5]; |
69 | 28.6k | } |
70 | 7.16k | } |
71 | 1.79k | } |
72 | 597 | } |
73 | | // Fills three 64-entry blocks: every entry gets idweights[c][0], then entries 1 and 8 get idweights[c][1] and entry 9 gets idweights[c][2]. |
74 | | void GetQuantWeightsIdentity(const QuantEncoding::IdWeights& idweights, |
75 | 2.80k | float* weights) { |
76 | 11.2k | for (size_t c = 0; c < 3; c++) { |
77 | 546k | for (int i = 0; i < 64; i++) { |
78 | 538k | weights[64 * c + i] = idweights[c][0]; |
79 | 538k | } |
80 | 8.41k | weights[64 * c + 1] = idweights[c][1]; |
81 | 8.41k | weights[64 * c + 8] = idweights[c][1]; |
82 | 8.41k | weights[64 * c + 9] = idweights[c][2]; |
83 | 8.41k | } |
84 | 2.80k | } |
85 | | // Geometric interpolation in `array` (len entries): scales pos / max to index units and returns a * (b / a)^frac for the two neighbouring entries a, b. JXL_ENSURE requires idx + 1 < len. |
86 | | StatusOr<float> Interpolate(float pos, float max, const float* array, |
87 | 151k | size_t len) { |
88 | 151k | float scaled_pos = pos * (len - 1) / max; |
89 | 151k | size_t idx = scaled_pos; |
90 | 151k | JXL_ENSURE(idx + 1 < len); |
91 | 151k | float a = array[idx]; |
92 | 151k | float b = array[idx + 1]; |
93 | 151k | return a * FastPowf(b / a, scaled_pos - idx); |
94 | 151k | } |
95 | | // Maps v to a positive multiplier: 1 + v when v > 0, otherwise 1 / (1 - v). |
96 | 368k | float Mult(float v) { |
97 | 368k | if (v > 0.0f) return 1.0f + v; |
98 | 368k | return 1.0f / (1.0f - v); |
99 | 368k | } |
100 | | |
101 | | using DF4 = HWY_CAPPED(float, 4); |
102 | | // Vector form of Interpolate: scaled_pos is already in index units; each lane yields a * (b / a)^frac. Unlike Interpolate there is no bounds check on idx + 1 here. |
103 | | hwy::HWY_NAMESPACE::Vec<DF4> InterpolateVec( |
104 | 5.27M | hwy::HWY_NAMESPACE::Vec<DF4> scaled_pos, const float* array) { |
105 | 5.27M | HWY_CAPPED(int32_t, 4) di; |
106 | | // Integer index of the lower table entry (float -> int32 conversion). |
107 | 5.27M | auto idx = ConvertTo(di, scaled_pos); |
108 | | // Fractional part: the position minus its integer index. |
109 | 5.27M | auto frac = Sub(scaled_pos, ConvertTo(DF4(), idx)); |
110 | |
111 | | // TODO(veluca): in theory, this could be done with 8 TableLookupBytes, but |
112 | | // it's probably slower. |
113 | 5.27M | auto a = GatherIndex(DF4(), array, idx); |
114 | 5.27M | auto b = GatherIndex(DF4(), array + 1, idx); |
115 | |
116 | 5.27M | return Mul(a, FastPowf(DF4(), Div(b, a), frac)); |
117 | 5.27M | } |
118 | | |
119 | | // Computes quant weights for a ROWS x COLS transform from num_bands distance |
120 | | // bands per channel: band i is band i-1 times Mult(distance_bands[c][i]), and |
121 | | // each coefficient interpolates the bands by its scaled distance from (0, 0). |
122 | | // Writes 3 * ROWS * COLS floats to out; fails if any band is < kAlmostZero. |
123 | | Status GetQuantWeights( |
124 | | size_t ROWS, size_t COLS, |
125 | | const DctQuantWeightParams::DistanceBandsArray& distance_bands, |
126 | 23.8k | size_t num_bands, float* out) { |
127 | 95.1k | for (size_t c = 0; c < 3; c++) { |
128 | 71.4k | float bands[DctQuantWeightParams::kMaxDistanceBands] = { |
129 | 71.4k | distance_bands[c][0]}; |
130 | 71.4k | if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); |
131 | 402k | for (size_t i = 1; i < num_bands; i++) { |
132 | 330k | bands[i] = bands[i - 1] * Mult(distance_bands[c][i]); |
133 | 330k | if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); |
134 | 330k | } |
135 | 71.3k | float scale = (num_bands - 1) / (kSqrt2 + 1e-6f); |
136 | 71.3k | float rcpcol = scale / (COLS - 1); |
137 | 71.3k | float rcprow = scale / (ROWS - 1); |
138 | 71.3k | JXL_ENSURE(COLS >= Lanes(DF4())); |
139 | 71.3k | HWY_ALIGN float l0123[4] = {0, 1, 2, 3}; |
140 | 543k | for (uint32_t y = 0; y < ROWS; y++) { |
141 | 472k | float dy = y * rcprow; |
142 | 472k | float dy2 = dy * dy; |
143 | 5.75M | for (uint32_t x = 0; x < COLS; x += Lanes(DF4())) { |
144 | 5.28M | auto dx = |
145 | 5.28M | Mul(Add(Set(DF4(), x), Load(DF4(), l0123)), Set(DF4(), rcpcol)); |
146 | 5.28M | auto scaled_distance = Sqrt(MulAdd(dx, dx, Set(DF4(), dy2))); |
147 | 5.28M | auto weight = num_bands == 1 ? Set(DF4(), bands[0]) |
148 | 5.28M | : InterpolateVec(scaled_distance, bands); |
149 | 5.28M | StoreU(weight, DF4(), out + c * COLS * ROWS + y * COLS + x); |
150 | 5.28M | } |
151 | 472k | } |
152 | 71.3k | } |
153 | 23.7k | return true; |
154 | 23.8k | } |
155 | | // Builds the 3-channel weights for `kind` from `encoding`, range-checks them, then stores the weights to inv_table and their reciprocals to table at offset *pos, advancing *pos by 3 * num. |
156 | | // TODO(veluca): SIMD-fy. With 256x256, this is actually slow. |
157 | | Status ComputeQuantTable(const QuantEncoding& encoding, |
158 | | float* JXL_RESTRICT table, |
159 | | float* JXL_RESTRICT inv_table, size_t table_num, |
160 | 22.9k | QuantTable kind, size_t* pos) { |
161 | 22.9k | constexpr size_t N = kBlockDim; |
162 | 22.9k | size_t quant_table_idx = static_cast<size_t>(kind); |
163 | 22.9k | size_t wrows = 8 * DequantMatrices::required_size_x[quant_table_idx]; |
164 | 22.9k | size_t wcols = 8 * DequantMatrices::required_size_y[quant_table_idx]; |
165 | 22.9k | size_t num = wrows * wcols; |
166 | |
167 | 22.9k | std::vector<float> weights(3 * num); |
168 | | // Fill `weights` according to the encoding mode. |
169 | 22.9k | switch (encoding.mode) { |
170 | 0 | case QuantEncoding::kQuantModeLibrary: { |
171 | | // Library and copy quant encoding should get replaced by the actual |
172 | | // parameters by the caller. |
173 | 0 | JXL_ENSURE(false); |
174 | 0 | break; |
175 | 0 | } |
176 | 2.80k | case QuantEncoding::kQuantModeID: { |
177 | 2.80k | JXL_ENSURE(num == kDCTBlockSize); |
178 | 2.80k | GetQuantWeightsIdentity(encoding.idweights, weights.data()); |
179 | 2.80k | break; |
180 | 2.80k | } |
181 | 597 | case QuantEncoding::kQuantModeDCT2: { |
182 | 597 | JXL_ENSURE(num == kDCTBlockSize); |
183 | 597 | GetQuantWeightsDCT2(encoding.dct2weights, weights.data()); |
184 | 597 | break; |
185 | 597 | } |
186 | 80 | case QuantEncoding::kQuantModeDCT4: { |
187 | 80 | JXL_ENSURE(num == kDCTBlockSize); |
188 | 80 | float weights4x4[3 * 4 * 4]; |
189 | | // Always use 4x4 GetQuantWeights for DCT4 quantization tables. |
190 | 80 | JXL_RETURN_IF_ERROR( |
191 | 80 | GetQuantWeights(4, 4, encoding.dct_params.distance_bands, |
192 | 80 | encoding.dct_params.num_distance_bands, weights4x4)); |
193 | 304 | for (size_t c = 0; c < 3; c++) { |
194 | 2.05k | for (size_t y = 0; y < kBlockDim; y++) { |
195 | 16.4k | for (size_t x = 0; x < kBlockDim; x++) { |
196 | 14.5k | weights[c * num + y * kBlockDim + x] = |
197 | 14.5k | weights4x4[c * 16 + (y / 2) * 4 + (x / 2)]; |
198 | 14.5k | } |
199 | 1.82k | } |
200 | 228 | weights[c * num + 1] /= encoding.dct4multipliers[c][0]; |
201 | 228 | weights[c * num + N] /= encoding.dct4multipliers[c][0]; |
202 | 228 | weights[c * num + N + 1] /= encoding.dct4multipliers[c][1]; |
203 | 228 | } |
204 | 76 | break; |
205 | 80 | } |
206 | 24 | case QuantEncoding::kQuantModeDCT4X8: { |
207 | 24 | JXL_ENSURE(num == kDCTBlockSize); |
208 | 24 | float weights4x8[3 * 4 * 8]; |
209 | | // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables. |
210 | 24 | JXL_RETURN_IF_ERROR( |
211 | 24 | GetQuantWeights(4, 8, encoding.dct_params.distance_bands, |
212 | 24 | encoding.dct_params.num_distance_bands, weights4x8)); |
213 | 92 | for (size_t c = 0; c < 3; c++) { |
214 | 621 | for (size_t y = 0; y < kBlockDim; y++) { |
215 | 4.96k | for (size_t x = 0; x < kBlockDim; x++) { |
216 | 4.41k | weights[c * num + y * kBlockDim + x] = |
217 | 4.41k | weights4x8[c * 32 + (y / 2) * 8 + x]; |
218 | 4.41k | } |
219 | 552 | } |
220 | 69 | weights[c * num + N] /= encoding.dct4x8multipliers[c]; |
221 | 69 | } |
222 | 23 | break; |
223 | 24 | } |
224 | 15.2k | case QuantEncoding::kQuantModeDCT: { |
225 | 15.2k | JXL_RETURN_IF_ERROR(GetQuantWeights( |
226 | 15.2k | wrows, wcols, encoding.dct_params.distance_bands, |
227 | 15.2k | encoding.dct_params.num_distance_bands, weights.data())); |
228 | 15.2k | break; |
229 | 15.2k | } |
230 | 15.2k | case QuantEncoding::kQuantModeRAW: { |
231 | 1 | if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) { |
232 | 0 | return JXL_FAILURE("Invalid table encoding"); |
233 | 0 | } |
234 | 1 | int* qtable = encoding.qraw.qtable->data(); |
235 | 193 | for (size_t i = 0; i < 3 * num; i++) { |
236 | 192 | weights[i] = 1.f / (encoding.qraw.qtable_den * qtable[i]); |
237 | 192 | } |
238 | 1 | break; |
239 | 1 | } |
240 | 4.22k | case QuantEncoding::kQuantModeAFV: { |
241 | 4.22k | constexpr float kFreqs[] = { |
242 | 4.22k | 0xBAD, |
243 | 4.22k | 0xBAD, |
244 | 4.22k | 0.8517778890324296, |
245 | 4.22k | 5.37778436506804, |
246 | 4.22k | 0xBAD, |
247 | 4.22k | 0xBAD, |
248 | 4.22k | 4.734747904497923, |
249 | 4.22k | 5.449245381693219, |
250 | 4.22k | 1.6598270267479331, |
251 | 4.22k | 4, |
252 | 4.22k | 7.275749096817861, |
253 | 4.22k | 10.423227632456525, |
254 | 4.22k | 2.662932286148962, |
255 | 4.22k | 7.630657783650829, |
256 | 4.22k | 8.962388608184032, |
257 | 4.22k | 12.97166202570235, |
258 | 4.22k | }; |
259 | | // The 0xBAD entries of kFreqs (x < 2 && y < 2) are skipped by the interpolation loop below. |
260 | 4.22k | float weights4x8[3 * 4 * 8]; |
261 | 4.22k | JXL_RETURN_IF_ERROR(( |
262 | 4.22k | GetQuantWeights(4, 8, encoding.dct_params.distance_bands, |
263 | 4.22k | encoding.dct_params.num_distance_bands, weights4x8))); |
264 | 4.22k | float weights4x4[3 * 4 * 4]; |
265 | 4.22k | JXL_RETURN_IF_ERROR((GetQuantWeights( |
266 | 4.22k | 4, 4, encoding.dct_params_afv_4x4.distance_bands, |
267 | 4.22k | encoding.dct_params_afv_4x4.num_distance_bands, weights4x4))); |
268 | | // lo is the smallest interpolated kFreqs entry; hi is the span up to the largest plus an epsilon, so the scaled position passed to Interpolate stays below len - 1. |
269 | 4.22k | constexpr float lo = 0.8517778890324296; |
270 | 4.22k | constexpr float hi = 12.97166202570235f - lo + 1e-6f; |
271 | 16.8k | for (size_t c = 0; c < 3; c++) { |
272 | 12.6k | float bands[4]; |
273 | 12.6k | bands[0] = encoding.afv_weights[c][5]; |
274 | 12.6k | if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); |
275 | 50.6k | for (size_t i = 1; i < 4; i++) { |
276 | 37.9k | bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i + 5]); |
277 | 37.9k | if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); |
278 | 37.9k | } |
279 | 12.6k | size_t start = c * 64; |
280 | 215k | auto set_weight = [&start, &weights](size_t x, size_t y, float val) { |
281 | 215k | weights[start + y * 8 + x] = val; |
282 | 215k | }; |
283 | 12.6k | weights[start] = 1; // Not used, but causes MSAN error otherwise. |
284 | | // Weights for (0, 1) and (1, 0). |
285 | 12.6k | set_weight(0, 1, encoding.afv_weights[c][0]); |
286 | 12.6k | set_weight(1, 0, encoding.afv_weights[c][1]); |
287 | | // AFV special weights for 3-pixel corner. |
288 | 12.6k | set_weight(0, 2, encoding.afv_weights[c][2]); |
289 | 12.6k | set_weight(2, 0, encoding.afv_weights[c][3]); |
290 | 12.6k | set_weight(2, 2, encoding.afv_weights[c][4]); |
291 | |
292 | | // All other AFV weights. |
293 | 63.3k | for (size_t y = 0; y < 4; y++) { |
294 | 253k | for (size_t x = 0; x < 4; x++) { |
295 | 202k | if (x < 2 && y < 2) continue; |
296 | 303k | JXL_ASSIGN_OR_RETURN( |
297 | 303k | float val, Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4)); |
298 | 303k | set_weight(2 * x, 2 * y, val); |
299 | 303k | } |
300 | 50.6k | } |
301 | |
302 | | // Put 4x8 weights in odd rows, except (1, 0). |
303 | 63.3k | for (size_t y = 0; y < kBlockDim / 2; y++) { |
304 | 455k | for (size_t x = 0; x < kBlockDim; x++) { |
305 | 405k | if (x == 0 && y == 0) continue; |
306 | 392k | weights[c * num + (2 * y + 1) * kBlockDim + x] = |
307 | 392k | weights4x8[c * 32 + y * 8 + x]; |
308 | 392k | } |
309 | 50.6k | } |
310 | | // Put 4x4 weights in even rows / odd columns, except (0, 1). |
311 | 63.3k | for (size_t y = 0; y < kBlockDim / 2; y++) { |
312 | 253k | for (size_t x = 0; x < kBlockDim / 2; x++) { |
313 | 202k | if (x == 0 && y == 0) continue; |
314 | 189k | weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] = |
315 | 189k | weights4x4[c * 16 + y * 4 + x]; |
316 | 189k | } |
317 | 50.6k | } |
318 | 12.6k | } |
319 | 4.21k | break; |
320 | 4.22k | } |
321 | 22.9k | } |
322 | 22.9k | size_t prev_pos = *pos; |
323 | 22.9k | HWY_CAPPED(float, 64) d; |
324 | 6.16M | for (size_t i = 0; i < num * 3; i += Lanes(d)) { |
325 | 6.14M | auto inv_val = LoadU(d, weights.data() + i); |
326 | 6.14M | if (JXL_UNLIKELY(!AllFalse(d, Ge(inv_val, Set(d, 1.0f / kAlmostZero))) || |
327 | 6.14M | !AllFalse(d, Lt(inv_val, Set(d, kAlmostZero))))) { |
328 | 19 | return JXL_FAILURE("Invalid quantization table"); |
329 | 19 | } |
330 | 6.14M | auto val = Div(Set(d, 1.0f), inv_val); |
331 | 6.14M | StoreU(val, d, table + *pos + i); |
332 | 6.14M | StoreU(inv_val, d, inv_table + *pos + i); |
333 | 6.14M | } |
334 | 22.9k | (*pos) += 3 * num; |
335 | |
336 | | // Ensure that the lowest frequencies have a 0 inverse table. |
337 | | // This does not affect en/decoding, but allows AC strategy selection to be |
338 | | // slightly simpler. |
339 | 22.9k | size_t xs = DequantMatrices::required_size_x[quant_table_idx]; |
340 | 22.9k | size_t ys = DequantMatrices::required_size_y[quant_table_idx]; |
341 | 22.9k | CoefficientLayout(&ys, &xs); |
342 | 91.7k | for (size_t c = 0; c < 3; c++) { |
343 | 138k | for (size_t y = 0; y < ys; y++) { |
344 | 165k | for (size_t x = 0; x < xs; x++) { |
345 | 96.0k | inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs + |
346 | 96.0k | x] = 0; |
347 | 96.0k | } |
348 | 69.2k | } |
349 | 68.8k | } |
350 | 22.9k | return true; |
351 | 22.9k | } |
352 | | |
353 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
354 | | } // namespace HWY_NAMESPACE |
355 | | } // namespace jxl |
356 | | HWY_AFTER_NAMESPACE(); |
357 | | |
358 | | #if HWY_ONCE |
359 | | |
360 | | namespace jxl { |
361 | | namespace { |
362 | | |
363 | | HWY_EXPORT(ComputeQuantTable); |
364 | | // Same threshold value as the per-target kAlmostZero; used by the bitstream decoders below. |
365 | | constexpr const float kAlmostZero = 1e-8f; |
366 | | // Reads the band count (kLog2MaxDistanceBands bits, plus 1) and that many F16 bands per channel; band 0 must be >= kAlmostZero and is then scaled by 64. |
367 | 177 | Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) { |
368 | 177 | params->num_distance_bands = |
369 | 177 | br->ReadFixedBits<DctQuantWeightParams::kLog2MaxDistanceBands>() + 1; |
370 | 608 | for (size_t c = 0; c < 3; c++) { |
371 | 2.30k | for (size_t i = 0; i < params->num_distance_bands; i++) { |
372 | 1.83k | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &params->distance_bands[c][i])); |
373 | 1.83k | } |
374 | 472 | if (params->distance_bands[c][0] < kAlmostZero) { |
375 | 41 | return JXL_FAILURE("Distance band seed is too small"); |
376 | 41 | } |
377 | 431 | params->distance_bands[c][0] *= 64.0f; |
378 | 431 | } |
379 | 131 | return true; |
380 | 177 | } |
381 | | // Reads one quant-table encoding from br into *encoding: a mode tag, then mode-specific parameters. ID, DCT2, DCT4, DCT4X8 and AFV are only accepted when required_size_x * required_size_y == 1. |
382 | | Status Decode(JxlMemoryManager* memory_manager, BitReader* br, |
383 | | QuantEncoding* encoding, size_t required_size_x, |
384 | | size_t required_size_y, size_t idx, |
385 | 3.67k | ModularFrameDecoder* modular_frame_decoder) { |
386 | 3.67k | size_t required_size = required_size_x * required_size_y; |
387 | 3.67k | required_size_x *= kBlockDim; |
388 | 3.67k | required_size_y *= kBlockDim; |
389 | 3.67k | int mode = br->ReadFixedBits<kLog2NumQuantModes>(); |
390 | 3.67k | switch (mode) { |
391 | 3.22k | case QuantEncoding::kQuantModeLibrary: { |
392 | 3.22k | encoding->predefined = br->ReadFixedBits<kCeilLog2NumPredefinedTables>(); |
393 | 3.22k | if (encoding->predefined >= kNumPredefinedTables) { |
394 | 0 | return JXL_FAILURE("Invalid predefined table"); |
395 | 0 | } |
396 | 3.22k | break; |
397 | 3.22k | } |
398 | 3.22k | case QuantEncoding::kQuantModeID: { |
399 | 59 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
400 | 124 | for (size_t c = 0; c < 3; c++) { |
401 | 362 | for (size_t i = 0; i < 3; i++) { |
402 | 289 | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->idweights[c][i])); |
403 | 277 | if (std::abs(encoding->idweights[c][i]) < kAlmostZero) { |
404 | 23 | return JXL_FAILURE("ID Quantizer is too small"); |
405 | 23 | } |
406 | 254 | encoding->idweights[c][i] *= 64; |
407 | 254 | } |
408 | 108 | } |
409 | 16 | break; |
410 | 51 | } |
411 | 72 | case QuantEncoding::kQuantModeDCT2: { |
412 | 72 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
413 | 156 | for (size_t c = 0; c < 3; c++) { |
414 | 794 | for (size_t i = 0; i < 6; i++) { |
415 | 707 | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->dct2weights[c][i])); |
416 | 687 | if (std::abs(encoding->dct2weights[c][i]) < kAlmostZero) { |
417 | 33 | return JXL_FAILURE("Quantizer is too small"); |
418 | 33 | } |
419 | 654 | encoding->dct2weights[c][i] *= 64; |
420 | 654 | } |
421 | 140 | } |
422 | 16 | break; |
423 | 69 | } |
424 | 61 | case QuantEncoding::kQuantModeDCT4X8: { |
425 | 61 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
426 | 179 | for (size_t c = 0; c < 3; c++) { |
427 | 143 | JXL_RETURN_IF_ERROR( |
428 | 143 | F16Coder::Read(br, &encoding->dct4x8multipliers[c])); |
429 | 140 | if (std::abs(encoding->dct4x8multipliers[c]) < kAlmostZero) { |
430 | 13 | return JXL_FAILURE("DCT4X8 multiplier is too small"); |
431 | 13 | } |
432 | 140 | } |
433 | 36 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
434 | 25 | break; |
435 | 36 | } |
436 | 41 | case QuantEncoding::kQuantModeDCT4: { |
437 | 41 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
438 | 116 | for (size_t c = 0; c < 3; c++) { |
439 | 255 | for (size_t i = 0; i < 2; i++) { |
440 | 179 | JXL_RETURN_IF_ERROR( |
441 | 179 | F16Coder::Read(br, &encoding->dct4multipliers[c][i])); |
442 | 173 | if (std::abs(encoding->dct4multipliers[c][i]) < kAlmostZero) { |
443 | 13 | return JXL_FAILURE("DCT4 multiplier is too small"); |
444 | 13 | } |
445 | 173 | } |
446 | 95 | } |
447 | 21 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
448 | 19 | break; |
449 | 21 | } |
450 | 52 | case QuantEncoding::kQuantModeAFV: { |
451 | 52 | if (required_size != 1) return JXL_FAILURE("Invalid mode"); |
452 | 150 | for (size_t c = 0; c < 3; c++) { |
453 | 1.12k | for (size_t i = 0; i < 9; i++) { |
454 | 1.02k | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->afv_weights[c][i])); |
455 | 1.02k | } |
456 | 693 | for (size_t i = 0; i < 6; i++) { |
457 | 594 | encoding->afv_weights[c][i] *= 64; |
458 | 594 | } |
459 | 99 | } |
460 | 24 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
461 | 19 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params_afv_4x4)); |
462 | 17 | break; |
463 | 19 | } |
464 | 77 | case QuantEncoding::kQuantModeDCT: { |
465 | 77 | JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); |
466 | 51 | break; |
467 | 77 | } |
468 | 90 | case QuantEncoding::kQuantModeRAW: { |
469 | | // Set mode early, to avoid mem-leak. |
470 | 90 | encoding->mode = QuantEncoding::kQuantModeRAW; |
471 | 90 | JXL_RETURN_IF_ERROR(ModularFrameDecoder::DecodeQuantTable( |
472 | 90 | memory_manager, required_size_x, required_size_y, br, encoding, idx, |
473 | 90 | modular_frame_decoder)); |
474 | 23 | break; |
475 | 90 | } |
476 | 23 | default: |
477 | 0 | return JXL_FAILURE("Invalid quantization table encoding"); |
478 | 3.67k | } |
479 | 3.38k | encoding->mode = static_cast<QuantEncoding::Mode>(mode); |
480 | 3.38k | return true; |
481 | 3.67k | } |
482 | | |
483 | | } // namespace |
484 | | |
485 | | #if JXL_CXX_LANG < JXL_CXX_17 |
486 | | constexpr const std::array<int, 17> DequantMatrices::required_size_x; |
487 | | constexpr const std::array<int, 17> DequantMatrices::required_size_y; |
488 | | constexpr const size_t DequantMatrices::kSumRequiredXy; |
489 | | #endif |
490 | | // Resets every encoding to Library<0>, reads the all_default bit and, if it is 0, decodes an encoding for each of the kNumQuantTables tables. Clears computed_mask_ on success. |
491 | | Status DequantMatrices::Decode(JxlMemoryManager* memory_manager, BitReader* br, |
492 | 10.7k | ModularFrameDecoder* modular_frame_decoder) { |
493 | 10.7k | size_t all_default = br->ReadBits(1); |
494 | 10.7k | size_t num_tables = all_default ? 0 : static_cast<size_t>(kNumQuantTables); |
495 | 10.7k | encodings_.clear(); |
496 | 10.7k | encodings_.resize(kNumQuantTables, QuantEncoding::Library<0>()); |
497 | 14.0k | for (size_t i = 0; i < num_tables; i++) { |
498 | 3.67k | JXL_RETURN_IF_ERROR(jxl::Decode(memory_manager, br, &encodings_[i], |
499 | 3.67k | required_size_x[i % kNumQuantTables], |
500 | 3.67k | required_size_y[i % kNumQuantTables], i, |
501 | 3.67k | modular_frame_decoder)); |
502 | 3.67k | } |
503 | 10.4k | computed_mask_ = 0; |
504 | 10.4k | return true; |
505 | 10.7k | } |
506 | | // Reads the all_default bit; if it is 0, reads three F16 DC quantizers, scales each by 1/128, rejects values < kAlmostZero and stores the reciprocal in inv_dc_quant_. |
507 | 42.8k | Status DequantMatrices::DecodeDC(BitReader* br) { |
508 | 42.8k | bool all_default = static_cast<bool>(br->ReadBits(1)); |
509 | 42.8k | if (!br->AllReadsWithinBounds()) return JXL_FAILURE("EOS during DecodeDC"); |
510 | 42.0k | if (!all_default) { |
511 | 9.18k | for (size_t c = 0; c < 3; c++) { |
512 | 6.92k | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &dc_quant_[c])); |
513 | 6.91k | dc_quant_[c] *= 1.0f / 128.0f; |
514 | | // Negative values and nearly zero are invalid values. |
515 | 6.91k | if (dc_quant_[c] < kAlmostZero) { |
516 | 74 | return JXL_FAILURE("Invalid dc_quant: coefficient is too small."); |
517 | 74 | } |
518 | 6.83k | inv_dc_quant_[c] = 1.0f / dc_quant_[c]; |
519 | 6.83k | } |
520 | 2.35k | } |
521 | 42.0k | return true; |
522 | 42.0k | } |
523 | | // Returns its argument as a float; wraps each numeric literal in the library table definitions below. |
524 | 0 | constexpr float V(float v) { return static_cast<float>(v); } |
525 | | |
526 | | namespace { |
527 | | struct DequantMatricesLibraryDef { |
528 | | // DCT8 |
529 | 1 | static constexpr QuantEncodingInternal DCT() { |
530 | 1 | return QuantEncodingInternal::DCT(DctQuantWeightParams({{{{ |
531 | 1 | V(3150.0), |
532 | 1 | V(0.0), |
533 | 1 | V(-0.4), |
534 | 1 | V(-0.4), |
535 | 1 | V(-0.4), |
536 | 1 | V(-2.0), |
537 | 1 | }}, |
538 | 1 | {{ |
539 | 1 | V(560.0), |
540 | 1 | V(0.0), |
541 | 1 | V(-0.3), |
542 | 1 | V(-0.3), |
543 | 1 | V(-0.3), |
544 | 1 | V(-0.3), |
545 | 1 | }}, |
546 | 1 | {{ |
547 | 1 | V(512.0), |
548 | 1 | V(-2.0), |
549 | 1 | V(-1.0), |
550 | 1 | V(0.0), |
551 | 1 | V(-1.0), |
552 | 1 | V(-2.0), |
553 | 1 | }}}}, |
554 | 1 | 6)); |
555 | 1 | } |
556 | | |
557 | | // Identity |
558 | 1 | static constexpr QuantEncodingInternal IDENTITY() { |
559 | 1 | return QuantEncodingInternal::Identity({{{{ |
560 | 1 | V(280.0), |
561 | 1 | V(3160.0), |
562 | 1 | V(3160.0), |
563 | 1 | }}, |
564 | 1 | {{ |
565 | 1 | V(60.0), |
566 | 1 | V(864.0), |
567 | 1 | V(864.0), |
568 | 1 | }}, |
569 | 1 | {{ |
570 | 1 | V(18.0), |
571 | 1 | V(200.0), |
572 | 1 | V(200.0), |
573 | 1 | }}}}); |
574 | 1 | } |
575 | | |
576 | | // DCT2 |
577 | 1 | static constexpr QuantEncodingInternal DCT2X2() { |
578 | 1 | return QuantEncodingInternal::DCT2({{{{ |
579 | 1 | V(3840.0), |
580 | 1 | V(2560.0), |
581 | 1 | V(1280.0), |
582 | 1 | V(640.0), |
583 | 1 | V(480.0), |
584 | 1 | V(300.0), |
585 | 1 | }}, |
586 | 1 | {{ |
587 | 1 | V(960.0), |
588 | 1 | V(640.0), |
589 | 1 | V(320.0), |
590 | 1 | V(180.0), |
591 | 1 | V(140.0), |
592 | 1 | V(120.0), |
593 | 1 | }}, |
594 | 1 | {{ |
595 | 1 | V(640.0), |
596 | 1 | V(320.0), |
597 | 1 | V(128.0), |
598 | 1 | V(64.0), |
599 | 1 | V(32.0), |
600 | 1 | V(16.0), |
601 | 1 | }}}}); |
602 | 1 | } |
603 | | |
604 | | // DCT4 (quant_kind 3) |
605 | 2 | static constexpr QuantEncodingInternal DCT4X4() { |
606 | 2 | return QuantEncodingInternal::DCT4(DctQuantWeightParams({{{{ |
607 | 2 | V(2200.0), |
608 | 2 | V(0.0), |
609 | 2 | V(0.0), |
610 | 2 | V(0.0), |
611 | 2 | }}, |
612 | 2 | {{ |
613 | 2 | V(392.0), |
614 | 2 | V(0.0), |
615 | 2 | V(0.0), |
616 | 2 | V(0.0), |
617 | 2 | }}, |
618 | 2 | {{ |
619 | 2 | V(112.0), |
620 | 2 | V(-0.25), |
621 | 2 | V(-0.25), |
622 | 2 | V(-0.5), |
623 | 2 | }}}}, |
624 | 2 | 4), |
625 | | /* kMul */ |
626 | 2 | {{{{ |
627 | 2 | V(1.0), |
628 | 2 | V(1.0), |
629 | 2 | }}, |
630 | 2 | {{ |
631 | 2 | V(1.0), |
632 | 2 | V(1.0), |
633 | 2 | }}, |
634 | 2 | {{ |
635 | 2 | V(1.0), |
636 | 2 | V(1.0), |
637 | 2 | }}}}); |
638 | 2 | } |
639 | | |
640 | | // DCT16 |
641 | 1 | static constexpr QuantEncodingInternal DCT16X16() { |
642 | 1 | return QuantEncodingInternal::DCT( |
643 | 1 | DctQuantWeightParams({{{{ |
644 | 1 | V(8996.8725711814115328), |
645 | 1 | V(-1.3000777393353804), |
646 | 1 | V(-0.49424529824571225), |
647 | 1 | V(-0.439093774457103443), |
648 | 1 | V(-0.6350101832695744), |
649 | 1 | V(-0.90177264050827612), |
650 | 1 | V(-1.6162099239887414), |
651 | 1 | }}, |
652 | 1 | {{ |
653 | 1 | V(3191.48366296844234752), |
654 | 1 | V(-0.67424582104194355), |
655 | 1 | V(-0.80745813428471001), |
656 | 1 | V(-0.44925837484843441), |
657 | 1 | V(-0.35865440981033403), |
658 | 1 | V(-0.31322389111877305), |
659 | 1 | V(-0.37615025315725483), |
660 | 1 | }}, |
661 | 1 | {{ |
662 | 1 | V(1157.50408145487200256), |
663 | 1 | V(-2.0531423165804414), |
664 | 1 | V(-1.4), |
665 | 1 | V(-0.50687130033378396), |
666 | 1 | V(-0.42708730624733904), |
667 | 1 | V(-1.4856834539296244), |
668 | 1 | V(-4.9209142884401604), |
669 | 1 | }}}}, |
670 | 1 | 7)); |
671 | 1 | } |
672 | | |
673 | | // DCT32 |
674 | 1 | static constexpr QuantEncodingInternal DCT32X32() { |
675 | 1 | return QuantEncodingInternal::DCT( |
676 | 1 | DctQuantWeightParams({{{{ |
677 | 1 | V(15718.40830982518931456), |
678 | 1 | V(-1.025), |
679 | 1 | V(-0.98), |
680 | 1 | V(-0.9012), |
681 | 1 | V(-0.4), |
682 | 1 | V(-0.48819395464), |
683 | 1 | V(-0.421064), |
684 | 1 | V(-0.27), |
685 | 1 | }}, |
686 | 1 | {{ |
687 | 1 | V(7305.7636810695983104), |
688 | 1 | V(-0.8041958212306401), |
689 | 1 | V(-0.7633036457487539), |
690 | 1 | V(-0.55660379990111464), |
691 | 1 | V(-0.49785304658857626), |
692 | 1 | V(-0.43699592683512467), |
693 | 1 | V(-0.40180866526242109), |
694 | 1 | V(-0.27321683125358037), |
695 | 1 | }}, |
696 | 1 | {{ |
697 | 1 | V(3803.53173721215041536), |
698 | 1 | V(-3.060733579805728), |
699 | 1 | V(-2.0413270132490346), |
700 | 1 | V(-2.0235650159727417), |
701 | 1 | V(-0.5495389509954993), |
702 | 1 | V(-0.4), |
703 | 1 | V(-0.4), |
704 | 1 | V(-0.3), |
705 | 1 | }}}}, |
706 | 1 | 8)); |
707 | 1 | } |
708 | | |
709 | | // DCT16X8 |
710 | 1 | static constexpr QuantEncodingInternal DCT8X16() { |
711 | 1 | return QuantEncodingInternal::DCT( |
712 | 1 | DctQuantWeightParams({{{{ |
713 | 1 | V(7240.7734393502), |
714 | 1 | V(-0.7), |
715 | 1 | V(-0.7), |
716 | 1 | V(-0.2), |
717 | 1 | V(-0.2), |
718 | 1 | V(-0.2), |
719 | 1 | V(-0.5), |
720 | 1 | }}, |
721 | 1 | {{ |
722 | 1 | V(1448.15468787004), |
723 | 1 | V(-0.5), |
724 | 1 | V(-0.5), |
725 | 1 | V(-0.5), |
726 | 1 | V(-0.2), |
727 | 1 | V(-0.2), |
728 | 1 | V(-0.2), |
729 | 1 | }}, |
730 | 1 | {{ |
731 | 1 | V(506.854140754517), |
732 | 1 | V(-1.4), |
733 | 1 | V(-0.2), |
734 | 1 | V(-0.5), |
735 | 1 | V(-0.5), |
736 | 1 | V(-1.5), |
737 | 1 | V(-3.6), |
738 | 1 | }}}}, |
739 | 1 | 7)); |
740 | 1 | } |
741 | | |
742 | | // DCT32X8 |
743 | 1 | static constexpr QuantEncodingInternal DCT8X32() { |
744 | 1 | return QuantEncodingInternal::DCT( |
745 | 1 | DctQuantWeightParams({{{{ |
746 | 1 | V(16283.2494710648897), |
747 | 1 | V(-1.7812845336559429), |
748 | 1 | V(-1.6309059012653515), |
749 | 1 | V(-1.0382179034313539), |
750 | 1 | V(-0.85), |
751 | 1 | V(-0.7), |
752 | 1 | V(-0.9), |
753 | 1 | V(-1.2360638576849587), |
754 | 1 | }}, |
755 | 1 | {{ |
756 | 1 | V(5089.15750884921511936), |
757 | 1 | V(-0.320049391452786891), |
758 | 1 | V(-0.35362849922161446), |
759 | 1 | V(-0.30340000000000003), |
760 | 1 | V(-0.61), |
761 | 1 | V(-0.5), |
762 | 1 | V(-0.5), |
763 | 1 | V(-0.6), |
764 | 1 | }}, |
765 | 1 | {{ |
766 | 1 | V(3397.77603275308720128), |
767 | 1 | V(-0.321327362693153371), |
768 | 1 | V(-0.34507619223117997), |
769 | 1 | V(-0.70340000000000003), |
770 | 1 | V(-0.9), |
771 | 1 | V(-1.0), |
772 | 1 | V(-1.0), |
773 | 1 | V(-1.1754605576265209), |
774 | 1 | }}}}, |
775 | 1 | 8)); |
776 | 1 | } |
777 | | |
778 | | // DCT32X16 |
779 | 1 | static constexpr QuantEncodingInternal DCT16X32() { |
780 | 1 | return QuantEncodingInternal::DCT( |
781 | 1 | DctQuantWeightParams({{{{ |
782 | 1 | V(13844.97076442300573), |
783 | 1 | V(-0.97113799999999995), |
784 | 1 | V(-0.658), |
785 | 1 | V(-0.42026), |
786 | 1 | V(-0.22712), |
787 | 1 | V(-0.2206), |
788 | 1 | V(-0.226), |
789 | 1 | V(-0.6), |
790 | 1 | }}, |
791 | 1 | {{ |
792 | 1 | V(4798.964084220744293), |
793 | 1 | V(-0.61125308982767057), |
794 | 1 | V(-0.83770786552491361), |
795 | 1 | V(-0.79014862079498627), |
796 | 1 | V(-0.2692727459704829), |
797 | 1 | V(-0.38272769465388551), |
798 | 1 | V(-0.22924222653091453), |
799 | 1 | V(-0.20719098826199578), |
800 | 1 | }}, |
801 | 1 | {{ |
802 | 1 | V(1807.236946760964614), |
803 | 1 | V(-1.2), |
804 | 1 | V(-1.2), |
805 | 1 | V(-0.7), |
806 | 1 | V(-0.7), |
807 | 1 | V(-0.7), |
808 | 1 | V(-0.4), |
809 | 1 | V(-0.5), |
810 | 1 | }}}}, |
811 | 1 | 8)); |
812 | 1 | } |
813 | | |
814 | | // DCT4X8 and 8x4 |
815 | 2 | static constexpr QuantEncodingInternal DCT4X8() { |
816 | 2 | return QuantEncodingInternal::DCT4X8( |
817 | 2 | DctQuantWeightParams({{ |
818 | 2 | {{ |
819 | 2 | V(2198.050556016380522), |
820 | 2 | V(-0.96269623020744692), |
821 | 2 | V(-0.76194253026666783), |
822 | 2 | V(-0.6551140670773547), |
823 | 2 | }}, |
824 | 2 | {{ |
825 | 2 | V(764.3655248643528689), |
826 | 2 | V(-0.92630200888366945), |
827 | 2 | V(-0.9675229603596517), |
828 | 2 | V(-0.27845290869168118), |
829 | 2 | }}, |
830 | 2 | {{ |
831 | 2 | V(527.107573587542228), |
832 | 2 | V(-1.4594385811273854), |
833 | 2 | V(-1.450082094097871593), |
834 | 2 | V(-1.5843722511996204), |
835 | 2 | }}, |
836 | 2 | }}, |
837 | 2 | 4), |
838 | | /* kMuls */ |
839 | 2 | {{ |
840 | 2 | V(1.0), |
841 | 2 | V(1.0), |
842 | 2 | V(1.0), |
843 | 2 | }}); |
844 | 2 | } |
845 | | // AFV |
// Library default for the AFV0 quant table kind. Calls
// QuantEncodingInternal::AFV with the dct_params of DCT4X8() and DCT4X4() and
// a table of three rows with nine values each. Within every row the inline
// comments group the values as: two "4x4/4x8 DC tendency" entries, three
// "AFV corner" entries and four "AFV high freqs" entries.
// NOTE(review): the three rows presumably correspond to the three color
// components - confirm against the QuantEncodingInternal::AFV declaration.
846 | 1 | static QuantEncodingInternal AFV0() { |
847 | 1 | return QuantEncodingInternal::AFV(DCT4X8().dct_params, DCT4X4().dct_params, |
848 | 1 | {{{{ |
849 | | // 4x4/4x8 DC tendency. |
850 | 1 | V(3072.0), |
851 | 1 | V(3072.0), |
852 | | // AFV corner. |
853 | 1 | V(256.0), |
854 | 1 | V(256.0), |
855 | 1 | V(256.0), |
856 | | // AFV high freqs. |
857 | 1 | V(414.0), |
858 | 1 | V(0.0), |
859 | 1 | V(0.0), |
860 | 1 | V(0.0), |
861 | 1 | }}, |
862 | 1 | {{ |
863 | | // 4x4/4x8 DC tendency. |
864 | 1 | V(1024.0), |
865 | 1 | V(1024.0), |
866 | | // AFV corner. |
867 | 1 | V(50), |
868 | 1 | V(50), |
869 | 1 | V(50), |
870 | | // AFV high freqs. |
871 | 1 | V(58.0), |
872 | 1 | V(0.0), |
873 | 1 | V(0.0), |
874 | 1 | V(0.0), |
875 | 1 | }}, |
876 | 1 | {{ |
877 | | // 4x4/4x8 DC tendency. |
878 | 1 | V(384.0), |
879 | 1 | V(384.0), |
880 | | // AFV corner. |
881 | 1 | V(12.0), |
882 | 1 | V(12.0), |
883 | 1 | V(12.0), |
884 | | // AFV high freqs. |
885 | 1 | V(22.0), |
886 | 1 | V(-0.25), |
887 | 1 | V(-0.25), |
888 | 1 | V(-0.25), |
889 | 1 | }}}}); |
890 | 1 | } |
891 | | |
892 | | // DCT64 |
// Library default for the DCT64X64 quant table kind. Builds a
// QuantEncodingInternal::DCT from a DctQuantWeightParams holding three rows of
// eight values each, followed by the count 8. The first value of every row is
// written as 0.9 times a base constant; the remaining seven values are
// negative.
// NOTE(review): the three rows presumably correspond to the three color
// components, and the trailing 8 to the number of values per row - confirm
// against the DctQuantWeightParams declaration.
893 | 1 | static QuantEncodingInternal DCT64X64() { |
894 | 1 | return QuantEncodingInternal::DCT( |
895 | 1 | DctQuantWeightParams({{{{ |
896 | 1 | V(0.9 * 26629.073922049845), |
897 | 1 | V(-1.025), |
898 | 1 | V(-0.78), |
899 | 1 | V(-0.65012), |
900 | 1 | V(-0.19041574084286472), |
901 | 1 | V(-0.20819395464), |
902 | 1 | V(-0.421064), |
903 | 1 | V(-0.32733845535848671), |
904 | 1 | }}, |
905 | 1 | {{ |
906 | 1 | V(0.9 * 9311.3238710010046), |
907 | 1 | V(-0.3041958212306401), |
908 | 1 | V(-0.3633036457487539), |
909 | 1 | V(-0.35660379990111464), |
910 | 1 | V(-0.3443074455424403), |
911 | 1 | V(-0.33699592683512467), |
912 | 1 | V(-0.30180866526242109), |
913 | 1 | V(-0.27321683125358037), |
914 | 1 | }}, |
915 | 1 | {{ |
916 | 1 | V(0.9 * 4992.2486445538634), |
917 | 1 | V(-1.2), |
918 | 1 | V(-1.2), |
919 | 1 | V(-0.8), |
920 | 1 | V(-0.7), |
921 | 1 | V(-0.7), |
922 | 1 | V(-0.4), |
923 | 1 | V(-0.5), |
924 | 1 | }}}}, |
925 | 1 | 8)); |
926 | 1 | } |
927 | | |
928 | | // DCT32X64 / DCT64X32 |
// Library default for the DCT32X64 quant table kind. Builds a
// QuantEncodingInternal::DCT from a DctQuantWeightParams holding three rows of
// eight values each, followed by the count 8. The first value of every row is
// written as 0.65 times a base constant; the remaining seven values are
// identical to the ones used in DCT64X64().
// NOTE(review): the three rows presumably correspond to the three color
// components - confirm against the DctQuantWeightParams declaration.
929 | 1 | static QuantEncodingInternal DCT32X64() { |
930 | 1 | return QuantEncodingInternal::DCT( |
931 | 1 | DctQuantWeightParams({{{{ |
932 | 1 | V(0.65 * 23629.073922049845), |
933 | 1 | V(-1.025), |
934 | 1 | V(-0.78), |
935 | 1 | V(-0.65012), |
936 | 1 | V(-0.19041574084286472), |
937 | 1 | V(-0.20819395464), |
938 | 1 | V(-0.421064), |
939 | 1 | V(-0.32733845535848671), |
940 | 1 | }}, |
941 | 1 | {{ |
942 | 1 | V(0.65 * 8611.3238710010046), |
943 | 1 | V(-0.3041958212306401), |
944 | 1 | V(-0.3633036457487539), |
945 | 1 | V(-0.35660379990111464), |
946 | 1 | V(-0.3443074455424403), |
947 | 1 | V(-0.33699592683512467), |
948 | 1 | V(-0.30180866526242109), |
949 | 1 | V(-0.27321683125358037), |
950 | 1 | }}, |
951 | 1 | {{ |
952 | 1 | V(0.65 * 4492.2486445538634), |
953 | 1 | V(-1.2), |
954 | 1 | V(-1.2), |
955 | 1 | V(-0.8), |
956 | 1 | V(-0.7), |
957 | 1 | V(-0.7), |
958 | 1 | V(-0.4), |
959 | 1 | V(-0.5), |
960 | 1 | }}}}, |
961 | 1 | 8)); |
962 | 1 | } |
963 | | // DCT128X128 |
// Library default for the DCT128X128 quant table kind. Same layout and same
// trailing seven values per row as DCT64X64(); only the first value of every
// row differs, using a factor of 1.8 (twice the 0.9 used by DCT64X64()) on the
// same base constants.
// NOTE(review): the three rows presumably correspond to the three color
// components - confirm against the DctQuantWeightParams declaration.
964 | 1 | static QuantEncodingInternal DCT128X128() { |
965 | 1 | return QuantEncodingInternal::DCT( |
966 | 1 | DctQuantWeightParams({{{{ |
967 | 1 | V(1.8 * 26629.073922049845), |
968 | 1 | V(-1.025), |
969 | 1 | V(-0.78), |
970 | 1 | V(-0.65012), |
971 | 1 | V(-0.19041574084286472), |
972 | 1 | V(-0.20819395464), |
973 | 1 | V(-0.421064), |
974 | 1 | V(-0.32733845535848671), |
975 | 1 | }}, |
976 | 1 | {{ |
977 | 1 | V(1.8 * 9311.3238710010046), |
978 | 1 | V(-0.3041958212306401), |
979 | 1 | V(-0.3633036457487539), |
980 | 1 | V(-0.35660379990111464), |
981 | 1 | V(-0.3443074455424403), |
982 | 1 | V(-0.33699592683512467), |
983 | 1 | V(-0.30180866526242109), |
984 | 1 | V(-0.27321683125358037), |
985 | 1 | }}, |
986 | 1 | {{ |
987 | 1 | V(1.8 * 4992.2486445538634), |
988 | 1 | V(-1.2), |
989 | 1 | V(-1.2), |
990 | 1 | V(-0.8), |
991 | 1 | V(-0.7), |
992 | 1 | V(-0.7), |
993 | 1 | V(-0.4), |
994 | 1 | V(-0.5), |
995 | 1 | }}}}, |
996 | 1 | 8)); |
997 | 1 | } |
998 | | |
999 | | // DCT64X128 / DCT128X64 |
// Library default for the DCT64X128 quant table kind. Same layout and same
// trailing seven values per row as DCT32X64(); only the first value of every
// row differs, using a factor of 1.3 (twice the 0.65 used by DCT32X64()) on
// the same base constants.
// NOTE(review): the three rows presumably correspond to the three color
// components - confirm against the DctQuantWeightParams declaration.
1000 | 1 | static QuantEncodingInternal DCT64X128() { |
1001 | 1 | return QuantEncodingInternal::DCT( |
1002 | 1 | DctQuantWeightParams({{{{ |
1003 | 1 | V(1.3 * 23629.073922049845), |
1004 | 1 | V(-1.025), |
1005 | 1 | V(-0.78), |
1006 | 1 | V(-0.65012), |
1007 | 1 | V(-0.19041574084286472), |
1008 | 1 | V(-0.20819395464), |
1009 | 1 | V(-0.421064), |
1010 | 1 | V(-0.32733845535848671), |
1011 | 1 | }}, |
1012 | 1 | {{ |
1013 | 1 | V(1.3 * 8611.3238710010046), |
1014 | 1 | V(-0.3041958212306401), |
1015 | 1 | V(-0.3633036457487539), |
1016 | 1 | V(-0.35660379990111464), |
1017 | 1 | V(-0.3443074455424403), |
1018 | 1 | V(-0.33699592683512467), |
1019 | 1 | V(-0.30180866526242109), |
1020 | 1 | V(-0.27321683125358037), |
1021 | 1 | }}, |
1022 | 1 | {{ |
1023 | 1 | V(1.3 * 4492.2486445538634), |
1024 | 1 | V(-1.2), |
1025 | 1 | V(-1.2), |
1026 | 1 | V(-0.8), |
1027 | 1 | V(-0.7), |
1028 | 1 | V(-0.7), |
1029 | 1 | V(-0.4), |
1030 | 1 | V(-0.5), |
1031 | 1 | }}}}, |
1032 | 1 | 8)); |
1033 | 1 | } |
1034 | | // DCT256X256 |
// Library default for the DCT256X256 quant table kind. Same layout and same
// trailing seven values per row as DCT64X64(); only the first value of every
// row differs, using a factor of 3.6 (four times the 0.9 used by DCT64X64())
// on the same base constants.
// NOTE(review): the three rows presumably correspond to the three color
// components - confirm against the DctQuantWeightParams declaration.
1035 | 1 | static QuantEncodingInternal DCT256X256() { |
1036 | 1 | return QuantEncodingInternal::DCT( |
1037 | 1 | DctQuantWeightParams({{{{ |
1038 | 1 | V(3.6 * 26629.073922049845), |
1039 | 1 | V(-1.025), |
1040 | 1 | V(-0.78), |
1041 | 1 | V(-0.65012), |
1042 | 1 | V(-0.19041574084286472), |
1043 | 1 | V(-0.20819395464), |
1044 | 1 | V(-0.421064), |
1045 | 1 | V(-0.32733845535848671), |
1046 | 1 | }}, |
1047 | 1 | {{ |
1048 | 1 | V(3.6 * 9311.3238710010046), |
1049 | 1 | V(-0.3041958212306401), |
1050 | 1 | V(-0.3633036457487539), |
1051 | 1 | V(-0.35660379990111464), |
1052 | 1 | V(-0.3443074455424403), |
1053 | 1 | V(-0.33699592683512467), |
1054 | 1 | V(-0.30180866526242109), |
1055 | 1 | V(-0.27321683125358037), |
1056 | 1 | }}, |
1057 | 1 | {{ |
1058 | 1 | V(3.6 * 4992.2486445538634), |
1059 | 1 | V(-1.2), |
1060 | 1 | V(-1.2), |
1061 | 1 | V(-0.8), |
1062 | 1 | V(-0.7), |
1063 | 1 | V(-0.7), |
1064 | 1 | V(-0.4), |
1065 | 1 | V(-0.5), |
1066 | 1 | }}}}, |
1067 | 1 | 8)); |
1068 | 1 | } |
1069 | | |
1070 | | // DCT128X256 / DCT256X128 |
// Library default for the DCT128X256 quant table kind. Same layout and same
// trailing seven values per row as DCT32X64(); only the first value of every
// row differs, using a factor of 2.6 (four times the 0.65 used by DCT32X64())
// on the same base constants.
// NOTE(review): the three rows presumably correspond to the three color
// components - confirm against the DctQuantWeightParams declaration.
1071 | 1 | static QuantEncodingInternal DCT128X256() { |
1072 | 1 | return QuantEncodingInternal::DCT( |
1073 | 1 | DctQuantWeightParams({{{{ |
1074 | 1 | V(2.6 * 23629.073922049845), |
1075 | 1 | V(-1.025), |
1076 | 1 | V(-0.78), |
1077 | 1 | V(-0.65012), |
1078 | 1 | V(-0.19041574084286472), |
1079 | 1 | V(-0.20819395464), |
1080 | 1 | V(-0.421064), |
1081 | 1 | V(-0.32733845535848671), |
1082 | 1 | }}, |
1083 | 1 | {{ |
1084 | 1 | V(2.6 * 8611.3238710010046), |
1085 | 1 | V(-0.3041958212306401), |
1086 | 1 | V(-0.3633036457487539), |
1087 | 1 | V(-0.35660379990111464), |
1088 | 1 | V(-0.3443074455424403), |
1089 | 1 | V(-0.33699592683512467), |
1090 | 1 | V(-0.30180866526242109), |
1091 | 1 | V(-0.27321683125358037), |
1092 | 1 | }}, |
1093 | 1 | {{ |
1094 | 1 | V(2.6 * 4492.2486445538634), |
1095 | 1 | V(-1.2), |
1096 | 1 | V(-1.2), |
1097 | 1 | V(-0.8), |
1098 | 1 | V(-0.7), |
1099 | 1 | V(-0.7), |
1100 | 1 | V(-0.4), |
1101 | 1 | V(-0.5), |
1102 | 1 | }}}}, |
1103 | 1 | 8)); |
1104 | 1 | } |
1105 | | }; |
1106 | | } // namespace |
1107 | | |
// Builds the library of default quantization encodings: one
// DequantMatricesLibraryDef entry per quant table kind, stored in the order of
// the QuantTable enumerators. The static_asserts pin the expected table count
// (17), the number of predefined tables (1) and the numeric value of every
// QuantTable enumerator, so that a change to the enum fails to compile until
// the returned array is updated to match.
1108 | 1 | DequantMatrices::DequantLibraryInternal DequantMatrices::LibraryInit() { |
1109 | 1 | static_assert(kNumQuantTables == 17, |
1110 | 1 | "Update this function when adding new quantization kinds."); |
1111 | 1 | static_assert(kNumPredefinedTables == 1, |
1112 | 1 | "Update this function when adding new quantization matrices to " |
1113 | 1 | "the library."); |
1114 | | |
1115 | | // The library and the indices need to be kept in sync manually. |
1116 | 1 | static_assert(0 == static_cast<uint8_t>(QuantTable::DCT), |
1117 | 1 | "Update the DequantLibrary array below."); |
1118 | 1 | static_assert(1 == static_cast<uint8_t>(QuantTable::IDENTITY), |
1119 | 1 | "Update the DequantLibrary array below."); |
1120 | 1 | static_assert(2 == static_cast<uint8_t>(QuantTable::DCT2X2), |
1121 | 1 | "Update the DequantLibrary array below."); |
1122 | 1 | static_assert(3 == static_cast<uint8_t>(QuantTable::DCT4X4), |
1123 | 1 | "Update the DequantLibrary array below."); |
1124 | 1 | static_assert(4 == static_cast<uint8_t>(QuantTable::DCT16X16), |
1125 | 1 | "Update the DequantLibrary array below."); |
1126 | 1 | static_assert(5 == static_cast<uint8_t>(QuantTable::DCT32X32), |
1127 | 1 | "Update the DequantLibrary array below."); |
1128 | 1 | static_assert(6 == static_cast<uint8_t>(QuantTable::DCT8X16), |
1129 | 1 | "Update the DequantLibrary array below."); |
1130 | 1 | static_assert(7 == static_cast<uint8_t>(QuantTable::DCT8X32), |
1131 | 1 | "Update the DequantLibrary array below."); |
1132 | 1 | static_assert(8 == static_cast<uint8_t>(QuantTable::DCT16X32), |
1133 | 1 | "Update the DequantLibrary array below."); |
1134 | 1 | static_assert(9 == static_cast<uint8_t>(QuantTable::DCT4X8), |
1135 | 1 | "Update the DequantLibrary array below."); |
1136 | 1 | static_assert(10 == static_cast<uint8_t>(QuantTable::AFV0), |
1137 | 1 | "Update the DequantLibrary array below."); |
1138 | 1 | static_assert(11 == static_cast<uint8_t>(QuantTable::DCT64X64), |
1139 | 1 | "Update the DequantLibrary array below."); |
1140 | 1 | static_assert(12 == static_cast<uint8_t>(QuantTable::DCT32X64), |
1141 | 1 | "Update the DequantLibrary array below."); |
1142 | 1 | static_assert(13 == static_cast<uint8_t>(QuantTable::DCT128X128), |
1143 | 1 | "Update the DequantLibrary array below."); |
1144 | 1 | static_assert(14 == static_cast<uint8_t>(QuantTable::DCT64X128), |
1145 | 1 | "Update the DequantLibrary array below."); |
1146 | 1 | static_assert(15 == static_cast<uint8_t>(QuantTable::DCT256X256), |
1147 | 1 | "Update the DequantLibrary array below."); |
1148 | 1 | static_assert(16 == static_cast<uint8_t>(QuantTable::DCT128X256), |
1149 | 1 | "Update the DequantLibrary array below."); |
// Entries must appear in exactly the order asserted above (index == enum value).
1150 | 1 | return DequantMatrices::DequantLibraryInternal{{ |
1151 | 1 | DequantMatricesLibraryDef::DCT(), |
1152 | 1 | DequantMatricesLibraryDef::IDENTITY(), |
1153 | 1 | DequantMatricesLibraryDef::DCT2X2(), |
1154 | 1 | DequantMatricesLibraryDef::DCT4X4(), |
1155 | 1 | DequantMatricesLibraryDef::DCT16X16(), |
1156 | 1 | DequantMatricesLibraryDef::DCT32X32(), |
1157 | 1 | DequantMatricesLibraryDef::DCT8X16(), |
1158 | 1 | DequantMatricesLibraryDef::DCT8X32(), |
1159 | 1 | DequantMatricesLibraryDef::DCT16X32(), |
1160 | 1 | DequantMatricesLibraryDef::DCT4X8(), |
1161 | 1 | DequantMatricesLibraryDef::AFV0(), |
1162 | 1 | DequantMatricesLibraryDef::DCT64X64(), |
1163 | 1 | DequantMatricesLibraryDef::DCT32X64(), |
1164 | | // Large transforms (128+) reuse the 64x* parameters except for the first value of each row, which is scaled 2x (128) or 4x (256). |
1165 | 1 | DequantMatricesLibraryDef::DCT128X128(), |
1166 | 1 | DequantMatricesLibraryDef::DCT64X128(), |
1167 | 1 | DequantMatricesLibraryDef::DCT256X256(), |
1168 | 1 | DequantMatricesLibraryDef::DCT128X256(), |
1169 | 1 | }}; |
1170 | 1 | } |
1171 | | |
// Returns a pointer to the first entry of the default encoding library. The
// library is a function-local static that is initialized from LibraryInit().
1172 | 10.4k | const QuantEncoding* DequantMatrices::Library() { |
1173 | 10.4k | static const DequantMatrices::DequantLibraryInternal kDequantLibrary = |
1174 | 10.4k | DequantMatrices::LibraryInit(); |
1175 | | // Cast the result from QuantEncodingInternal* to const QuantEncoding*. This |
1176 | | // relies on the subclass (QuantEncoding) not adding any new members; users |
1177 | | // convert back to QuantEncodingInternal to access the members of that |
1178 | | // class. This allows kDequantLibrary to be a constexpr value while still |
1179 | | // allowing QuantEncoding::RAW() instances that use std::vector in C++11. |
1180 | | // NOTE(review): kDequantLibrary is declared `static const` here, not constexpr. |
1181 | 10.4k | return reinterpret_cast<const QuantEncoding*>(kDequantLibrary.data()); |
1182 | 10.4k | } |
1183 | | |
// Default constructor. Sets all kNumQuantTables encodings to
// QuantEncoding::Library<0>() and fills table_offsets_ with, for every AC
// strategy and each of the 3 channels, the start offset of the quant table
// kind that the strategy maps to through kAcStrategyToQuantTableMap.
1184 | 142k | DequantMatrices::DequantMatrices() { |
1185 | 142k | encodings_.resize(kNumQuantTables, QuantEncoding::Library<0>()); |
1186 | 142k | size_t pos = 0; |
// offsets[3 * i + c] is the start of channel c of quant table kind i when all
// kinds are laid out back to back, each kind occupying 3 * num entries.
1187 | 142k | size_t offsets[kNumQuantTables * 3]; |
1188 | 2.57M | for (size_t i = 0; i < static_cast<size_t>(kNumQuantTables); i++) { |
1189 | 2.42M | size_t num = required_size_x[i] * required_size_y[i] * kDCTBlockSize; |
1190 | 9.71M | for (size_t c = 0; c < 3; c++) { |
1191 | 7.28M | offsets[3 * i + c] = pos + c * num; |
1192 | 7.28M | } |
1193 | 2.42M | pos += 3 * num; |
1194 | 2.42M | } |
// Translate the per-kind offsets into per-AC-strategy offsets.
1195 | 4.00M | for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { |
1196 | 15.4M | for (size_t c = 0; c < 3; c++) { |
1197 | 11.5M | table_offsets_[i * 3 + c] = |
1198 | 11.5M | offsets[static_cast<size_t>(kAcStrategyToQuantTableMap[i]) * 3 + c]; |
1199 | 11.5M | } |
1200 | 3.85M | } |
1201 | 142k | } |
1202 | | |
// Computes the quant tables needed by the AC strategies selected in acs_mask
// (bit i selects AC strategy i), skipping table kinds that are already covered
// by computed_mask_.
//
// memory_manager is used to allocate table_storage_ the first time it is
// needed. Returns true on success; allocation failure, a failed JXL_ENSURE
// check or a ComputeQuantTable error is propagated through the JXL_* macros.
1203 | | Status DequantMatrices::EnsureComputed(JxlMemoryManager* memory_manager, |
1204 | 10.4k | uint32_t acs_mask) { |
1205 | 10.4k | const QuantEncoding* library = Library(); |
1206 | | |
// Lazily allocate room for 2 * kTotalTableSize floats: table_ points at the
// first half and inv_table_ at the second half.
1207 | 10.4k | if (!table_storage_) { |
1208 | 10.4k | size_t table_storage_bytes = 2 * kTotalTableSize * sizeof(float); |
1209 | 10.4k | JXL_ASSIGN_OR_RETURN( |
1210 | 10.4k | table_storage_, |
1211 | 10.4k | AlignedMemory::Create(memory_manager, table_storage_bytes)); |
1212 | 10.4k | table_ = table_storage_.address<float>(); |
1213 | 10.4k | inv_table_ = table_ + kTotalTableSize; |
1214 | 10.4k | } |
1215 | | |
// offsets[3 * i + c] is the start of channel c of table kind i. The extra
// final slot holds the total size, so offsets[table * 3 + 3] is always a valid
// "end of this table" lookup, including for the last table.
1216 | 10.4k | size_t offsets[kNumQuantTables * 3 + 1]; |
1217 | 10.4k | size_t pos = 0; |
1218 | 187k | for (size_t i = 0; i < kNumQuantTables; i++) { |
1219 | 177k | size_t num = required_size_x[i] * required_size_y[i] * kDCTBlockSize; |
1220 | 708k | for (size_t c = 0; c < 3; c++) { |
1221 | 531k | offsets[3 * i + c] = pos + c * num; |
1222 | 531k | } |
1223 | 177k | pos += 3 * num; |
1224 | 177k | } |
1225 | 10.4k | offsets[kNumQuantTables * 3] = pos; |
1226 | 10.4k | JXL_ENSURE(pos == kTotalTableSize); |
1227 | | |
// Convert the requested AC-strategy bitmask into a bitmask of quant table
// kinds (several strategies may map to the same kind).
1228 | 10.4k | uint32_t kind_mask = 0; |
1229 | 291k | for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { |
1230 | 281k | if (acs_mask & (1u << i)) { |
1231 | 22.9k | kind_mask |= 1u << static_cast<uint32_t>(kAcStrategyToQuantTableMap[i]); |
1232 | 22.9k | } |
1233 | 281k | } |
// Same conversion for the strategies recorded in computed_mask_ by earlier calls.
1234 | 10.4k | uint32_t computed_kind_mask = 0; |
1235 | 291k | for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { |
1236 | 281k | if (computed_mask_ & (1u << i)) { |
1237 | 0 | computed_kind_mask |= |
1238 | 0 | 1u << static_cast<uint32_t>(kAcStrategyToQuantTableMap[i]); |
1239 | 0 | } |
1240 | 281k | } |
// Compute every table kind that is requested and not yet computed.
// NOTE(review): `1 << table` is a signed shift while the masks above are built
// with `1u <<`; this is fine for the 17 kinds asserted in LibraryInit().
1241 | 186k | for (size_t table = 0; table < kNumQuantTables; table++) { |
1242 | 176k | if ((1 << table) & computed_kind_mask) continue; |
1243 | 176k | if ((1 << table) & ~kind_mask) continue; |
// NOTE(review): this `pos` shadows the function-level `pos` declared earlier.
// It is passed by address to ComputeQuantTable and checked afterwards.
1244 | 22.9k | size_t pos = offsets[table * 3]; |
1245 | 22.9k | float* mutable_table = table_storage_.address<float>(); |
// Library-mode encodings take their parameters from the default library;
// any other mode uses the encoding stored in encodings_.
1246 | 22.9k | if (encodings_[table].mode == QuantEncoding::kQuantModeLibrary) { |
1247 | 22.9k | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)( |
1248 | 22.9k | library[table], mutable_table, mutable_table + kTotalTableSize, table, |
1249 | 22.9k | QuantTable(table), &pos)); |
1250 | 22.9k | } else { |
1251 | 59 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)( |
1252 | 59 | encodings_[table], mutable_table, mutable_table + kTotalTableSize, |
1253 | 59 | table, QuantTable(table), &pos)); |
1254 | 59 | } |
// After the call, pos must equal the start offset of the next table kind.
1255 | 22.9k | JXL_ENSURE(pos == offsets[table * 3 + 3]); |
1256 | 22.9k | } |
// Only reached when every requested table was computed without error.
1257 | 10.3k | computed_mask_ |= acs_mask; |
1258 | | |
1259 | 10.3k | return true; |
1260 | 10.4k | } |
1261 | | |
1262 | | } // namespace jxl |
1263 | | #endif |