/src/libjxl/lib/jxl/enc_group.cc
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/enc_group.h"

#include <jxl/memory_manager.h>

#include "lib/jxl/base/status.h"
#include "lib/jxl/memory_manager_internal.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/rect.h"
#include "lib/jxl/common.h"  // kMaxNumPasses
#include "lib/jxl/dct_util.h"
#include "lib/jxl/dec_transforms-inl.h"
#include "lib/jxl/enc_aux_out.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_transforms-inl.h"
#include "lib/jxl/image.h"
#include "lib/jxl/quantizer-inl.h"
#include "lib/jxl/quantizer.h"
#include "lib/jxl/simd_util.h"
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Abs;
using hwy::HWY_NAMESPACE::Ge;
using hwy::HWY_NAMESPACE::IfThenElse;
using hwy::HWY_NAMESPACE::IfThenElseZero;
using hwy::HWY_NAMESPACE::MaskFromVec;
using hwy::HWY_NAMESPACE::Round;

// NOTE: caller takes care of extracting quant from rect of RawQuantField.
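// Quantizes one AC block with a per-quadrant dead zone: coefficients whose
// scaled magnitude falls below the matching `thresholds` entry are forced to
// zero; the remaining values are rounded to the nearest integer.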
void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion,
                     size_t c, float qm_multiplier, AcStrategyType quant_kind,
                     size_t xsize, size_t ysize, float* thresholds,
                     const float* JXL_RESTRICT block_in, const int32_t* quant,
                     int32_t* JXL_RESTRICT block_out) {
  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
  float qac = quantizer.Scale() * (*quant);
  // Not SIMD-ified for now.
  if (c != 1 && xsize * ysize >= 4) {
    for (int i = 0; i < 4; ++i) {
      thresholds[i] -= 0.00744f * xsize * ysize;
      if (thresholds[i] < 0.5) {
        thresholds[i] = 0.5;
      }
    }
  }
  HWY_CAPPED(float, kBlockDim) df;
  HWY_CAPPED(int32_t, kBlockDim) di;
  HWY_CAPPED(uint32_t, kBlockDim) du;
  const auto quantv = Set(df, qac * qm_multiplier);
  for (size_t y = 0; y < ysize * kBlockDim; y++) {
    size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;
    const size_t off = y * kBlockDim * xsize;
    for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
      auto threshold = Zero(df);
      if (xsize == 1) {
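        // With xsize == 1 a vector can straddle the horizontal midpoint of
        // the block, so select the left/right-half threshold per lane with a
        // mask instead of a single Set().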
        HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u};
        const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
        threshold = IfThenElse(mask, Set(df, thresholds[yfix + 1]),
                               Set(df, thresholds[yfix]));
      } else {
        // Same for all lanes in the vector.
        threshold = Set(
            df,
            thresholds[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
      }
      const auto q = Mul(Load(df, qm + off + x), quantv);
      const auto in = Load(df, block_in + off + x);
      const auto val = Mul(q, in);
      const auto nzero_mask = Ge(Abs(val), threshold);
      const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
      Store(v, di, block_out + off + x);
    }
  }
}

void AdjustQuantBlockAC(const Quantizer& quantizer, size_t c,
                        float qm_multiplier, AcStrategyType quant_kind,
                        size_t xsize, size_t ysize, float* thresholds,
                        const float* JXL_RESTRICT block_in, int32_t* quant) {
  // No quantization adjusting for these small blocks.
  // Quantization adjusting attempts to fix some known issues with larger
  // blocks, and the 8x8 blockiness that emerges from the 8x8 DCT when there
  // are not many non-zeros.
  constexpr size_t kPartialBlockKinds =
      (1 << static_cast<size_t>(AcStrategyType::IDENTITY)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT2X2)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT4X4)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT4X8)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT8X4)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV0)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV1)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV2)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV3));
  if ((1 << static_cast<size_t>(quant_kind)) & kPartialBlockKinds) {
    return;
  }

  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
  float qac = quantizer.Scale() * (*quant);
  if (xsize > 1 || ysize > 1) {
    for (int i = 0; i < 4; ++i) {
      thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f);
      if (thresholds[i] < 0.54) {
        thresholds[i] = 0.54;
      }
    }
  }
  float sum_of_highest_freq_row_and_column = 0;
  float sum_of_error = 0;
  float sum_of_vals = 0;
  float hfNonZeros[4] = {};
  float hfMaxError[4] = {};

  for (size_t y = 0; y < ysize * kBlockDim; y++) {
    for (size_t x = 0; x < xsize * kBlockDim; x++) {
      const size_t pos = y * kBlockDim * xsize + x;
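      // The top-left xsize*ysize corner holds the lowest-frequency
      // coefficients; those are coded through the DC path, so skip them here.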
      if (x < xsize && y < ysize) {
        continue;
      }
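      // Quadrant index: (bottom half ? 2 : 0) + (right half ? 1 : 0), i.e.
      // 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.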
      const size_t hfix = (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 +
                           static_cast<size_t>(x >= xsize * kBlockDim / 2));
      const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
      const float v = (std::abs(val) < thresholds[hfix]) ? 0 : rintf(val);
      const float error = std::abs(val - v);
      sum_of_error += error;
      sum_of_vals += std::abs(v);
      if (c == 1 && v == 0) {
        if (hfMaxError[hfix] < error) {
          hfMaxError[hfix] = error;
        }
      }
      if (v != 0.0f) {
        hfNonZeros[hfix] += std::abs(v);
        bool in_corner = y >= 7 * ysize && x >= 7 * xsize;
        bool on_border =
            y == ysize * kBlockDim - 1 || x == xsize * kBlockDim - 1;
        bool in_larger_corner = x >= 4 * xsize && y >= 4 * ysize;
        if (in_corner || (on_border && in_larger_corner)) {
          sum_of_highest_freq_row_and_column += std::abs(val);
        }
      }
    }
  }
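  // Sparse Y blocks: when a high-frequency quadrant quantizes entirely to
  // zero but leaves a large maximum rounding error, raise the quant index by
  // one and widen that quadrant's dead zone proportionally.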
  if (c == 1 && sum_of_vals * 8 < xsize * ysize) {
    static const double kLimit[4] = {
        0.46,
        0.46,
        0.46,
        0.46,
    };
    static const double kMul[4] = {
        0.9999,
        0.9999,
        0.9999,
        0.9999,
    };
    const int32_t orig_quant = *quant;
    int32_t new_quant = *quant;
    for (int i = 1; i < 4; ++i) {
      if (hfNonZeros[i] == 0.0 && hfMaxError[i] > kLimit[i]) {
        new_quant = orig_quant + 1;
        break;
      }
    }
    *quant = new_quant;
    if (hfNonZeros[3] == 0.0 && hfMaxError[3] > kLimit[3]) {
      thresholds[3] = kMul[3] * hfMaxError[3] * new_quant / orig_quant;
    } else if ((hfNonZeros[1] == 0.0 && hfMaxError[1] > kLimit[1]) ||
               (hfNonZeros[2] == 0.0 && hfMaxError[2] > kLimit[2])) {
      thresholds[1] = kMul[1] * std::max(hfMaxError[1], hfMaxError[2]) *
                      new_quant / orig_quant;
      thresholds[2] = thresholds[1];
    } else if (hfNonZeros[0] == 0.0 && hfMaxError[0] > kLimit[0]) {
      thresholds[0] = kMul[0] * hfMaxError[0] * new_quant / orig_quant;
    }
  }
  // Heuristic for improving accuracy of high-frequency patterns
  // occurring in an environment with no medium-frequency masking
  // patterns.
  {
    float all =
        hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] + 1;
    float mul[3] = {70, 30, 60};
    if (mul[c] * sum_of_highest_freq_row_and_column >= all) {
      *quant += mul[c] * sum_of_highest_freq_row_and_column / all;
      if (*quant >= Quantizer::kQuantMax) {
        *quant = Quantizer::kQuantMax - 1;
      }
    }
  }
  if (quant_kind == AcStrategyType::DCT) {
    // If this 8x8 block is too flat, increase the adaptive quantization level
    // a bit to reduce visible block boundaries and requantize the block.
    if (hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] < 11) {
      *quant += 1;
      if (*quant >= Quantizer::kQuantMax) {
        *quant = Quantizer::kQuantMax - 1;
      }
    }
  }
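  // For large transforms (DCT16x16 and up), raise the quant index when the
  // accumulated quantization error is high relative to the coded magnitudes;
  // kMul1/kMul2 below are per-transform-size, per-channel tuning constants.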
  {
    static const double kMul1[4][3] = {
        {
            0.22080615753848404,
            0.45797479824262011,
            0.29859235095977965,
        },
        {
            0.70109486510286834,
            0.16185281305512639,
            0.14387691730035473,
        },
        {
            0.114985964456218638,
            0.44656840441027695,
            0.10587658215149048,
        },
        {
            0.46849665264409396,
            0.41239077937781954,
            0.088667407767185444,
        },
    };
    static const double kMul2[4][3] = {
        {
            0.27450281941822197,
            1.1255766549984996,
            0.98950459134128388,
        },
        {
            0.4652168675598285,
            0.40945807983455818,
            0.36581899811751367,
        },
        {
            0.28034972424715715,
            0.9182653201929738,
            1.5581531543057416,
        },
        {
            0.26873118114033728,
            0.68863712390392484,
            1.2082185408666786,
        },
    };
    static const double kQuantNormalizer = 2.2942708343284721;
    sum_of_error *= kQuantNormalizer;
    sum_of_vals *= kQuantNormalizer;
    if (quant_kind >= AcStrategyType::DCT16X16) {
      int ix = 3;
      if (quant_kind == AcStrategyType::DCT32X16 ||
          quant_kind == AcStrategyType::DCT16X32) {
        ix = 1;
      } else if (quant_kind == AcStrategyType::DCT16X16) {
        ix = 0;
      } else if (quant_kind == AcStrategyType::DCT32X32) {
        ix = 2;
      }
      int step =
          sum_of_error / (kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
                          kMul2[ix][c] * sum_of_vals);
      if (step >= 2) {
        step = 2;
      }
      if (step < 0) {
        step = 0;
      }
      if (sum_of_error > kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
                             kMul2[ix][c] * sum_of_vals) {
        *quant += step;
        if (*quant >= Quantizer::kQuantMax) {
          *quant = Quantizer::kQuantMax - 1;
        }
      }
    }
  }
  {
    // Reduce quant in highly active areas.
    int32_t div = (xsize * ysize);
    int32_t activity = (static_cast<int32_t>(hfNonZeros[0]) + div / 2) / div;
    int32_t orig_qp_limit = std::max(4, *quant / 2);
    for (int i = 1; i < 4; ++i) {
      activity = std::min(
          activity, (static_cast<int32_t>(hfNonZeros[i]) + div / 2) / div);
    }
    if (activity >= 15) {
      activity = 15;
    }
    int32_t qp = *quant - activity;
    if (c == 1) {
      for (int i = 1; i < 4; ++i) {
        thresholds[i] += 0.01 * activity;
      }
    }
    if (qp < orig_qp_limit) {
      qp = orig_qp_limit;
    }
    *quant = qp;
  }
}

// NOTE: caller takes care of extracting quant from rect of RawQuantField.
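// Quantizes the Y channel, then dequantizes it back into `inout` so that the
// X and B channels can be decorrelated against the same Y reconstruction the
// decoder will compute.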
void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
                               const Quantizer& quantizer,
                               const bool error_diffusion,
                               AcStrategyType quant_kind, size_t xsize,
                               size_t ysize, const float* JXL_RESTRICT biases,
                               int32_t* quant, float* JXL_RESTRICT inout,
                               int32_t* JXL_RESTRICT quantized) {
  float thres_y[4] = {0.58f, 0.64f, 0.64f, 0.64f};
  if (enc_state->cparams.speed_tier <= SpeedTier::kHare) {
    int32_t max_quant = 0;
    int quant_orig = *quant;
    float val[3] = {enc_state->x_qm_multiplier, 1.0f,
                    enc_state->b_qm_multiplier};
    for (int c : {1, 0, 2}) {
      float thres[4] = {0.58f, 0.64f, 0.64f, 0.64f};
      *quant = quant_orig;
      AdjustQuantBlockAC(quantizer, c, val[c], quant_kind, xsize, ysize,
                         &thres[0], inout + c * size, quant);
      // Dead zone adjustment
      if (c == 1) {
        for (int k = 0; k < 4; ++k) {
          thres_y[k] = thres[k];
        }
      }
      max_quant = std::max(*quant, max_quant);
    }
    *quant = max_quant;
  } else {
    thres_y[0] = 0.56;
    thres_y[1] = 0.62;
    thres_y[2] = 0.62;
    thres_y[3] = 0.62;
  }

  QuantizeBlockAC(quantizer, error_diffusion, 1, 1.0f, quant_kind, xsize, ysize,
                  &thres_y[0], inout + size, quant, quantized + size);

  const float* JXL_RESTRICT dequant_matrix =
      quantizer.DequantMatrix(quant_kind, 1);

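  // Dequantize Y in place: reconstructed value = quant-bias-adjusted integer
  // * dequant matrix entry * inverse AC quant step.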
  HWY_CAPPED(float, kDCTBlockSize) df;
  HWY_CAPPED(int32_t, kDCTBlockSize) di;
  const auto inv_qac = Set(df, quantizer.inv_quant_ac(*quant));
  for (size_t k = 0; k < kDCTBlockSize * xsize * ysize; k += Lanes(df)) {
    const auto quant = Load(di, quantized + size + k);
    const auto adj_quant = AdjustQuantBias(di, 1, quant, biases);
    const auto dequantm = Load(df, dequant_matrix + k);
    Store(Mul(Mul(adj_quant, dequantm), inv_qac), df, inout + size + k);
  }
}

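// Computes quantized AC coefficients and the DC image for one group: forward-
// transforms each block, roundtrip-quantizes Y, decorrelates X and B against
// the reconstructed Y, quantizes X and B, and splits the result into passes.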
Status ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
                           const Image3F& opsin, const Rect& rect,
                           Image3F* dc) {
  JxlMemoryManager* memory_manager = opsin.memory_manager();
  const Rect block_group_rect =
      enc_state->shared.frame_dim.BlockGroupRect(group_idx);
  const Rect cmap_rect(
      block_group_rect.x0() / kColorTileDimInBlocks,
      block_group_rect.y0() / kColorTileDimInBlocks,
      DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
      DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
  const Rect group_rect =
      enc_state->shared.frame_dim.GroupRect(group_idx).Translate(rect.x0(),
                                                                 rect.y0());

  const size_t xsize_blocks = block_group_rect.xsize();
  const size_t ysize_blocks = block_group_rect.ysize();

  const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
  const size_t opsin_stride = static_cast<size_t>(opsin.PixelsPerRow());

  ImageI& full_quant_field = enc_state->shared.raw_quant_field;
  const CompressParams& cparams = enc_state->cparams;

  const size_t dct_scratch_size =
      3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;

  // TODO(veluca): consider strategies to reduce this memory.
  size_t mem_bytes = 3 * AcStrategy::kMaxCoeffArea * sizeof(int32_t);
  JXL_ASSIGN_OR_RETURN(auto mem,
                       AlignedMemory::Create(memory_manager, mem_bytes));
  size_t fmem_bytes =
      (5 * AcStrategy::kMaxCoeffArea + dct_scratch_size) * sizeof(float);
  JXL_ASSIGN_OR_RETURN(auto fmem,
                       AlignedMemory::Create(memory_manager, fmem_bytes));
  float* JXL_RESTRICT scratch_space =
      fmem.address<float>() + 3 * AcStrategy::kMaxCoeffArea;
  {
    // Only use error diffusion in Squirrel mode or slower.
    const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
    constexpr HWY_CAPPED(float, kDCTBlockSize) d;

    int32_t* JXL_RESTRICT coeffs[3][kMaxNumPasses] = {};
    size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
    JXL_ENSURE(num_passes > 0);
    for (size_t i = 0; i < num_passes; i++) {
      // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
      JXL_ENSURE(enc_state->coeffs[i]->Type() == ACType::k32);
      for (size_t c = 0; c < 3; c++) {
        coeffs[c][i] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
      }
    }

    HWY_ALIGN float* coeffs_in = fmem.address<float>();
    HWY_ALIGN int32_t* quantized = mem.address<int32_t>();

    for (size_t by = 0; by < ysize_blocks; ++by) {
      int32_t* JXL_RESTRICT row_quant_ac =
          block_group_rect.Row(&full_quant_field, by);
      size_t ty = by / kColorTileDimInBlocks;
      const int8_t* JXL_RESTRICT row_cmap[3] = {
          cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty),
          nullptr,
          cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
      };
      const float* JXL_RESTRICT opsin_rows[3] = {
          group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim),
          group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim),
          group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim),
      };
      float* JXL_RESTRICT dc_rows[3] = {
          block_group_rect.PlaneRow(dc, 0, by),
          block_group_rect.PlaneRow(dc, 1, by),
          block_group_rect.PlaneRow(dc, 2, by),
      };
      AcStrategyRow ac_strategy_row =
          enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
      for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
           tx++) {
        const auto x_factor =
            Set(d, enc_state->shared.cmap.base().YtoXRatio(row_cmap[0][tx]));
        const auto b_factor =
            Set(d, enc_state->shared.cmap.base().YtoBRatio(row_cmap[2][tx]));
        for (size_t bx = tx * kColorTileDimInBlocks;
             bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) {
          const AcStrategy acs = ac_strategy_row[bx];
          if (!acs.IsFirstBlock()) continue;

          size_t xblocks = acs.covered_blocks_x();
          size_t yblocks = acs.covered_blocks_y();

          CoefficientLayout(&yblocks, &xblocks);

          size_t size = kDCTBlockSize * xblocks * yblocks;

          // DCT all three channels; roundtrip-quantize Y and set its DC.
          int32_t quant_ac = row_quant_ac[bx];
          for (size_t c : {0, 1, 2}) {
            TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim,
                                opsin_stride, coeffs_in + c * size,
                                scratch_space);
          }
          DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size,
                                  dc_rows[1] + bx, dc_stride);

          QuantizeRoundtripYBlockAC(
              enc_state, size, enc_state->shared.quantizer, error_diffusion,
              acs.Strategy(), xblocks, yblocks, kDefaultQuantBias, &quant_ac,
              coeffs_in, quantized);

          // Unapply color correlation
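          // (NegMulAdd subtracts the per-tile x_factor/b_factor times the
          // roundtripped Y, mirroring the correlation the decoder adds back.)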
          for (size_t k = 0; k < size; k += Lanes(d)) {
            const auto in_x = Load(d, coeffs_in + k);
            const auto in_y = Load(d, coeffs_in + size + k);
            const auto in_b = Load(d, coeffs_in + 2 * size + k);
            const auto out_x = NegMulAdd(x_factor, in_y, in_x);
            const auto out_b = NegMulAdd(b_factor, in_y, in_b);
            Store(out_x, d, coeffs_in + k);
            Store(out_b, d, coeffs_in + 2 * size + k);
          }

          // Quantize X and B channels and set DC.
          for (size_t c : {0, 2}) {
            float thres[4] = {0.58f, 0.62f, 0.62f, 0.62f};
            QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c,
                            c == 0 ? enc_state->x_qm_multiplier
                                   : enc_state->b_qm_multiplier,
                            acs.Strategy(), xblocks, yblocks, &thres[0],
                            coeffs_in + c * size, &quant_ac,
                            quantized + c * size);
            DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size,
                                    dc_rows[c] + bx, dc_stride);
          }
          row_quant_ac[bx] = quant_ac;
          for (size_t c = 0; c < 3; c++) {
            enc_state->progressive_splitter.SplitACCoefficients(
                quantized + c * size, acs, bx, by, coeffs[c]);
            for (size_t p = 0; p < num_passes; p++) {
              coeffs[c][p] += size;
            }
          }
        }
      }
    }
  }
  return true;
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {
HWY_EXPORT(ComputeCoefficients);
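// Thin wrapper that dispatches to the best SIMD instantiation for this CPU.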
Status ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
                           const Image3F& opsin, const Rect& rect,
                           Image3F* dc) {
  return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin,
                                                   rect, dc);
}

Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
                                        size_t histogram_idx,
                                        const PassesEncoderState& enc_state,
                                        BitWriter* writer, AuxOut* aux_out) {
  // Select which histogram to use among those of the current pass.
  const size_t num_histograms = enc_state.shared.num_histograms;
  // num_histograms is 0 only for lossless.
  JXL_ENSURE(num_histograms == 0 || histogram_idx < num_histograms);
  size_t histo_selector_bits = CeilLog2Nonzero(num_histograms);
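  // The histogram choice costs ceil(log2(num_histograms)) raw bits; with a
  // single histogram there is nothing to write.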

  if (histo_selector_bits != 0) {
    JXL_RETURN_IF_ERROR(
        writer->WithMaxBits(histo_selector_bits, LayerType::Ac, aux_out, [&] {
          writer->Write(histo_selector_bits, histogram_idx);
          return true;
        }));
  }
  size_t context_offset =
      histogram_idx * enc_state.shared.block_ctx_map.NumACContexts();
  JXL_RETURN_IF_ERROR(WriteTokens(
      enc_state.passes[pass_idx].ac_tokens[group_idx],
      enc_state.passes[pass_idx].codes, enc_state.passes[pass_idx].context_map,
      context_offset, writer, LayerType::AcTokens, aux_out));

  return true;
}

}  // namespace jxl
#endif  // HWY_ONCE