/src/libjxl/lib/jxl/enc_ac_strategy.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_ac_strategy.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cmath> |
12 | | #include <cstdint> |
13 | | #include <cstdio> |
14 | | #include <cstring> |
15 | | |
16 | | #include "lib/jxl/base/common.h" |
17 | | #include "lib/jxl/memory_manager_internal.h" |
18 | | |
19 | | #undef HWY_TARGET_INCLUDE |
20 | | #define HWY_TARGET_INCLUDE "lib/jxl/enc_ac_strategy.cc" |
21 | | #include <hwy/foreach_target.h> |
22 | | #include <hwy/highway.h> |
23 | | |
24 | | #include "lib/jxl/ac_strategy.h" |
25 | | #include "lib/jxl/base/bits.h" |
26 | | #include "lib/jxl/base/compiler_specific.h" |
27 | | #include "lib/jxl/base/fast_math-inl.h" |
28 | | #include "lib/jxl/base/rect.h" |
29 | | #include "lib/jxl/base/status.h" |
30 | | #include "lib/jxl/dec_transforms-inl.h" |
31 | | #include "lib/jxl/enc_aux_out.h" |
32 | | #include "lib/jxl/enc_debug_image.h" |
33 | | #include "lib/jxl/enc_params.h" |
34 | | #include "lib/jxl/enc_transforms-inl.h" |
35 | | #include "lib/jxl/simd_util.h" |
36 | | |
37 | | // Some of the floating point constants in this file and in other |
38 | | // files in the libjxl project have been obtained using the |
39 | | // tools/optimizer/simplex_fork.py tool. It is a variation of |
40 | | // Nelder-Mead optimization, and we generally try to minimize |
41 | | // BPP * pnorm aggregate as reported by the benchmark_xl tool, |
42 | | // but occasionally the values are optimized by using additional |
43 | | // constraints such as maintaining a certain density, or ratio of |
44 | | // popularity of integral transforms. Jyrki visually reviews all |
45 | | // such changes and often makes manual changes to maintain good |
46 | | // visual quality to changes where butteraugli was not sufficiently |
47 | | // sensitive to some kind of degradation. Unfortunately image quality |
48 | | // is still more of an art than science. |
49 | | |
50 | | // Set JXL_DEBUG_AC_STRATEGY to 1 to enable debugging. |
51 | | #ifndef JXL_DEBUG_AC_STRATEGY |
52 | 0 | #define JXL_DEBUG_AC_STRATEGY 0 |
53 | | #endif |
54 | | |
55 | | // This must come before the begin/end_target, but HWY_ONCE is only true |
56 | | // after that, so use an "include guard". |
57 | | #ifndef LIB_JXL_ENC_AC_STRATEGY_ |
58 | | #define LIB_JXL_ENC_AC_STRATEGY_ |
59 | | // Parameters of the heuristic are marked with a OPTIMIZE comment. |
60 | | namespace jxl { |
61 | | namespace { |
62 | | |
63 | | // Debugging utilities. |
64 | | |
65 | | // Returns a linear sRGB color (as bytes) for each AC strategy. |
66 | 0 | const uint8_t* TypeColor(uint8_t raw_strategy) { |
67 | 0 | JXL_DASSERT(AcStrategy::IsRawStrategyValid(raw_strategy)); |
68 | 0 | static_assert(AcStrategy::kNumValidStrategies == 27, "Update colors"); |
69 | 0 | static constexpr uint8_t kColors[AcStrategy::kNumValidStrategies + 1][3] = { |
70 | 0 | {0xFF, 0xFF, 0x00}, // DCT8 | yellow |
71 | 0 | {0xFF, 0x80, 0x80}, // HORNUSS | vivid tangerine |
72 | 0 | {0xFF, 0x80, 0x80}, // DCT2x2 | vivid tangerine |
73 | 0 | {0xFF, 0x80, 0x80}, // DCT4x4 | vivid tangerine |
74 | 0 | {0x80, 0xFF, 0x00}, // DCT16x16 | chartreuse |
75 | 0 | {0x00, 0xC0, 0x00}, // DCT32x32 | waystone green |
76 | 0 | {0xC0, 0xFF, 0x00}, // DCT16x8 | lime |
77 | 0 | {0xC0, 0xFF, 0x00}, // DCT8x16 | lime |
78 | 0 | {0x00, 0xFF, 0x00}, // DCT32x8 | green |
79 | 0 | {0x00, 0xFF, 0x00}, // DCT8x32 | green |
80 | 0 | {0x00, 0xFF, 0x00}, // DCT32x16 | green |
81 | 0 | {0x00, 0xFF, 0x00}, // DCT16x32 | green |
82 | 0 | {0xFF, 0x80, 0x00}, // DCT4x8 | orange juice |
83 | 0 | {0xFF, 0x80, 0x00}, // DCT8x4 | orange juice |
84 | 0 | {0xFF, 0xFF, 0x80}, // AFV0 | butter |
85 | 0 | {0xFF, 0xFF, 0x80}, // AFV1 | butter |
86 | 0 | {0xFF, 0xFF, 0x80}, // AFV2 | butter |
87 | 0 | {0xFF, 0xFF, 0x80}, // AFV3 | butter |
88 | 0 | {0x00, 0xC0, 0xFF}, // DCT64x64 | capri |
89 | 0 | {0x00, 0xFF, 0xFF}, // DCT64x32 | aqua |
90 | 0 | {0x00, 0xFF, 0xFF}, // DCT32x64 | aqua |
91 | 0 | {0x00, 0x40, 0xFF}, // DCT128x128 | rare blue |
92 | 0 | {0x00, 0x80, 0xFF}, // DCT128x64 | magic ink |
93 | 0 | {0x00, 0x80, 0xFF}, // DCT64x128 | magic ink |
94 | 0 | {0x00, 0x00, 0xC0}, // DCT256x256 | keese blue |
95 | 0 | {0x00, 0x00, 0xFF}, // DCT256x128 | blue |
96 | 0 | {0x00, 0x00, 0xFF}, // DCT128x256 | blue |
97 | 0 | {0x00, 0x00, 0x00} // invalid | black |
98 | 0 | }; |
99 | 0 | raw_strategy = |
100 | 0 | Clamp1<uint8_t>(raw_strategy, 0, AcStrategy::kNumValidStrategies); |
101 | 0 | return kColors[raw_strategy]; |
102 | 0 | } |
103 | | |
104 | 0 | const uint8_t* TypeMask(uint8_t raw_strategy) { |
105 | 0 | JXL_DASSERT(AcStrategy::IsRawStrategyValid(raw_strategy)); |
106 | 0 | static_assert(AcStrategy::kNumValidStrategies == 27, "Update masks"); |
107 | 0 | // implicitly, first row and column is made dark |
108 | 0 | static constexpr uint8_t kMask[AcStrategy::kNumValidStrategies + 1][64] = { |
109 | 0 | { |
110 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
111 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
112 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
113 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
114 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
115 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
116 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
117 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
118 | 0 | }, // DCT8 |
119 | 0 | { |
120 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
121 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
122 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
123 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
124 | 0 | 0, 0, 1, 1, 1, 1, 0, 0, // |
125 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
126 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
127 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
128 | 0 | }, // HORNUSS |
129 | 0 | { |
130 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
131 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
132 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
133 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
134 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
135 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
136 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
137 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
138 | 0 | }, // 2x2 |
139 | 0 | { |
140 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
141 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
142 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
143 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
144 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
145 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
146 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
147 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
148 | 0 | }, // 4x4 |
149 | 0 | {}, // DCT16x16 (unused) |
150 | 0 | {}, // DCT32x32 (unused) |
151 | 0 | {}, // DCT16x8 (unused) |
152 | 0 | {}, // DCT8x16 (unused) |
153 | 0 | {}, // DCT32x8 (unused) |
154 | 0 | {}, // DCT8x32 (unused) |
155 | 0 | {}, // DCT32x16 (unused) |
156 | 0 | {}, // DCT16x32 (unused) |
157 | 0 | { |
158 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
159 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
160 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
161 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
162 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
163 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
164 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
165 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
166 | 0 | }, // DCT4x8 |
167 | 0 | { |
168 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
169 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
170 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
171 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
172 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
173 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
174 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
175 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
176 | 0 | }, // DCT8x4 |
177 | 0 | { |
178 | 0 | 1, 1, 1, 1, 1, 0, 0, 0, // |
179 | 0 | 1, 1, 1, 1, 0, 0, 0, 0, // |
180 | 0 | 1, 1, 1, 0, 0, 0, 0, 0, // |
181 | 0 | 1, 1, 0, 0, 0, 0, 0, 0, // |
182 | 0 | 1, 0, 0, 0, 0, 0, 0, 0, // |
183 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
184 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
185 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
186 | 0 | }, // AFV0 |
187 | 0 | { |
188 | 0 | 0, 0, 0, 0, 1, 1, 1, 1, // |
189 | 0 | 0, 0, 0, 0, 0, 1, 1, 1, // |
190 | 0 | 0, 0, 0, 0, 0, 0, 1, 1, // |
191 | 0 | 0, 0, 0, 0, 0, 0, 0, 1, // |
192 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
193 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
194 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
195 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
196 | 0 | }, // AFV1 |
197 | 0 | { |
198 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
199 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
200 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
201 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
202 | 0 | 1, 0, 0, 0, 0, 0, 0, 0, // |
203 | 0 | 1, 1, 0, 0, 0, 0, 0, 0, // |
204 | 0 | 1, 1, 1, 0, 0, 0, 0, 0, // |
205 | 0 | 1, 1, 1, 1, 0, 0, 0, 0, // |
206 | 0 | }, // AFV2 |
207 | 0 | { |
208 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
209 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
210 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
211 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
212 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
213 | 0 | 0, 0, 0, 0, 0, 0, 0, 1, // |
214 | 0 | 0, 0, 0, 0, 0, 0, 1, 1, // |
215 | 0 | 0, 0, 0, 0, 0, 1, 1, 1, // |
216 | 0 | }, // AFV3 |
217 | 0 | {} // invalid |
218 | 0 | }; |
219 | 0 | raw_strategy = |
220 | 0 | Clamp1<uint8_t>(raw_strategy, 0, AcStrategy::kNumValidStrategies); |
221 | 0 | return kMask[raw_strategy]; |
222 | 0 | } |
223 | | |
224 | | Status DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize, |
225 | | size_t ysize, const char* tag, AuxOut* aux_out, |
226 | 0 | const CompressParams& cparams) { |
227 | 0 | JxlMemoryManager* memory_manager = ac_strategy.memory_manager(); |
228 | 0 | JXL_ASSIGN_OR_RETURN(Image3F color_acs, |
229 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
230 | 0 | for (size_t y = 0; y < ysize; y++) { |
231 | 0 | float* JXL_RESTRICT rows[3] = { |
232 | 0 | color_acs.PlaneRow(0, y), |
233 | 0 | color_acs.PlaneRow(1, y), |
234 | 0 | color_acs.PlaneRow(2, y), |
235 | 0 | }; |
236 | 0 | const AcStrategyRow acs_row = ac_strategy.ConstRow(y / kBlockDim); |
237 | 0 | for (size_t x = 0; x < xsize; x++) { |
238 | 0 | AcStrategy acs = acs_row[x / kBlockDim]; |
239 | 0 | const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy()); |
240 | 0 | for (size_t c = 0; c < 3; c++) { |
241 | 0 | rows[c][x] = color[c] / 255.f; |
242 | 0 | } |
243 | 0 | } |
244 | 0 | } |
245 | 0 | size_t stride = color_acs.PixelsPerRow(); |
246 | 0 | for (size_t c = 0; c < 3; c++) { |
247 | 0 | for (size_t by = 0; by < DivCeil(ysize, kBlockDim); by++) { |
248 | 0 | float* JXL_RESTRICT row = color_acs.PlaneRow(c, by * kBlockDim); |
249 | 0 | const AcStrategyRow acs_row = ac_strategy.ConstRow(by); |
250 | 0 | for (size_t bx = 0; bx < DivCeil(xsize, kBlockDim); bx++) { |
251 | 0 | AcStrategy acs = acs_row[bx]; |
252 | 0 | if (!acs.IsFirstBlock()) continue; |
253 | 0 | const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy()); |
254 | 0 | const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy()); |
255 | 0 | if (acs.covered_blocks_x() == 1 && acs.covered_blocks_y() == 1) { |
256 | 0 | for (size_t iy = 0; iy < kBlockDim && by * kBlockDim + iy < ysize; |
257 | 0 | iy++) { |
258 | 0 | for (size_t ix = 0; ix < kBlockDim && bx * kBlockDim + ix < xsize; |
259 | 0 | ix++) { |
260 | 0 | if (mask[iy * kBlockDim + ix]) { |
261 | 0 | row[iy * stride + bx * kBlockDim + ix] = color[c] / 800.f; |
262 | 0 | } |
263 | 0 | } |
264 | 0 | } |
265 | 0 | } |
266 | 0 | // draw block edges |
267 | 0 | for (size_t ix = 0; ix < kBlockDim * acs.covered_blocks_x() && |
268 | 0 | bx * kBlockDim + ix < xsize; |
269 | 0 | ix++) { |
270 | 0 | row[0 * stride + bx * kBlockDim + ix] = color[c] / 350.f; |
271 | 0 | } |
272 | 0 | for (size_t iy = 0; iy < kBlockDim * acs.covered_blocks_y() && |
273 | 0 | by * kBlockDim + iy < ysize; |
274 | 0 | iy++) { |
275 | 0 | row[iy * stride + bx * kBlockDim + 0] = color[c] / 350.f; |
276 | 0 | } |
277 | 0 | } |
278 | 0 | } |
279 | 0 | } |
280 | 0 | return DumpImage(cparams, tag, color_acs); |
281 | 0 | } |
282 | | |
283 | | } // namespace |
284 | | } // namespace jxl |
285 | | #endif // LIB_JXL_ENC_AC_STRATEGY_ |
286 | | |
287 | | HWY_BEFORE_NAMESPACE(); |
288 | | namespace jxl { |
289 | | namespace HWY_NAMESPACE { |
290 | | |
291 | | // These templates are not found via ADL. |
292 | | using hwy::HWY_NAMESPACE::AbsDiff; |
293 | | using hwy::HWY_NAMESPACE::Eq; |
294 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
295 | | using hwy::HWY_NAMESPACE::IfThenZeroElse; |
296 | | using hwy::HWY_NAMESPACE::Round; |
297 | | using hwy::HWY_NAMESPACE::Sqrt; |
298 | | |
299 | | bool MultiBlockTransformCrossesHorizontalBoundary( |
300 | | const AcStrategyImage& ac_strategy, size_t start_x, size_t y, |
301 | 0 | size_t end_x) { |
302 | 0 | if (start_x >= ac_strategy.xsize() || y >= ac_strategy.ysize()) { |
303 | 0 | return false; |
304 | 0 | } |
305 | 0 | if (y % 8 == 0) { |
306 | | // Nothing crosses 64x64 boundaries, and the memory on the other side |
307 | | // of the 64x64 block may still uninitialized. |
308 | 0 | return false; |
309 | 0 | } |
310 | 0 | end_x = std::min(end_x, ac_strategy.xsize()); |
311 | | // The first multiblock might be before the start_x, let's adjust it |
312 | | // to point to the first IsFirstBlock() == true block we find by backward |
313 | | // tracing. |
314 | 0 | AcStrategyRow row = ac_strategy.ConstRow(y); |
315 | 0 | const size_t start_x_limit = start_x & ~7; |
316 | 0 | while (start_x != start_x_limit && !row[start_x].IsFirstBlock()) { |
317 | 0 | --start_x; |
318 | 0 | } |
319 | 0 | for (size_t x = start_x; x < end_x;) { |
320 | 0 | if (row[x].IsFirstBlock()) { |
321 | 0 | x += row[x].covered_blocks_x(); |
322 | 0 | } else { |
323 | 0 | return true; |
324 | 0 | } |
325 | 0 | } |
326 | 0 | return false; |
327 | 0 | } |
328 | | |
329 | | bool MultiBlockTransformCrossesVerticalBoundary( |
330 | | const AcStrategyImage& ac_strategy, size_t x, size_t start_y, |
331 | 0 | size_t end_y) { |
332 | 0 | if (x >= ac_strategy.xsize() || start_y >= ac_strategy.ysize()) { |
333 | 0 | return false; |
334 | 0 | } |
335 | 0 | if (x % 8 == 0) { |
336 | | // Nothing crosses 64x64 boundaries, and the memory on the other side |
337 | | // of the 64x64 block may still uninitialized. |
338 | 0 | return false; |
339 | 0 | } |
340 | 0 | end_y = std::min(end_y, ac_strategy.ysize()); |
341 | | // The first multiblock might be before the start_y, let's adjust it |
342 | | // to point to the first IsFirstBlock() == true block we find by backward |
343 | | // tracing. |
344 | 0 | const size_t start_y_limit = start_y & ~7; |
345 | 0 | while (start_y != start_y_limit && |
346 | 0 | !ac_strategy.ConstRow(start_y)[x].IsFirstBlock()) { |
347 | 0 | --start_y; |
348 | 0 | } |
349 | |
|
350 | 0 | for (size_t y = start_y; y < end_y;) { |
351 | 0 | AcStrategyRow row = ac_strategy.ConstRow(y); |
352 | 0 | if (row[x].IsFirstBlock()) { |
353 | 0 | y += row[x].covered_blocks_y(); |
354 | 0 | } else { |
355 | 0 | return true; |
356 | 0 | } |
357 | 0 | } |
358 | 0 | return false; |
359 | 0 | } |
360 | | |
361 | | Status EstimateEntropy(const AcStrategy& acs, float entropy_mul, size_t x, |
362 | | size_t y, const ACSConfig& config, |
363 | | const float* JXL_RESTRICT cmap_factors, float* block, |
364 | | float* full_scratch_space, uint32_t* quantized, |
365 | 0 | float& entropy) { |
366 | 0 | entropy = 0.0f; |
367 | 0 | float* mem = full_scratch_space; |
368 | 0 | float* scratch_space = full_scratch_space + AcStrategy::kMaxCoeffArea; |
369 | 0 | const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize; |
370 | | |
371 | | // Apply transform. |
372 | 0 | for (size_t c = 0; c < 3; c++) { |
373 | 0 | float* JXL_RESTRICT block_c = block + size * c; |
374 | 0 | TransformFromPixels(acs.Strategy(), &config.Pixel(c, x, y), |
375 | 0 | config.src_stride, block_c, scratch_space); |
376 | 0 | } |
377 | 0 | HWY_FULL(float) df; |
378 | |
|
379 | 0 | const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y(); |
380 | | // avoid large blocks when there is a lot going on in red-green. |
381 | 0 | float quant_norm16 = 0; |
382 | 0 | if (num_blocks == 1) { |
383 | | // When it is only one 8x8, we don't need aggregation of values. |
384 | 0 | quant_norm16 = config.Quant(x / 8, y / 8); |
385 | 0 | } else if (num_blocks == 2) { |
386 | | // Taking max instead of 8th norm seems to work |
387 | | // better for smallest blocks up to 16x8. Jyrki couldn't get |
388 | | // improvements in trying the same for 16x16 blocks. |
389 | 0 | if (acs.covered_blocks_y() == 2) { |
390 | 0 | quant_norm16 = |
391 | 0 | std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1)); |
392 | 0 | } else { |
393 | 0 | quant_norm16 = |
394 | 0 | std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8)); |
395 | 0 | } |
396 | 0 | } else { |
397 | | // Load QF value, calculate empirical heuristic on masking field |
398 | | // for weighting the information loss. Information loss manifests |
399 | | // itself as ringing, and masking could hide it. |
400 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { |
401 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { |
402 | 0 | float qval = config.Quant(x / 8 + ix, y / 8 + iy); |
403 | 0 | qval *= qval; |
404 | 0 | qval *= qval; |
405 | 0 | qval *= qval; |
406 | 0 | quant_norm16 += qval * qval; |
407 | 0 | } |
408 | 0 | } |
409 | 0 | quant_norm16 /= num_blocks; |
410 | 0 | quant_norm16 = FastPowf(quant_norm16, 1.0f / 16.0f); |
411 | 0 | } |
412 | 0 | const auto quant = Set(df, quant_norm16); |
413 | | |
414 | | // Compute entropy. |
415 | 0 | const HWY_CAPPED(float, 8) df8; |
416 | |
|
417 | 0 | auto loss = Zero(df8); |
418 | 0 | for (size_t c = 0; c < 3; c++) { |
419 | 0 | const float* inv_matrix = config.dequant->InvMatrix(acs.Strategy(), c); |
420 | 0 | const float* matrix = config.dequant->Matrix(acs.Strategy(), c); |
421 | 0 | const auto cmap_factor = Set(df, cmap_factors[c]); |
422 | |
|
423 | 0 | auto entropy_v = Zero(df); |
424 | 0 | auto nzeros_v = Zero(df); |
425 | 0 | for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) { |
426 | 0 | const auto in = Load(df, block + c * size + i); |
427 | 0 | const auto in_y = Mul(Load(df, block + size + i), cmap_factor); |
428 | 0 | const auto im = Load(df, inv_matrix + i); |
429 | 0 | const auto val = Mul(Sub(in, in_y), Mul(im, quant)); |
430 | 0 | const auto rval = Round(val); |
431 | 0 | const auto diff = Sub(val, rval); |
432 | 0 | const auto m = Load(df, matrix + i); |
433 | 0 | Store(Mul(m, diff), df, &mem[i]); |
434 | 0 | const auto q = Abs(rval); |
435 | 0 | const auto q_is_zero = Eq(q, Zero(df)); |
436 | | // We used to have q * C here, but that cost model seems to |
437 | | // be punishing large values more than necessary. Sqrt tries |
438 | | // to avoid large values less aggressively. |
439 | 0 | entropy_v = Add(Sqrt(q), entropy_v); |
440 | 0 | nzeros_v = Add(nzeros_v, IfThenZeroElse(q_is_zero, Set(df, 1.0f))); |
441 | 0 | } |
442 | |
|
443 | 0 | { |
444 | 0 | auto lossc = Zero(df8); |
445 | 0 | TransformToPixels(acs.Strategy(), &mem[0], block, |
446 | 0 | acs.covered_blocks_x() * 8, scratch_space); |
447 | |
|
448 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { |
449 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { |
450 | 0 | for (size_t dy = 0; dy < kBlockDim; ++dy) { |
451 | 0 | for (size_t dx = 0; dx < kBlockDim; dx += Lanes(df8)) { |
452 | 0 | auto in = Load(df8, block + |
453 | 0 | (iy * kBlockDim + dy) * |
454 | 0 | (acs.covered_blocks_x() * kBlockDim) + |
455 | 0 | ix * kBlockDim + dx); |
456 | 0 | if (x + ix * 8 + dx + Lanes(df8) <= config.mask1x1_xsize) { |
457 | 0 | auto masku = |
458 | 0 | Abs(Load(df8, config.MaskingPtr1x1(x + ix * 8 + dx, |
459 | 0 | y + iy * 8 + dy))); |
460 | 0 | in = Mul(masku, in); |
461 | 0 | in = Mul(in, in); |
462 | 0 | in = Mul(in, in); |
463 | 0 | in = Mul(in, in); |
464 | 0 | lossc = Add(lossc, in); |
465 | 0 | } |
466 | 0 | } |
467 | 0 | } |
468 | 0 | } |
469 | 0 | } |
470 | 0 | static const double kChannelMul[3] = { |
471 | 0 | pow(10.2, 8.0), |
472 | 0 | pow(1.0, 8.0), |
473 | 0 | pow(1.03, 8.0), |
474 | 0 | }; |
475 | 0 | lossc = Mul(Set(df8, kChannelMul[c]), lossc); |
476 | 0 | loss = Add(loss, lossc); |
477 | 0 | } |
478 | 0 | entropy += config.cost_delta * GetLane(SumOfLanes(df, entropy_v)); |
479 | 0 | size_t num_nzeros = GetLane(SumOfLanes(df, nzeros_v)); |
480 | | // Add #bit of num_nonzeros, as an estimate of the cost for encoding the |
481 | | // number of non-zeros of the block. |
482 | 0 | size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1; |
483 | | // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a |
484 | | // bias. |
485 | 0 | entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits); |
486 | 0 | } |
487 | 0 | float loss_scalar = |
488 | 0 | pow(GetLane(SumOfLanes(df8, loss)) / (num_blocks * kDCTBlockSize), |
489 | 0 | 1.0 / 8.0) * |
490 | 0 | (num_blocks * kDCTBlockSize) / quant_norm16; |
491 | 0 | entropy *= entropy_mul; |
492 | 0 | entropy += config.info_loss_multiplier * loss_scalar; |
493 | 0 | return true; |
494 | 0 | } |
495 | | |
496 | | Status FindBest8x8Transform(size_t x, size_t y, int encoding_speed_tier, |
497 | | float butteraugli_target, const ACSConfig& config, |
498 | | const float* JXL_RESTRICT cmap_factors, |
499 | | AcStrategyImage* JXL_RESTRICT ac_strategy, |
500 | | float* block, float* scratch_space, |
501 | | uint32_t* quantized, float* entropy_out, |
502 | 0 | AcStrategyType& best_tx) { |
503 | 0 | struct TransformTry8x8 { |
504 | 0 | AcStrategyType type; |
505 | 0 | int encoding_speed_tier_max_limit; |
506 | 0 | double entropy_mul; |
507 | 0 | }; |
508 | 0 | static const TransformTry8x8 kTransforms8x8[] = { |
509 | 0 | { |
510 | 0 | AcStrategyType::DCT, |
511 | 0 | 9, |
512 | 0 | 0.8, |
513 | 0 | }, |
514 | 0 | { |
515 | 0 | AcStrategyType::DCT4X4, |
516 | 0 | 5, |
517 | 0 | 1.08, |
518 | 0 | }, |
519 | 0 | { |
520 | 0 | AcStrategyType::DCT2X2, |
521 | 0 | 5, |
522 | 0 | 0.95, |
523 | 0 | }, |
524 | 0 | { |
525 | 0 | AcStrategyType::DCT4X8, |
526 | 0 | 4, |
527 | 0 | 0.85931637428340035, |
528 | 0 | }, |
529 | 0 | { |
530 | 0 | AcStrategyType::DCT8X4, |
531 | 0 | 4, |
532 | 0 | 0.85931637428340035, |
533 | 0 | }, |
534 | 0 | { |
535 | 0 | AcStrategyType::IDENTITY, |
536 | 0 | 5, |
537 | 0 | 1.0427542510634957, |
538 | 0 | }, |
539 | 0 | { |
540 | 0 | AcStrategyType::AFV0, |
541 | 0 | 4, |
542 | 0 | 0.81779489591359944, |
543 | 0 | }, |
544 | 0 | { |
545 | 0 | AcStrategyType::AFV1, |
546 | 0 | 4, |
547 | 0 | 0.81779489591359944, |
548 | 0 | }, |
549 | 0 | { |
550 | 0 | AcStrategyType::AFV2, |
551 | 0 | 4, |
552 | 0 | 0.81779489591359944, |
553 | 0 | }, |
554 | 0 | { |
555 | 0 | AcStrategyType::AFV3, |
556 | 0 | 4, |
557 | 0 | 0.81779489591359944, |
558 | 0 | }, |
559 | 0 | }; |
560 | 0 | double best = 1e30; |
561 | 0 | best_tx = kTransforms8x8[0].type; |
562 | 0 | for (auto tx : kTransforms8x8) { |
563 | 0 | if (tx.encoding_speed_tier_max_limit < encoding_speed_tier) { |
564 | 0 | continue; |
565 | 0 | } |
566 | 0 | AcStrategy acs = AcStrategy::FromRawStrategy(tx.type); |
567 | 0 | float entropy_mul = tx.entropy_mul / kTransforms8x8[0].entropy_mul; |
568 | 0 | if ((tx.type == AcStrategyType::DCT2X2 || |
569 | 0 | tx.type == AcStrategyType::IDENTITY) && |
570 | 0 | butteraugli_target < 5.0) { |
571 | 0 | static const float kFavor2X2AtHighQuality = 0.4; |
572 | 0 | float weight = pow((5.0f - butteraugli_target) / 5.0f, 2.0); |
573 | 0 | entropy_mul -= kFavor2X2AtHighQuality * weight; |
574 | 0 | } |
575 | 0 | if ((tx.type != AcStrategyType::DCT && tx.type != AcStrategyType::DCT2X2 && |
576 | 0 | tx.type != AcStrategyType::IDENTITY) && |
577 | 0 | butteraugli_target > 4.0) { |
578 | 0 | static const float kAvoidEntropyOfTransforms = 0.5; |
579 | 0 | float mul = 1.0; |
580 | 0 | if (butteraugli_target < 12.0) { |
581 | 0 | mul *= (12.0 - 4.0) / (butteraugli_target - 4.0); |
582 | 0 | } |
583 | 0 | entropy_mul += kAvoidEntropyOfTransforms * mul; |
584 | 0 | } |
585 | 0 | float entropy; |
586 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy(acs, entropy_mul, x, y, config, |
587 | 0 | cmap_factors, block, scratch_space, |
588 | 0 | quantized, entropy)); |
589 | 0 | if (entropy < best) { |
590 | 0 | best_tx = tx.type; |
591 | 0 | best = entropy; |
592 | 0 | } |
593 | 0 | } |
594 | 0 | *entropy_out = best; |
595 | 0 | return true; |
596 | 0 | } |
597 | | |
598 | | // bx, by addresses the 64x64 block at 8x8 subresolution |
599 | | // cx, cy addresses the left, upper 8x8 block position of the candidate |
600 | | // transform. |
601 | | Status TryMergeAcs(AcStrategyType acs_raw, size_t bx, size_t by, size_t cx, |
602 | | size_t cy, const ACSConfig& config, |
603 | | const float* JXL_RESTRICT cmap_factors, |
604 | | AcStrategyImage* JXL_RESTRICT ac_strategy, |
605 | | const float entropy_mul, const uint8_t candidate_priority, |
606 | | uint8_t* priority, float* JXL_RESTRICT entropy_estimate, |
607 | 0 | float* block, float* scratch_space, uint32_t* quantized) { |
608 | 0 | AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw); |
609 | 0 | float entropy_current = 0; |
610 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) { |
611 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) { |
612 | 0 | if (priority[(cy + iy) * 8 + (cx + ix)] >= candidate_priority) { |
613 | | // Transform would reuse already allocated blocks and |
614 | | // lead to invalid overlaps, for example DCT64X32 vs. |
615 | | // DCT32X64. |
616 | 0 | return true; |
617 | 0 | } |
618 | 0 | entropy_current += entropy_estimate[(cy + iy) * 8 + (cx + ix)]; |
619 | 0 | } |
620 | 0 | } |
621 | 0 | float entropy_candidate; |
622 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
623 | 0 | acs, entropy_mul, (bx + cx) * 8, (by + cy) * 8, config, cmap_factors, |
624 | 0 | block, scratch_space, quantized, entropy_candidate)); |
625 | 0 | if (entropy_candidate >= entropy_current) return true; |
626 | | // Accept the candidate. |
627 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { |
628 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { |
629 | 0 | entropy_estimate[(cy + iy) * 8 + cx + ix] = 0; |
630 | 0 | priority[(cy + iy) * 8 + cx + ix] = candidate_priority; |
631 | 0 | } |
632 | 0 | } |
633 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_raw)); |
634 | 0 | entropy_estimate[cy * 8 + cx] = entropy_candidate; |
635 | 0 | return true; |
636 | 0 | } |
637 | | |
638 | | static void SetEntropyForTransform(size_t cx, size_t cy, |
639 | | const AcStrategyType acs_raw, float entropy, |
640 | 0 | float* JXL_RESTRICT entropy_estimate) { |
641 | 0 | const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw); |
642 | 0 | for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) { |
643 | 0 | for (size_t dx = 0; dx < acs.covered_blocks_x(); ++dx) { |
644 | 0 | entropy_estimate[(cy + dy) * 8 + cx + dx] = 0.0; |
645 | 0 | } |
646 | 0 | } |
647 | 0 | entropy_estimate[cy * 8 + cx] = entropy; |
648 | 0 | } |
649 | | |
650 | 0 | AcStrategyType AcsSquare(size_t blocks) { |
651 | 0 | if (blocks == 2) { |
652 | 0 | return AcStrategyType::DCT16X16; |
653 | 0 | } else if (blocks == 4) { |
654 | 0 | return AcStrategyType::DCT32X32; |
655 | 0 | } else { |
656 | 0 | return AcStrategyType::DCT64X64; |
657 | 0 | } |
658 | 0 | } |
659 | | |
660 | 0 | AcStrategyType AcsVerticalSplit(size_t blocks) { |
661 | 0 | if (blocks == 2) { |
662 | 0 | return AcStrategyType::DCT16X8; |
663 | 0 | } else if (blocks == 4) { |
664 | 0 | return AcStrategyType::DCT32X16; |
665 | 0 | } else { |
666 | 0 | return AcStrategyType::DCT64X32; |
667 | 0 | } |
668 | 0 | } |
669 | | |
670 | 0 | AcStrategyType AcsHorizontalSplit(size_t blocks) { |
671 | 0 | if (blocks == 2) { |
672 | 0 | return AcStrategyType::DCT8X16; |
673 | 0 | } else if (blocks == 4) { |
674 | 0 | return AcStrategyType::DCT16X32; |
675 | 0 | } else { |
676 | 0 | return AcStrategyType::DCT32X64; |
677 | 0 | } |
678 | 0 | } |
679 | | |
680 | | // The following function tries to merge smaller transforms into |
681 | | // squares and the rectangles originating from a single middle division |
682 | | // (horizontal or vertical) fairly. |
683 | | // |
684 | | // This is now generalized to concern about squares |
685 | | // of blocks X blocks size, where a block is 8x8 pixels. |
686 | | Status FindBestFirstLevelDivisionForSquare( |
687 | | size_t blocks, bool allow_square_transform, size_t bx, size_t by, size_t cx, |
688 | | size_t cy, const ACSConfig& config, const float* JXL_RESTRICT cmap_factors, |
689 | | AcStrategyImage* JXL_RESTRICT ac_strategy, const float entropy_mul_JXK, |
690 | | const float entropy_mul_JXJ, float* JXL_RESTRICT entropy_estimate, |
691 | 0 | float* block, float* scratch_space, uint32_t* quantized) { |
692 | | // We denote J for the larger dimension here, and K for the smaller. |
693 | | // For example, for 32x32 block splitting, J would be 32, K 16. |
694 | 0 | const size_t blocks_half = blocks / 2; |
695 | 0 | const AcStrategyType acs_rawJXK = AcsVerticalSplit(blocks); |
696 | 0 | const AcStrategyType acs_rawKXJ = AcsHorizontalSplit(blocks); |
697 | 0 | const AcStrategyType acs_rawJXJ = AcsSquare(blocks); |
698 | 0 | const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK); |
699 | 0 | const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ); |
700 | 0 | const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ); |
701 | 0 | AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0); |
702 | 0 | AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half); |
703 | | // Let's check if we can consider a JXJ block here at all. |
704 | | // This is not necessary in the basic use of hierarchically merging |
705 | | // blocks in the simplest possible way, but is needed when we try other |
706 | | // 'floating' options of merging, possibly after a simple hierarchical |
707 | | // merge has been explored. |
708 | 0 | if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx, |
709 | 0 | by + cy, bx + cx + blocks) || |
710 | 0 | MultiBlockTransformCrossesHorizontalBoundary( |
711 | 0 | *ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) || |
712 | 0 | MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy, |
713 | 0 | by + cy + blocks) || |
714 | 0 | MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks, |
715 | 0 | by + cy, by + cy + blocks)) { |
716 | 0 | return true; // not suitable for JxJ analysis, some transforms leak out. |
717 | 0 | } |
718 | | // For floating transforms there may be |
719 | | // already blocks selected that make either or both JXK and |
720 | | // KXJ not feasible for this location. |
721 | 0 | const bool allow_JXK = !MultiBlockTransformCrossesVerticalBoundary( |
722 | 0 | *ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks); |
723 | 0 | const bool allow_KXJ = !MultiBlockTransformCrossesHorizontalBoundary( |
724 | 0 | *ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks); |
725 | | // Current entropies aggregated on NxN resolution. |
726 | 0 | float entropy[2][2] = {}; |
727 | 0 | for (size_t dy = 0; dy < blocks; ++dy) { |
728 | 0 | for (size_t dx = 0; dx < blocks; ++dx) { |
729 | 0 | entropy[dy / blocks_half][dx / blocks_half] += |
730 | 0 | entropy_estimate[(cy + dy) * 8 + (cx + dx)]; |
731 | 0 | } |
732 | 0 | } |
733 | 0 | float entropy_JXK_left = std::numeric_limits<float>::max(); |
734 | 0 | float entropy_JXK_right = std::numeric_limits<float>::max(); |
735 | 0 | float entropy_KXJ_top = std::numeric_limits<float>::max(); |
736 | 0 | float entropy_KXJ_bottom = std::numeric_limits<float>::max(); |
737 | 0 | float entropy_JXJ = std::numeric_limits<float>::max(); |
738 | 0 | if (allow_JXK) { |
739 | 0 | if (row0[bx + cx + 0].Strategy() != acs_rawJXK) { |
740 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
741 | 0 | acsJXK, entropy_mul_JXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, |
742 | 0 | cmap_factors, block, scratch_space, quantized, entropy_JXK_left)); |
743 | 0 | } |
744 | 0 | if (row0[bx + cx + blocks_half].Strategy() != acs_rawJXK) { |
745 | 0 | JXL_RETURN_IF_ERROR( |
746 | 0 | EstimateEntropy(acsJXK, entropy_mul_JXK, (bx + cx + blocks_half) * 8, |
747 | 0 | (by + cy + 0) * 8, config, cmap_factors, block, |
748 | 0 | scratch_space, quantized, entropy_JXK_right)); |
749 | 0 | } |
750 | 0 | } |
751 | 0 | if (allow_KXJ) { |
752 | 0 | if (row0[bx + cx].Strategy() != acs_rawKXJ) { |
753 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
754 | 0 | acsKXJ, entropy_mul_JXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, |
755 | 0 | cmap_factors, block, scratch_space, quantized, entropy_KXJ_top)); |
756 | 0 | } |
757 | 0 | if (row1[bx + cx].Strategy() != acs_rawKXJ) { |
758 | 0 | JXL_RETURN_IF_ERROR( |
759 | 0 | EstimateEntropy(acsKXJ, entropy_mul_JXK, (bx + cx + 0) * 8, |
760 | 0 | (by + cy + blocks_half) * 8, config, cmap_factors, |
761 | 0 | block, scratch_space, quantized, entropy_KXJ_bottom)); |
762 | 0 | } |
763 | 0 | } |
764 | 0 | if (allow_square_transform) { |
765 | | // We control the exploration of the square transform separately so that |
766 | | // we can turn it off at high decoding speeds for 32x32, but still allow |
767 | | // exploring 16x32 and 32x16. |
768 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
769 | 0 | acsJXJ, entropy_mul_JXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, |
770 | 0 | cmap_factors, block, scratch_space, quantized, entropy_JXJ)); |
771 | 0 | } |
772 | | |
773 | | // Test if this block should have JXK or KXJ transforms, |
774 | | // because it can have only one or the other. |
775 | 0 | float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) + |
776 | 0 | std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]); |
777 | 0 | float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) + |
778 | 0 | std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]); |
779 | 0 | if (entropy_JXJ < costJxN && entropy_JXJ < costNxJ) { |
780 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ)); |
781 | 0 | SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate); |
782 | 0 | } else if (costJxN < costNxJ) { |
783 | 0 | if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) { |
784 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_rawJXK)); |
785 | 0 | SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left, |
786 | 0 | entropy_estimate); |
787 | 0 | } |
788 | 0 | if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) { |
789 | 0 | JXL_RETURN_IF_ERROR( |
790 | 0 | ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK)); |
791 | 0 | SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK, |
792 | 0 | entropy_JXK_right, entropy_estimate); |
793 | 0 | } |
794 | 0 | } else { |
795 | 0 | if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) { |
796 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ)); |
797 | 0 | SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top, |
798 | 0 | entropy_estimate); |
799 | 0 | } |
800 | 0 | if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) { |
801 | 0 | JXL_RETURN_IF_ERROR( |
802 | 0 | ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ)); |
803 | 0 | SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ, |
804 | 0 | entropy_KXJ_bottom, entropy_estimate); |
805 | 0 | } |
806 | 0 | } |
807 | 0 | return true; |
808 | 0 | } |
809 | | |
810 | | Status ProcessRectACS(const CompressParams& cparams, const ACSConfig& config, |
811 | | const Rect& rect, const ColorCorrelationMap& cmap, |
812 | | float* JXL_RESTRICT block, |
813 | | uint32_t* JXL_RESTRICT quantized, |
814 | 0 | AcStrategyImage* ac_strategy) { |
815 | | // Main philosophy here: |
816 | | // 1. First find best 8x8 transform for each area. |
817 | | // 2. Merging them into larger transforms where possibly, but |
818 | | // starting from the smallest transforms (16x8 and 8x16). |
819 | | // Additional complication: 16x8 and 8x16 are considered |
820 | | // simultaneously and fairly against each other. |
821 | | // We are looking at 64x64 squares since the Y-to-X and Y-to-B |
822 | | // maps happen to be at that resolution, and having |
823 | | // integral transforms cross these boundaries leads to |
824 | | // additional complications. |
825 | 0 | const float butteraugli_target = cparams.butteraugli_distance; |
826 | 0 | float* JXL_RESTRICT scratch_space = block + 3 * AcStrategy::kMaxCoeffArea; |
827 | 0 | size_t bx = rect.x0(); |
828 | 0 | size_t by = rect.y0(); |
829 | 0 | JXL_ENSURE(rect.xsize() <= 8); |
830 | 0 | JXL_ENSURE(rect.ysize() <= 8); |
831 | 0 | size_t tx = bx / kColorTileDimInBlocks; |
832 | 0 | size_t ty = by / kColorTileDimInBlocks; |
833 | 0 | const float cmap_factors[3] = { |
834 | 0 | cmap.base().YtoXRatio(cmap.ytox_map.ConstRow(ty)[tx]), |
835 | 0 | 0.0f, |
836 | 0 | cmap.base().YtoBRatio(cmap.ytob_map.ConstRow(ty)[tx]), |
837 | 0 | }; |
838 | 0 | if (cparams.speed_tier > SpeedTier::kHare) return true; |
839 | | // First compute the best 8x8 transform for each square. Later, we do not |
840 | | // experiment with different combinations, but only use the best of the 8x8s |
841 | | // when DCT8X8 is specified in the tree search. |
842 | | // 8x8 transforms have 10 variants, but every larger transform is just a DCT. |
843 | 0 | float entropy_estimate[64] = {}; |
844 | | // Favor all 8x8 transforms (against 16x8 and larger transforms)) at |
845 | | // low butteraugli_target distances. |
846 | 0 | static const float k8x8mul1 = -0.4; |
847 | 0 | static const float k8x8mul2 = 1.0; |
848 | 0 | static const float k8x8base = 1.4; |
849 | 0 | const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base); |
850 | 0 | for (size_t iy = 0; iy < rect.ysize(); iy++) { |
851 | 0 | for (size_t ix = 0; ix < rect.xsize(); ix++) { |
852 | 0 | float entropy = 0.0; |
853 | 0 | AcStrategyType best_of_8x8s; |
854 | 0 | JXL_RETURN_IF_ERROR(FindBest8x8Transform( |
855 | 0 | 8 * (bx + ix), 8 * (by + iy), static_cast<int>(cparams.speed_tier), |
856 | 0 | butteraugli_target, config, cmap_factors, ac_strategy, block, |
857 | 0 | scratch_space, quantized, &entropy, best_of_8x8s)); |
858 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + ix, by + iy, best_of_8x8s)); |
859 | 0 | entropy_estimate[iy * 8 + ix] = entropy * mul8x8; |
860 | 0 | } |
861 | 0 | } |
862 | | // Merge when a larger transform is better than the previously |
863 | | // searched best combination of 8x8 transforms. |
864 | 0 | struct MergeTry { |
865 | 0 | AcStrategyType type; |
866 | 0 | uint8_t priority; |
867 | 0 | uint8_t decoding_speed_tier_max_limit; |
868 | 0 | uint8_t encoding_speed_tier_max_limit; |
869 | 0 | float entropy_mul; |
870 | 0 | }; |
871 | | // These numbers need to be figured out manually and looking at |
872 | | // ringing next to sky etc. Optimization will find smaller numbers |
873 | | // and produce more ringing than is ideal. Larger numbers will |
874 | | // help stop ringing. |
875 | 0 | const float entropy_mul16X8 = 1.25; |
876 | 0 | const float entropy_mul16X16 = 1.35; |
877 | 0 | const float entropy_mul16X32 = 1.5; |
878 | 0 | const float entropy_mul32X32 = 1.5; |
879 | 0 | const float entropy_mul64X32 = 2.26; |
880 | 0 | const float entropy_mul64X64 = 2.26; |
881 | | // TODO(jyrki): Consider this feedback in further changes: |
882 | | // Also effectively when the multipliers for smaller blocks are |
883 | | // below 1, this raises the bar for the bigger blocks even higher |
884 | | // in that sense these constants are not independent (e.g. changing |
885 | | // the constant for DCT16x32 by -5% (making it more likely) also |
886 | | // means that DCT32x32 becomes harder to do when starting from |
887 | | // two DCT16x32s). It might be better to make them more independent, |
888 | | // e.g. by not applying the multiplier when storing the new entropy |
889 | | // estimates in TryMergeToACSCandidate(). |
890 | 0 | const MergeTry kTransformsForMerge[9] = { |
891 | 0 | {AcStrategyType::DCT16X8, 2, 4, 5, entropy_mul16X8}, |
892 | 0 | {AcStrategyType::DCT8X16, 2, 4, 5, entropy_mul16X8}, |
893 | | // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its |
894 | | // subdivisions. {AcStrategyType::DCT16X16, 3, entropy_mul16X16}, |
895 | 0 | {AcStrategyType::DCT16X32, 4, 4, 4, entropy_mul16X32}, |
896 | 0 | {AcStrategyType::DCT32X16, 4, 4, 4, entropy_mul16X32}, |
897 | | // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its |
898 | | // subdivisions. {AcStrategyType::DCT32X32, 5, 1, 5, |
899 | | // 0.9822994906548809f}, |
900 | 0 | {AcStrategyType::DCT64X32, 6, 1, 3, entropy_mul64X32}, |
901 | 0 | {AcStrategyType::DCT32X64, 6, 1, 3, entropy_mul64X32}, |
902 | | // {AcStrategyType::DCT64X64, 8, 1, 3, 2.0846542128012948f}, |
903 | 0 | }; |
904 | | /* |
905 | | These sizes not yet included in merge heuristic: |
906 | | set(AcStrategyType::DCT32X8, 0.0f, 2.261390410971102f); |
907 | | set(AcStrategyType::DCT8X32, 0.0f, 2.261390410971102f); |
908 | | set(AcStrategyType::DCT128X128, 0.0f, 1.0f); |
909 | | set(AcStrategyType::DCT128X64, 0.0f, 0.73f); |
910 | | set(AcStrategyType::DCT64X128, 0.0f, 0.73f); |
911 | | set(AcStrategyType::DCT256X256, 0.0f, 1.0f); |
912 | | set(AcStrategyType::DCT256X128, 0.0f, 0.73f); |
913 | | set(AcStrategyType::DCT128X256, 0.0f, 0.73f); |
914 | | */ |
915 | | |
916 | | // Priority is a tricky kludge to avoid collisions so that transforms |
917 | | // don't overlap. |
918 | 0 | uint8_t priority[64] = {}; |
919 | 0 | bool enable_32x32 = cparams.decoding_speed_tier < 4; |
920 | 0 | for (auto tx : kTransformsForMerge) { |
921 | 0 | if (tx.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) { |
922 | 0 | continue; |
923 | 0 | } |
924 | 0 | AcStrategy acs = AcStrategy::FromRawStrategy(tx.type); |
925 | |
|
926 | 0 | for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize(); |
927 | 0 | cy += acs.covered_blocks_y()) { |
928 | 0 | for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize(); |
929 | 0 | cx += acs.covered_blocks_x()) { |
930 | 0 | if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) { |
931 | 0 | if (cparams.decoding_speed_tier < 4 && |
932 | 0 | tx.type == AcStrategyType::DCT32X64) { |
933 | | // We handle both DCT8X16 and DCT16X8 at the same time. |
934 | 0 | if ((cy | cx) % 8 == 0) { |
935 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
936 | 0 | 8, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
937 | 0 | tx.entropy_mul, entropy_mul64X64, entropy_estimate, block, |
938 | 0 | scratch_space, quantized)); |
939 | 0 | } |
940 | 0 | continue; |
941 | 0 | } else if (tx.type == AcStrategyType::DCT32X16) { |
942 | | // We handled both DCT8X16 and DCT16X8 at the same time, |
943 | | // and that is above. The last column and last row, |
944 | | // when the last column or last row is odd numbered, |
945 | | // are still handled by TryMergeAcs. |
946 | 0 | continue; |
947 | 0 | } |
948 | 0 | } |
949 | 0 | if ((tx.type == AcStrategyType::DCT16X32 && cy % 4 != 0) || |
950 | 0 | (tx.type == AcStrategyType::DCT32X16 && cx % 4 != 0)) { |
951 | | // already covered by FindBest32X32 |
952 | 0 | continue; |
953 | 0 | } |
954 | | |
955 | 0 | if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) { |
956 | 0 | if (tx.type == AcStrategyType::DCT16X32) { |
957 | | // We handle both DCT8X16 and DCT16X8 at the same time. |
958 | 0 | if ((cy | cx) % 4 == 0) { |
959 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
960 | 0 | 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, |
961 | 0 | ac_strategy, tx.entropy_mul, entropy_mul32X32, |
962 | 0 | entropy_estimate, block, scratch_space, quantized)); |
963 | 0 | } |
964 | 0 | continue; |
965 | 0 | } else if (tx.type == AcStrategyType::DCT32X16) { |
966 | | // We handled both DCT8X16 and DCT16X8 at the same time, |
967 | | // and that is above. The last column and last row, |
968 | | // when the last column or last row is odd numbered, |
969 | | // are still handled by TryMergeAcs. |
970 | 0 | continue; |
971 | 0 | } |
972 | 0 | } |
973 | 0 | if ((tx.type == AcStrategyType::DCT16X32 && cy % 4 != 0) || |
974 | 0 | (tx.type == AcStrategyType::DCT32X16 && cx % 4 != 0)) { |
975 | | // already covered by FindBest32X32 |
976 | 0 | continue; |
977 | 0 | } |
978 | 0 | if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) { |
979 | 0 | if (tx.type == AcStrategyType::DCT8X16) { |
980 | | // We handle both DCT8X16 and DCT16X8 at the same time. |
981 | 0 | if ((cy | cx) % 2 == 0) { |
982 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
983 | 0 | 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
984 | 0 | tx.entropy_mul, entropy_mul16X16, entropy_estimate, block, |
985 | 0 | scratch_space, quantized)); |
986 | 0 | } |
987 | 0 | continue; |
988 | 0 | } else if (tx.type == AcStrategyType::DCT16X8) { |
989 | | // We handled both DCT8X16 and DCT16X8 at the same time, |
990 | | // and that is above. The last column and last row, |
991 | | // when the last column or last row is odd numbered, |
992 | | // are still handled by TryMergeAcs. |
993 | 0 | continue; |
994 | 0 | } |
995 | 0 | } |
996 | 0 | if ((tx.type == AcStrategyType::DCT8X16 && cy % 2 == 1) || |
997 | 0 | (tx.type == AcStrategyType::DCT16X8 && cx % 2 == 1)) { |
998 | | // already covered by FindBestFirstLevelDivisionForSquare |
999 | 0 | continue; |
1000 | 0 | } |
1001 | | // All other merge sizes are handled here. |
1002 | | // Some of the DCT16X8s and DCT8X16s will still leak through here |
1003 | | // when there is an odd number of 8x8 blocks, then the last row |
1004 | | // and column will get their DCT16X8s and DCT8X16s through the |
1005 | | // normal integral transform merging process. |
1006 | 0 | JXL_RETURN_IF_ERROR( |
1007 | 0 | TryMergeAcs(tx.type, bx, by, cx, cy, config, cmap_factors, |
1008 | 0 | ac_strategy, tx.entropy_mul, tx.priority, &priority[0], |
1009 | 0 | entropy_estimate, block, scratch_space, quantized)); |
1010 | 0 | } |
1011 | 0 | } |
1012 | 0 | } |
1013 | 0 | if (cparams.speed_tier >= SpeedTier::kHare) { |
1014 | 0 | return true; |
1015 | 0 | } |
1016 | | // Here we still try to do some non-aligned matching, find a few more |
1017 | | // 16X8, 8X16 and 16X16s between the non-2-aligned blocks. |
1018 | 0 | for (size_t cy = 0; cy + 1 < rect.ysize(); ++cy) { |
1019 | 0 | for (size_t cx = 0; cx + 1 < rect.xsize(); ++cx) { |
1020 | 0 | if ((cy | cx) % 2 != 0) { |
1021 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
1022 | 0 | 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
1023 | 0 | entropy_mul16X8, entropy_mul16X16, entropy_estimate, block, |
1024 | 0 | scratch_space, quantized)); |
1025 | 0 | } |
1026 | 0 | } |
1027 | 0 | } |
1028 | | // Non-aligned matching for 32X32, 16X32 and 32X16. |
1029 | 0 | size_t step = cparams.speed_tier >= SpeedTier::kTortoise ? 2 : 1; |
1030 | 0 | for (size_t cy = 0; cy + 3 < rect.ysize(); cy += step) { |
1031 | 0 | for (size_t cx = 0; cx + 3 < rect.xsize(); cx += step) { |
1032 | 0 | if ((cy | cx) % 4 == 0) { |
1033 | 0 | continue; // Already tried with loop above (DCT16X32 case). |
1034 | 0 | } |
1035 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
1036 | 0 | 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
1037 | 0 | entropy_mul16X32, entropy_mul32X32, entropy_estimate, block, |
1038 | 0 | scratch_space, quantized)); |
1039 | 0 | } |
1040 | 0 | } |
1041 | 0 | return true; |
1042 | 0 | } |
1043 | | |
1044 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
1045 | | } // namespace HWY_NAMESPACE |
1046 | | } // namespace jxl |
1047 | | HWY_AFTER_NAMESPACE(); |
1048 | | |
1049 | | #if HWY_ONCE |
1050 | | namespace jxl { |
1051 | | HWY_EXPORT(ProcessRectACS); |
1052 | | |
1053 | | Status AcStrategyHeuristics::Init(const Image3F& src, const Rect& rect_in, |
1054 | | const ImageF& quant_field, const ImageF& mask, |
1055 | | const ImageF& mask1x1, |
1056 | 0 | DequantMatrices* matrices) { |
1057 | 0 | config.dequant = matrices; |
1058 | |
|
1059 | 0 | if (cparams.speed_tier >= SpeedTier::kCheetah) { |
1060 | 0 | JXL_RETURN_IF_ERROR( |
1061 | 0 | matrices->EnsureComputed(memory_manager, 1)); // DCT8 only |
1062 | 0 | } else { |
1063 | 0 | uint32_t acs_mask = 0; |
1064 | | // All transforms up to 64x64. |
1065 | 0 | for (size_t i = 0; i < static_cast<size_t>(AcStrategyType::DCT128X128); |
1066 | 0 | i++) { |
1067 | 0 | acs_mask |= (1 << i); |
1068 | 0 | } |
1069 | 0 | JXL_RETURN_IF_ERROR(matrices->EnsureComputed(memory_manager, acs_mask)); |
1070 | 0 | } |
1071 | | |
1072 | | // Image row pointers and strides. |
1073 | 0 | config.quant_field_row = quant_field.Row(0); |
1074 | 0 | config.quant_field_stride = quant_field.PixelsPerRow(); |
1075 | 0 | if (mask.xsize() > 0 && mask.ysize() > 0) { |
1076 | 0 | config.masking_field_row = mask.Row(0); |
1077 | 0 | config.masking_field_stride = mask.PixelsPerRow(); |
1078 | 0 | } |
1079 | 0 | config.mask1x1_xsize = mask1x1.xsize(); |
1080 | 0 | if (mask1x1.xsize() > 0 && mask1x1.ysize() > 0) { |
1081 | 0 | config.masking1x1_field_row = mask1x1.Row(0); |
1082 | 0 | config.masking1x1_field_stride = mask1x1.PixelsPerRow(); |
1083 | 0 | } |
1084 | |
|
1085 | 0 | config.src_rows[0] = rect_in.ConstPlaneRow(src, 0, 0); |
1086 | 0 | config.src_rows[1] = rect_in.ConstPlaneRow(src, 1, 0); |
1087 | 0 | config.src_rows[2] = rect_in.ConstPlaneRow(src, 2, 0); |
1088 | 0 | config.src_stride = src.PixelsPerRow(); |
1089 | | |
1090 | | // Entropy estimate is composed of two factors: |
1091 | | // - estimate of the number of bits that will be used by the block |
1092 | | // - information loss due to quantization |
1093 | | // The following constant controls the relative weights of these components. |
1094 | 0 | config.info_loss_multiplier = 1.2; |
1095 | 0 | config.zeros_mul = 9.3089059022677905; |
1096 | 0 | config.cost_delta = 10.833273317067883; |
1097 | |
|
1098 | 0 | static const float kBias = 0.13731742964354549; |
1099 | 0 | const float ratio = (cparams.butteraugli_distance + kBias) / (1.0f + kBias); |
1100 | |
|
1101 | 0 | static const float kPow1 = 0.33677806662454718; |
1102 | 0 | static const float kPow2 = 0.50990926717963703; |
1103 | 0 | static const float kPow3 = 0.36702940662370243; |
1104 | 0 | config.info_loss_multiplier *= std::pow(ratio, kPow1); |
1105 | 0 | config.zeros_mul *= std::pow(ratio, kPow2); |
1106 | 0 | config.cost_delta *= std::pow(ratio, kPow3); |
1107 | 0 | return true; |
1108 | 0 | } |
1109 | | |
1110 | 0 | Status AcStrategyHeuristics::PrepareForThreads(std::size_t num_threads) { |
1111 | 0 | const size_t dct_scratch_size = |
1112 | 0 | 3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim; |
1113 | 0 | mem_per_thread = 6 * AcStrategy::kMaxCoeffArea + dct_scratch_size; |
1114 | 0 | size_t mem_bytes = num_threads * mem_per_thread * sizeof(float); |
1115 | 0 | JXL_ASSIGN_OR_RETURN(mem, AlignedMemory::Create(memory_manager, mem_bytes)); |
1116 | 0 | qmem_per_thread = AcStrategy::kMaxCoeffArea; |
1117 | 0 | size_t qmem_bytes = num_threads * qmem_per_thread * sizeof(uint32_t); |
1118 | 0 | JXL_ASSIGN_OR_RETURN(qmem, AlignedMemory::Create(memory_manager, qmem_bytes)); |
1119 | 0 | return true; |
1120 | 0 | } |
1121 | | |
1122 | | Status AcStrategyHeuristics::ProcessRect(const Rect& rect, |
1123 | | const ColorCorrelationMap& cmap, |
1124 | | AcStrategyImage* ac_strategy, |
1125 | 0 | size_t thread) { |
1126 | | // In Falcon mode, use DCT8 everywhere and uniform quantization. |
1127 | 0 | if (cparams.speed_tier >= SpeedTier::kCheetah) { |
1128 | 0 | ac_strategy->FillDCT8(rect); |
1129 | 0 | return true; |
1130 | 0 | } |
1131 | 0 | return HWY_DYNAMIC_DISPATCH(ProcessRectACS)( |
1132 | 0 | cparams, config, rect, cmap, |
1133 | 0 | mem.address<float>() + thread * mem_per_thread, |
1134 | 0 | qmem.address<uint32_t>() + thread * qmem_per_thread, ac_strategy); |
1135 | 0 | } |
1136 | | |
1137 | | Status AcStrategyHeuristics::Finalize(const FrameDimensions& frame_dim, |
1138 | | const AcStrategyImage& ac_strategy, |
1139 | 0 | AuxOut* aux_out) { |
1140 | | // Accounting and debug output. |
1141 | 0 | if (aux_out != nullptr) { |
1142 | 0 | aux_out->num_small_blocks = |
1143 | 0 | ac_strategy.CountBlocks(AcStrategyType::IDENTITY) + |
1144 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT2X2) + |
1145 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT4X4); |
1146 | 0 | aux_out->num_dct4x8_blocks = |
1147 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT4X8) + |
1148 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT8X4); |
1149 | 0 | aux_out->num_afv_blocks = ac_strategy.CountBlocks(AcStrategyType::AFV0) + |
1150 | 0 | ac_strategy.CountBlocks(AcStrategyType::AFV1) + |
1151 | 0 | ac_strategy.CountBlocks(AcStrategyType::AFV2) + |
1152 | 0 | ac_strategy.CountBlocks(AcStrategyType::AFV3); |
1153 | 0 | aux_out->num_dct8_blocks = ac_strategy.CountBlocks(AcStrategyType::DCT); |
1154 | 0 | aux_out->num_dct8x16_blocks = |
1155 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT8X16) + |
1156 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT16X8); |
1157 | 0 | aux_out->num_dct8x32_blocks = |
1158 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT8X32) + |
1159 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X8); |
1160 | 0 | aux_out->num_dct16_blocks = |
1161 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT16X16); |
1162 | 0 | aux_out->num_dct16x32_blocks = |
1163 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT16X32) + |
1164 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X16); |
1165 | 0 | aux_out->num_dct32_blocks = |
1166 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X32); |
1167 | 0 | aux_out->num_dct32x64_blocks = |
1168 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X64) + |
1169 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT64X32); |
1170 | 0 | aux_out->num_dct64_blocks = |
1171 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT64X64); |
1172 | 0 | } |
1173 | |
|
1174 | 0 | if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(cparams)) { |
1175 | 0 | JXL_RETURN_IF_ERROR(DumpAcStrategy(ac_strategy, frame_dim.xsize, |
1176 | 0 | frame_dim.ysize, "ac_strategy", aux_out, |
1177 | 0 | cparams)); |
1178 | 0 | } |
1179 | 0 | return true; |
1180 | 0 | } |
1181 | | |
1182 | | } // namespace jxl |
1183 | | #endif // HWY_ONCE |