/src/libjxl/lib/jxl/enc_ac_strategy.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_ac_strategy.h" |
7 | | |
8 | | #include <algorithm> |
9 | | #include <cmath> |
10 | | #include <cstdint> |
11 | | #include <cstdio> |
12 | | #include <cstring> |
13 | | #include <limits> |
14 | | |
15 | | #include "lib/jxl/chroma_from_luma.h" |
16 | | #include "lib/jxl/common.h" |
17 | | #include "lib/jxl/frame_dimensions.h" |
18 | | #include "lib/jxl/image.h" |
19 | | #include "lib/jxl/memory_manager_internal.h" |
20 | | #include "lib/jxl/quant_weights.h" |
21 | | |
22 | | #undef HWY_TARGET_INCLUDE |
23 | | #define HWY_TARGET_INCLUDE "lib/jxl/enc_ac_strategy.cc" |
24 | | #include <hwy/foreach_target.h> |
25 | | #include <hwy/highway.h> |
26 | | |
27 | | #include "lib/jxl/ac_strategy.h" |
28 | | #include "lib/jxl/base/bits.h" |
29 | | #include "lib/jxl/base/compiler_specific.h" |
30 | | #include "lib/jxl/base/fast_math-inl.h" |
31 | | #include "lib/jxl/base/rect.h" |
32 | | #include "lib/jxl/base/status.h" |
33 | | #include "lib/jxl/dec_transforms-inl.h" |
34 | | #include "lib/jxl/enc_aux_out.h" |
35 | | #include "lib/jxl/enc_debug_image.h" |
36 | | #include "lib/jxl/enc_params.h" |
37 | | #include "lib/jxl/enc_transforms-inl.h" |
38 | | #include "lib/jxl/simd_util.h" |
39 | | |
40 | | // Some of the floating point constants in this file and in other |
41 | | // files in the libjxl project have been obtained using the |
42 | | // tools/optimizer/simplex_fork.py tool. It is a variation of |
43 | | // Nelder-Mead optimization, and we generally try to minimize |
44 | | // BPP * pnorm aggregate as reported by the benchmark_xl tool, |
45 | | // but occasionally the values are optimized by using additional |
46 | | // constraints such as maintaining a certain density, or ratio of |
47 | | // popularity of integral transforms. Jyrki visually reviews all |
48 | | // such changes and often makes manual adjustments to maintain good |
49 | | // visual quality in cases where butteraugli was not sufficiently |
50 | | // sensitive to some kind of degradation. Unfortunately, image quality |
51 | | // is still more of an art than a science. |
52 | | |
53 | | // Set JXL_DEBUG_AC_STRATEGY to 1 to enable debugging. |
54 | | #ifndef JXL_DEBUG_AC_STRATEGY |
55 | 0 | #define JXL_DEBUG_AC_STRATEGY 0 |
56 | | #endif |
57 | | |
58 | | // This must come before the begin/end_target, but HWY_ONCE is only true |
59 | | // after that, so use an "include guard". |
60 | | #ifndef LIB_JXL_ENC_AC_STRATEGY_ |
61 | | #define LIB_JXL_ENC_AC_STRATEGY_ |
62 | | // Parameters of the heuristic are marked with an OPTIMIZE comment. |
63 | | namespace jxl { |
64 | | namespace { |
65 | | |
66 | | // Debugging utilities. |
67 | | |
68 | | // Returns a linear sRGB color (as bytes) for each AC strategy. |
69 | 0 | const uint8_t* TypeColor(uint8_t raw_strategy) { |
70 | 0 | JXL_DASSERT(AcStrategy::IsRawStrategyValid(raw_strategy)); |
71 | 0 | static_assert(AcStrategy::kNumValidStrategies == 27, "Update colors"); |
72 | 0 | static constexpr uint8_t kColors[AcStrategy::kNumValidStrategies + 1][3] = { |
73 | 0 | {0xFF, 0xFF, 0x00}, // DCT8 | yellow |
74 | 0 | {0xFF, 0x80, 0x80}, // HORNUSS | vivid tangerine |
75 | 0 | {0xFF, 0x80, 0x80}, // DCT2x2 | vivid tangerine |
76 | 0 | {0xFF, 0x80, 0x80}, // DCT4x4 | vivid tangerine |
77 | 0 | {0x80, 0xFF, 0x00}, // DCT16x16 | chartreuse |
78 | 0 | {0x00, 0xC0, 0x00}, // DCT32x32 | waystone green |
79 | 0 | {0xC0, 0xFF, 0x00}, // DCT16x8 | lime |
80 | 0 | {0xC0, 0xFF, 0x00}, // DCT8x16 | lime |
81 | 0 | {0x00, 0xFF, 0x00}, // DCT32x8 | green |
82 | 0 | {0x00, 0xFF, 0x00}, // DCT8x32 | green |
83 | 0 | {0x00, 0xFF, 0x00}, // DCT32x16 | green |
84 | 0 | {0x00, 0xFF, 0x00}, // DCT16x32 | green |
85 | 0 | {0xFF, 0x80, 0x00}, // DCT4x8 | orange juice |
86 | 0 | {0xFF, 0x80, 0x00}, // DCT8x4 | orange juice |
87 | 0 | {0xFF, 0xFF, 0x80}, // AFV0 | butter |
88 | 0 | {0xFF, 0xFF, 0x80}, // AFV1 | butter |
89 | 0 | {0xFF, 0xFF, 0x80}, // AFV2 | butter |
90 | 0 | {0xFF, 0xFF, 0x80}, // AFV3 | butter |
91 | 0 | {0x00, 0xC0, 0xFF}, // DCT64x64 | capri |
92 | 0 | {0x00, 0xFF, 0xFF}, // DCT64x32 | aqua |
93 | 0 | {0x00, 0xFF, 0xFF}, // DCT32x64 | aqua |
94 | 0 | {0x00, 0x40, 0xFF}, // DCT128x128 | rare blue |
95 | 0 | {0x00, 0x80, 0xFF}, // DCT128x64 | magic ink |
96 | 0 | {0x00, 0x80, 0xFF}, // DCT64x128 | magic ink |
97 | 0 | {0x00, 0x00, 0xC0}, // DCT256x256 | keese blue |
98 | 0 | {0x00, 0x00, 0xFF}, // DCT256x128 | blue |
99 | 0 | {0x00, 0x00, 0xFF}, // DCT128x256 | blue |
100 | 0 | {0x00, 0x00, 0x00} // invalid | black |
101 | 0 | }; |
102 | 0 | raw_strategy = |
103 | 0 | Clamp1<uint8_t>(raw_strategy, 0, AcStrategy::kNumValidStrategies); |
104 | 0 | return kColors[raw_strategy]; |
105 | 0 | } |
106 | | |
107 | 0 | const uint8_t* TypeMask(uint8_t raw_strategy) { |
108 | 0 | JXL_DASSERT(AcStrategy::IsRawStrategyValid(raw_strategy)); |
109 | 0 | static_assert(AcStrategy::kNumValidStrategies == 27, "Update masks"); |
110 | 0 | // implicitly, first row and column is made dark |
111 | 0 | static constexpr uint8_t kMask[AcStrategy::kNumValidStrategies + 1][64] = { |
112 | 0 | { |
113 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
114 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
115 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
116 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
117 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
118 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
119 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
120 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
121 | 0 | }, // DCT8 |
122 | 0 | { |
123 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
124 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
125 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
126 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
127 | 0 | 0, 0, 1, 1, 1, 1, 0, 0, // |
128 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
129 | 0 | 0, 0, 1, 0, 0, 1, 0, 0, // |
130 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
131 | 0 | }, // HORNUSS |
132 | 0 | { |
133 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
134 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
135 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
136 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
137 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
138 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
139 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
140 | 0 | 1, 0, 1, 0, 1, 0, 1, 0, // |
141 | 0 | }, // 2x2 |
142 | 0 | { |
143 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
144 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
145 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
146 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
147 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
148 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
149 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
150 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
151 | 0 | }, // 4x4 |
152 | 0 | {}, // DCT16x16 (unused) |
153 | 0 | {}, // DCT32x32 (unused) |
154 | 0 | {}, // DCT16x8 (unused) |
155 | 0 | {}, // DCT8x16 (unused) |
156 | 0 | {}, // DCT32x8 (unused) |
157 | 0 | {}, // DCT8x32 (unused) |
158 | 0 | {}, // DCT32x16 (unused) |
159 | 0 | {}, // DCT16x32 (unused) |
160 | 0 | { |
161 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
162 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
163 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
164 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
165 | 0 | 1, 1, 1, 1, 1, 1, 1, 1, // |
166 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
167 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
168 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
169 | 0 | }, // DCT4x8 |
170 | 0 | { |
171 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
172 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
173 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
174 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
175 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
176 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
177 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
178 | 0 | 0, 0, 0, 0, 1, 0, 0, 0, // |
179 | 0 | }, // DCT8x4 |
180 | 0 | { |
181 | 0 | 1, 1, 1, 1, 1, 0, 0, 0, // |
182 | 0 | 1, 1, 1, 1, 0, 0, 0, 0, // |
183 | 0 | 1, 1, 1, 0, 0, 0, 0, 0, // |
184 | 0 | 1, 1, 0, 0, 0, 0, 0, 0, // |
185 | 0 | 1, 0, 0, 0, 0, 0, 0, 0, // |
186 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
187 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
188 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
189 | 0 | }, // AFV0 |
190 | 0 | { |
191 | 0 | 0, 0, 0, 0, 1, 1, 1, 1, // |
192 | 0 | 0, 0, 0, 0, 0, 1, 1, 1, // |
193 | 0 | 0, 0, 0, 0, 0, 0, 1, 1, // |
194 | 0 | 0, 0, 0, 0, 0, 0, 0, 1, // |
195 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
196 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
197 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
198 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
199 | 0 | }, // AFV1 |
200 | 0 | { |
201 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
202 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
203 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
204 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
205 | 0 | 1, 0, 0, 0, 0, 0, 0, 0, // |
206 | 0 | 1, 1, 0, 0, 0, 0, 0, 0, // |
207 | 0 | 1, 1, 1, 0, 0, 0, 0, 0, // |
208 | 0 | 1, 1, 1, 1, 0, 0, 0, 0, // |
209 | 0 | }, // AFV2 |
210 | 0 | { |
211 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
212 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
213 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
214 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
215 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, // |
216 | 0 | 0, 0, 0, 0, 0, 0, 0, 1, // |
217 | 0 | 0, 0, 0, 0, 0, 0, 1, 1, // |
218 | 0 | 0, 0, 0, 0, 0, 1, 1, 1, // |
219 | 0 | }, // AFV3 |
220 | 0 | {} // invalid |
221 | 0 | }; |
222 | 0 | raw_strategy = |
223 | 0 | Clamp1<uint8_t>(raw_strategy, 0, AcStrategy::kNumValidStrategies); |
224 | 0 | return kMask[raw_strategy]; |
225 | 0 | } |
226 | | |
227 | | Status DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize, |
228 | | size_t ysize, const char* tag, AuxOut* aux_out, |
229 | 0 | const CompressParams& cparams) { |
230 | 0 | JxlMemoryManager* memory_manager = ac_strategy.memory_manager(); |
231 | 0 | JXL_ASSIGN_OR_RETURN(Image3F color_acs, |
232 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
233 | 0 | for (size_t y = 0; y < ysize; y++) { |
234 | 0 | float* JXL_RESTRICT rows[3] = { |
235 | 0 | color_acs.PlaneRow(0, y), |
236 | 0 | color_acs.PlaneRow(1, y), |
237 | 0 | color_acs.PlaneRow(2, y), |
238 | 0 | }; |
239 | 0 | const AcStrategyRow acs_row = ac_strategy.ConstRow(y / kBlockDim); |
240 | 0 | for (size_t x = 0; x < xsize; x++) { |
241 | 0 | AcStrategy acs = acs_row[x / kBlockDim]; |
242 | 0 | const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy()); |
243 | 0 | for (size_t c = 0; c < 3; c++) { |
244 | 0 | rows[c][x] = color[c] / 255.f; |
245 | 0 | } |
246 | 0 | } |
247 | 0 | } |
248 | 0 | size_t stride = color_acs.PixelsPerRow(); |
249 | 0 | for (size_t c = 0; c < 3; c++) { |
250 | 0 | for (size_t by = 0; by < DivCeil(ysize, kBlockDim); by++) { |
251 | 0 | float* JXL_RESTRICT row = color_acs.PlaneRow(c, by * kBlockDim); |
252 | 0 | const AcStrategyRow acs_row = ac_strategy.ConstRow(by); |
253 | 0 | for (size_t bx = 0; bx < DivCeil(xsize, kBlockDim); bx++) { |
254 | 0 | AcStrategy acs = acs_row[bx]; |
255 | 0 | if (!acs.IsFirstBlock()) continue; |
256 | 0 | const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy()); |
257 | 0 | const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy()); |
258 | 0 | if (acs.covered_blocks_x() == 1 && acs.covered_blocks_y() == 1) { |
259 | 0 | for (size_t iy = 0; iy < kBlockDim && by * kBlockDim + iy < ysize; |
260 | 0 | iy++) { |
261 | 0 | for (size_t ix = 0; ix < kBlockDim && bx * kBlockDim + ix < xsize; |
262 | 0 | ix++) { |
263 | 0 | if (mask[iy * kBlockDim + ix]) { |
264 | 0 | row[iy * stride + bx * kBlockDim + ix] = color[c] / 800.f; |
265 | 0 | } |
266 | 0 | } |
267 | 0 | } |
268 | 0 | } |
269 | 0 | // draw block edges |
270 | 0 | for (size_t ix = 0; ix < kBlockDim * acs.covered_blocks_x() && |
271 | 0 | bx * kBlockDim + ix < xsize; |
272 | 0 | ix++) { |
273 | 0 | row[0 * stride + bx * kBlockDim + ix] = color[c] / 350.f; |
274 | 0 | } |
275 | 0 | for (size_t iy = 0; iy < kBlockDim * acs.covered_blocks_y() && |
276 | 0 | by * kBlockDim + iy < ysize; |
277 | 0 | iy++) { |
278 | 0 | row[iy * stride + bx * kBlockDim + 0] = color[c] / 350.f; |
279 | 0 | } |
280 | 0 | } |
281 | 0 | } |
282 | 0 | } |
283 | 0 | return DumpImage(cparams, tag, color_acs); |
284 | 0 | } |
285 | | |
286 | | } // namespace |
287 | | } // namespace jxl |
288 | | #endif // LIB_JXL_ENC_AC_STRATEGY_ |
289 | | |
290 | | HWY_BEFORE_NAMESPACE(); |
291 | | namespace jxl { |
292 | | namespace HWY_NAMESPACE { |
293 | | |
294 | | // These templates are not found via ADL. |
295 | | using hwy::HWY_NAMESPACE::AbsDiff; |
296 | | using hwy::HWY_NAMESPACE::Eq; |
297 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
298 | | using hwy::HWY_NAMESPACE::IfThenZeroElse; |
299 | | using hwy::HWY_NAMESPACE::Round; |
300 | | using hwy::HWY_NAMESPACE::Sqrt; |
301 | | |
302 | | bool MultiBlockTransformCrossesHorizontalBoundary( |
303 | | const AcStrategyImage& ac_strategy, size_t start_x, size_t y, |
304 | 0 | size_t end_x) { |
305 | 0 | if (start_x >= ac_strategy.xsize() || y >= ac_strategy.ysize()) { |
306 | 0 | return false; |
307 | 0 | } |
308 | 0 | if (y % 8 == 0) { |
309 | | // Nothing crosses 64x64 boundaries, and the memory on the other side |
310 | | // of the 64x64 block may still uninitialized. |
311 | 0 | return false; |
312 | 0 | } |
313 | 0 | end_x = std::min(end_x, ac_strategy.xsize()); |
314 | | // The first multiblock might be before the start_x, let's adjust it |
315 | | // to point to the first IsFirstBlock() == true block we find by backward |
316 | | // tracing. |
317 | 0 | AcStrategyRow row = ac_strategy.ConstRow(y); |
318 | 0 | const size_t start_x_limit = start_x & ~7; |
319 | 0 | while (start_x != start_x_limit && !row[start_x].IsFirstBlock()) { |
320 | 0 | --start_x; |
321 | 0 | } |
322 | 0 | for (size_t x = start_x; x < end_x;) { |
323 | 0 | if (row[x].IsFirstBlock()) { |
324 | 0 | x += row[x].covered_blocks_x(); |
325 | 0 | } else { |
326 | 0 | return true; |
327 | 0 | } |
328 | 0 | } |
329 | 0 | return false; |
330 | 0 | } |
331 | | |
332 | | bool MultiBlockTransformCrossesVerticalBoundary( |
333 | | const AcStrategyImage& ac_strategy, size_t x, size_t start_y, |
334 | 0 | size_t end_y) { |
335 | 0 | if (x >= ac_strategy.xsize() || start_y >= ac_strategy.ysize()) { |
336 | 0 | return false; |
337 | 0 | } |
338 | 0 | if (x % 8 == 0) { |
339 | | // Nothing crosses 64x64 boundaries, and the memory on the other side |
340 | | // of the 64x64 block may still uninitialized. |
341 | 0 | return false; |
342 | 0 | } |
343 | 0 | end_y = std::min(end_y, ac_strategy.ysize()); |
344 | | // The first multiblock might be before the start_y, let's adjust it |
345 | | // to point to the first IsFirstBlock() == true block we find by backward |
346 | | // tracing. |
347 | 0 | const size_t start_y_limit = start_y & ~7; |
348 | 0 | while (start_y != start_y_limit && |
349 | 0 | !ac_strategy.ConstRow(start_y)[x].IsFirstBlock()) { |
350 | 0 | --start_y; |
351 | 0 | } |
352 | |
|
353 | 0 | for (size_t y = start_y; y < end_y;) { |
354 | 0 | AcStrategyRow row = ac_strategy.ConstRow(y); |
355 | 0 | if (row[x].IsFirstBlock()) { |
356 | 0 | y += row[x].covered_blocks_y(); |
357 | 0 | } else { |
358 | 0 | return true; |
359 | 0 | } |
360 | 0 | } |
361 | 0 | return false; |
362 | 0 | } |
363 | | |
364 | | Status EstimateEntropy(const AcStrategy& acs, float entropy_mul, size_t x, |
365 | | size_t y, const ACSConfig& config, |
366 | | const float* JXL_RESTRICT cmap_factors, float* block, |
367 | | float* full_scratch_space, uint32_t* quantized, |
368 | 0 | float& entropy) { |
369 | 0 | entropy = 0.0f; |
370 | 0 | float* mem = full_scratch_space; |
371 | 0 | float* scratch_space = full_scratch_space + AcStrategy::kMaxCoeffArea; |
372 | 0 | const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize; |
373 | | |
374 | | // Apply transform. |
375 | 0 | for (size_t c = 0; c < 3; c++) { |
376 | 0 | float* JXL_RESTRICT block_c = block + size * c; |
377 | 0 | TransformFromPixels(acs.Strategy(), &config.Pixel(c, x, y), |
378 | 0 | config.src_stride, block_c, scratch_space); |
379 | 0 | } |
380 | 0 | HWY_FULL(float) df; |
381 | |
|
382 | 0 | const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y(); |
383 | | // avoid large blocks when there is a lot going on in red-green. |
384 | 0 | float quant_norm16 = 0; |
385 | 0 | if (num_blocks == 1) { |
386 | | // When it is only one 8x8, we don't need aggregation of values. |
387 | 0 | quant_norm16 = config.Quant(x / 8, y / 8); |
388 | 0 | } else if (num_blocks == 2) { |
389 | | // Taking max instead of 8th norm seems to work |
390 | | // better for smallest blocks up to 16x8. Jyrki couldn't get |
391 | | // improvements in trying the same for 16x16 blocks. |
392 | 0 | if (acs.covered_blocks_y() == 2) { |
393 | 0 | quant_norm16 = |
394 | 0 | std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1)); |
395 | 0 | } else { |
396 | 0 | quant_norm16 = |
397 | 0 | std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8)); |
398 | 0 | } |
399 | 0 | } else { |
400 | | // Load QF value, calculate empirical heuristic on masking field |
401 | | // for weighting the information loss. Information loss manifests |
402 | | // itself as ringing, and masking could hide it. |
403 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { |
404 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { |
405 | 0 | float qval = config.Quant(x / 8 + ix, y / 8 + iy); |
406 | 0 | qval *= qval; |
407 | 0 | qval *= qval; |
408 | 0 | qval *= qval; |
409 | 0 | quant_norm16 += qval * qval; |
410 | 0 | } |
411 | 0 | } |
412 | 0 | quant_norm16 /= num_blocks; |
413 | 0 | quant_norm16 = FastPowf(quant_norm16, 1.0f / 16.0f); |
414 | 0 | } |
415 | 0 | const auto quant = Set(df, quant_norm16); |
416 | | |
417 | | // Compute entropy. |
418 | 0 | const HWY_CAPPED(float, 8) df8; |
419 | |
|
420 | 0 | auto loss = Zero(df8); |
421 | 0 | for (size_t c = 0; c < 3; c++) { |
422 | 0 | const float* inv_matrix = config.dequant->InvMatrix(acs.Strategy(), c); |
423 | 0 | const float* matrix = config.dequant->Matrix(acs.Strategy(), c); |
424 | 0 | const auto cmap_factor = Set(df, cmap_factors[c]); |
425 | |
|
426 | 0 | auto entropy_v = Zero(df); |
427 | 0 | auto nzeros_v = Zero(df); |
428 | 0 | for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) { |
429 | 0 | const auto in = Load(df, block + c * size + i); |
430 | 0 | const auto in_y = Mul(Load(df, block + size + i), cmap_factor); |
431 | 0 | const auto im = Load(df, inv_matrix + i); |
432 | 0 | const auto val = Mul(Sub(in, in_y), Mul(im, quant)); |
433 | 0 | const auto rval = Round(val); |
434 | 0 | const auto diff = Sub(val, rval); |
435 | 0 | const auto m = Load(df, matrix + i); |
436 | 0 | Store(Mul(m, diff), df, &mem[i]); |
437 | 0 | const auto q = Abs(rval); |
438 | 0 | const auto q_is_zero = Eq(q, Zero(df)); |
439 | | // We used to have q * C here, but that cost model seems to |
440 | | // be punishing large values more than necessary. Sqrt tries |
441 | | // to avoid large values less aggressively. |
442 | 0 | entropy_v = Add(Sqrt(q), entropy_v); |
443 | 0 | nzeros_v = Add(nzeros_v, IfThenZeroElse(q_is_zero, Set(df, 1.0f))); |
444 | 0 | } |
445 | |
|
446 | 0 | { |
447 | 0 | float masku_lut[3] = { |
448 | 0 | 12.0, |
449 | 0 | 0.0, |
450 | 0 | 4.0, |
451 | 0 | }; |
452 | 0 | auto masku_off = Set(df8, masku_lut[c]); |
453 | 0 | auto lossc = Zero(df8); |
454 | 0 | TransformToPixels(acs.Strategy(), &mem[0], block, |
455 | 0 | acs.covered_blocks_x() * 8, scratch_space); |
456 | |
|
457 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { |
458 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { |
459 | 0 | for (size_t dy = 0; dy < kBlockDim; ++dy) { |
460 | 0 | for (size_t dx = 0; dx < kBlockDim; dx += Lanes(df8)) { |
461 | 0 | auto in = Load(df8, block + |
462 | 0 | (iy * kBlockDim + dy) * |
463 | 0 | (acs.covered_blocks_x() * kBlockDim) + |
464 | 0 | ix * kBlockDim + dx); |
465 | 0 | if (x + ix * 8 + dx + Lanes(df8) <= config.mask1x1_xsize) { |
466 | 0 | auto masku = |
467 | 0 | Add(Load(df8, config.MaskingPtr1x1(x + ix * 8 + dx, |
468 | 0 | y + iy * 8 + dy)), |
469 | 0 | masku_off); |
470 | 0 | in = Mul(masku, in); |
471 | 0 | in = Mul(in, in); |
472 | 0 | in = Mul(in, in); |
473 | 0 | in = Mul(in, in); |
474 | 0 | lossc = Add(lossc, in); |
475 | 0 | } |
476 | 0 | } |
477 | 0 | } |
478 | 0 | } |
479 | 0 | } |
480 | 0 | static const double kChannelMul[3] = { |
481 | 0 | pow(8.2, 8.0), |
482 | 0 | pow(1.0, 8.0), |
483 | 0 | pow(1.03, 8.0), |
484 | 0 | }; |
485 | 0 | lossc = Mul(Set(df8, kChannelMul[c]), lossc); |
486 | 0 | loss = Add(loss, lossc); |
487 | 0 | } |
488 | 0 | entropy += config.cost_delta * GetLane(SumOfLanes(df, entropy_v)); |
489 | 0 | size_t num_nzeros = GetLane(SumOfLanes(df, nzeros_v)); |
490 | | // Add #bit of num_nonzeros, as an estimate of the cost for encoding the |
491 | | // number of non-zeros of the block. |
492 | 0 | size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1; |
493 | | // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a |
494 | | // bias. |
495 | 0 | entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits); |
496 | 0 | if (c == 0 && num_blocks >= 2) { |
497 | | // It is X channel (red-green) and we often see ringing |
498 | | // in the large blocks. Let's punish that more here. |
499 | 0 | float w = 1.0 + std::min(3.0, num_blocks / 8.0); |
500 | 0 | entropy *= w; |
501 | 0 | loss = Mul(loss, Set(df8, w)); |
502 | 0 | } |
503 | 0 | } |
504 | 0 | float loss_scalar = |
505 | 0 | pow(GetLane(SumOfLanes(df8, loss)) / (num_blocks * kDCTBlockSize), |
506 | 0 | 1.0f / 8.0f) * |
507 | 0 | (num_blocks * kDCTBlockSize) / quant_norm16; |
508 | 0 | entropy *= entropy_mul; |
509 | 0 | entropy += config.info_loss_multiplier * loss_scalar; |
510 | 0 | return true; |
511 | 0 | } |
512 | | |
513 | | Status FindBest8x8Transform(size_t x, size_t y, int encoding_speed_tier, |
514 | | float butteraugli_target, const ACSConfig& config, |
515 | | const float* JXL_RESTRICT cmap_factors, |
516 | | AcStrategyImage* JXL_RESTRICT ac_strategy, |
517 | | float* block, float* scratch_space, |
518 | | uint32_t* quantized, float* entropy_out, |
519 | 0 | AcStrategyType& best_tx) { |
520 | 0 | struct TransformTry8x8 { |
521 | 0 | AcStrategyType type; |
522 | 0 | int encoding_speed_tier_max_limit; |
523 | 0 | double entropy_mul; |
524 | 0 | }; |
525 | 0 | static const TransformTry8x8 kTransforms8x8[] = { |
526 | 0 | { |
527 | 0 | AcStrategyType::DCT, |
528 | 0 | 9, |
529 | 0 | 0.8, |
530 | 0 | }, |
531 | 0 | { |
532 | 0 | AcStrategyType::DCT4X4, |
533 | 0 | 5, |
534 | 0 | 1.08, |
535 | 0 | }, |
536 | 0 | { |
537 | 0 | AcStrategyType::DCT2X2, |
538 | 0 | 5, |
539 | 0 | 0.95, |
540 | 0 | }, |
541 | 0 | { |
542 | 0 | AcStrategyType::DCT4X8, |
543 | 0 | 4, |
544 | 0 | 0.85931637428340035, |
545 | 0 | }, |
546 | 0 | { |
547 | 0 | AcStrategyType::DCT8X4, |
548 | 0 | 4, |
549 | 0 | 0.85931637428340035, |
550 | 0 | }, |
551 | 0 | { |
552 | 0 | AcStrategyType::IDENTITY, |
553 | 0 | 5, |
554 | 0 | 1.0427542510634957, |
555 | 0 | }, |
556 | 0 | { |
557 | 0 | AcStrategyType::AFV0, |
558 | 0 | 4, |
559 | 0 | 0.81779489591359944, |
560 | 0 | }, |
561 | 0 | { |
562 | 0 | AcStrategyType::AFV1, |
563 | 0 | 4, |
564 | 0 | 0.81779489591359944, |
565 | 0 | }, |
566 | 0 | { |
567 | 0 | AcStrategyType::AFV2, |
568 | 0 | 4, |
569 | 0 | 0.81779489591359944, |
570 | 0 | }, |
571 | 0 | { |
572 | 0 | AcStrategyType::AFV3, |
573 | 0 | 4, |
574 | 0 | 0.81779489591359944, |
575 | 0 | }, |
576 | 0 | }; |
577 | 0 | double best = 1e30; |
578 | 0 | best_tx = kTransforms8x8[0].type; |
579 | 0 | for (auto tx : kTransforms8x8) { |
580 | 0 | if (tx.encoding_speed_tier_max_limit < encoding_speed_tier) { |
581 | 0 | continue; |
582 | 0 | } |
583 | 0 | AcStrategy acs = AcStrategy::FromRawStrategy(tx.type); |
584 | 0 | float entropy_mul = tx.entropy_mul / kTransforms8x8[0].entropy_mul; |
585 | 0 | if ((tx.type == AcStrategyType::DCT2X2 || |
586 | 0 | tx.type == AcStrategyType::IDENTITY) && |
587 | 0 | butteraugli_target < 5.0) { |
588 | 0 | static const float kFavor2X2AtHighQuality = 0.4; |
589 | 0 | float weight = pow((5.0f - butteraugli_target) / 5.0f, 2.0f); |
590 | 0 | entropy_mul -= kFavor2X2AtHighQuality * weight; |
591 | 0 | } |
592 | 0 | if ((tx.type != AcStrategyType::DCT && tx.type != AcStrategyType::DCT2X2 && |
593 | 0 | tx.type != AcStrategyType::IDENTITY) && |
594 | 0 | butteraugli_target > 4.0) { |
595 | 0 | static const float kAvoidEntropyOfTransforms = 0.5; |
596 | 0 | float mul = 1.0; |
597 | 0 | if (butteraugli_target < 12.0) { |
598 | 0 | mul *= (12.0 - 4.0) / (butteraugli_target - 4.0); |
599 | 0 | } |
600 | 0 | entropy_mul += kAvoidEntropyOfTransforms * mul; |
601 | 0 | } |
602 | 0 | float entropy; |
603 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy(acs, entropy_mul, x, y, config, |
604 | 0 | cmap_factors, block, scratch_space, |
605 | 0 | quantized, entropy)); |
606 | 0 | if (entropy < best) { |
607 | 0 | best_tx = tx.type; |
608 | 0 | best = entropy; |
609 | 0 | } |
610 | 0 | } |
611 | 0 | *entropy_out = best; |
612 | 0 | return true; |
613 | 0 | } |
614 | | |
615 | | // bx, by addresses the 64x64 block at 8x8 subresolution |
616 | | // cx, cy addresses the left, upper 8x8 block position of the candidate |
617 | | // transform. |
618 | | Status TryMergeAcs(AcStrategyType acs_raw, size_t bx, size_t by, size_t cx, |
619 | | size_t cy, const ACSConfig& config, |
620 | | const float* JXL_RESTRICT cmap_factors, |
621 | | AcStrategyImage* JXL_RESTRICT ac_strategy, |
622 | | const float entropy_mul, const uint8_t candidate_priority, |
623 | | uint8_t* priority, float* JXL_RESTRICT entropy_estimate, |
624 | 0 | float* block, float* scratch_space, uint32_t* quantized) { |
625 | 0 | AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw); |
626 | 0 | float entropy_current = 0; |
627 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) { |
628 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) { |
629 | 0 | if (priority[(cy + iy) * 8 + (cx + ix)] >= candidate_priority) { |
630 | | // Transform would reuse already allocated blocks and |
631 | | // lead to invalid overlaps, for example DCT64X32 vs. |
632 | | // DCT32X64. |
633 | 0 | return true; |
634 | 0 | } |
635 | 0 | entropy_current += entropy_estimate[(cy + iy) * 8 + (cx + ix)]; |
636 | 0 | } |
637 | 0 | } |
638 | 0 | float entropy_candidate; |
639 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
640 | 0 | acs, entropy_mul, (bx + cx) * 8, (by + cy) * 8, config, cmap_factors, |
641 | 0 | block, scratch_space, quantized, entropy_candidate)); |
642 | 0 | if (entropy_candidate >= entropy_current) return true; |
643 | | // Accept the candidate. |
644 | 0 | for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { |
645 | 0 | for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { |
646 | 0 | entropy_estimate[(cy + iy) * 8 + cx + ix] = 0; |
647 | 0 | priority[(cy + iy) * 8 + cx + ix] = candidate_priority; |
648 | 0 | } |
649 | 0 | } |
650 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_raw)); |
651 | 0 | entropy_estimate[cy * 8 + cx] = entropy_candidate; |
652 | 0 | return true; |
653 | 0 | } |
654 | | |
655 | | static void SetEntropyForTransform(size_t cx, size_t cy, |
656 | | const AcStrategyType acs_raw, float entropy, |
657 | 0 | float* JXL_RESTRICT entropy_estimate) { |
658 | 0 | const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw); |
659 | 0 | for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) { |
660 | 0 | for (size_t dx = 0; dx < acs.covered_blocks_x(); ++dx) { |
661 | 0 | entropy_estimate[(cy + dy) * 8 + cx + dx] = 0.0; |
662 | 0 | } |
663 | 0 | } |
664 | 0 | entropy_estimate[cy * 8 + cx] = entropy; |
665 | 0 | } |
666 | | |
667 | 0 | AcStrategyType AcsSquare(size_t blocks) { |
668 | 0 | if (blocks == 2) { |
669 | 0 | return AcStrategyType::DCT16X16; |
670 | 0 | } else if (blocks == 4) { |
671 | 0 | return AcStrategyType::DCT32X32; |
672 | 0 | } else { |
673 | 0 | return AcStrategyType::DCT64X64; |
674 | 0 | } |
675 | 0 | } |
676 | | |
677 | 0 | AcStrategyType AcsVerticalSplit(size_t blocks) { |
678 | 0 | if (blocks == 2) { |
679 | 0 | return AcStrategyType::DCT16X8; |
680 | 0 | } else if (blocks == 4) { |
681 | 0 | return AcStrategyType::DCT32X16; |
682 | 0 | } else { |
683 | 0 | return AcStrategyType::DCT64X32; |
684 | 0 | } |
685 | 0 | } |
686 | | |
687 | 0 | AcStrategyType AcsHorizontalSplit(size_t blocks) { |
688 | 0 | if (blocks == 2) { |
689 | 0 | return AcStrategyType::DCT8X16; |
690 | 0 | } else if (blocks == 4) { |
691 | 0 | return AcStrategyType::DCT16X32; |
692 | 0 | } else { |
693 | 0 | return AcStrategyType::DCT32X64; |
694 | 0 | } |
695 | 0 | } |
696 | | |
697 | | // The following function tries to merge smaller transforms into |
698 | | // squares and the rectangles originating from a single middle division |
699 | | // (horizontal or vertical) fairly. |
700 | | // |
701 | | // This is now generalized to concern about squares |
702 | | // of blocks X blocks size, where a block is 8x8 pixels. |
703 | | Status FindBestFirstLevelDivisionForSquare( |
704 | | size_t blocks, bool allow_square_transform, size_t bx, size_t by, size_t cx, |
705 | | size_t cy, const ACSConfig& config, const float* JXL_RESTRICT cmap_factors, |
706 | | AcStrategyImage* JXL_RESTRICT ac_strategy, const float entropy_mul_JXK, |
707 | | const float entropy_mul_JXJ, float* JXL_RESTRICT entropy_estimate, |
708 | 0 | float* block, float* scratch_space, uint32_t* quantized) { |
709 | | // We denote J for the larger dimension here, and K for the smaller. |
710 | | // For example, for 32x32 block splitting, J would be 32, K 16. |
711 | 0 | const size_t blocks_half = blocks / 2; |
712 | 0 | const AcStrategyType acs_rawJXK = AcsVerticalSplit(blocks); |
713 | 0 | const AcStrategyType acs_rawKXJ = AcsHorizontalSplit(blocks); |
714 | 0 | const AcStrategyType acs_rawJXJ = AcsSquare(blocks); |
715 | 0 | const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK); |
716 | 0 | const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ); |
717 | 0 | const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ); |
718 | 0 | AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0); |
719 | 0 | AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half); |
720 | | // Let's check if we can consider a JXJ block here at all. |
721 | | // This is not necessary in the basic use of hierarchically merging |
722 | | // blocks in the simplest possible way, but is needed when we try other |
723 | | // 'floating' options of merging, possibly after a simple hierarchical |
724 | | // merge has been explored. |
725 | 0 | if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx, |
726 | 0 | by + cy, bx + cx + blocks) || |
727 | 0 | MultiBlockTransformCrossesHorizontalBoundary( |
728 | 0 | *ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) || |
729 | 0 | MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy, |
730 | 0 | by + cy + blocks) || |
731 | 0 | MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks, |
732 | 0 | by + cy, by + cy + blocks)) { |
733 | 0 | return true; // not suitable for JxJ analysis, some transforms leak out. |
734 | 0 | } |
735 | | // For floating transforms there may be |
736 | | // already blocks selected that make either or both JXK and |
737 | | // KXJ not feasible for this location. |
738 | 0 | const bool allow_JXK = !MultiBlockTransformCrossesVerticalBoundary( |
739 | 0 | *ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks); |
740 | 0 | const bool allow_KXJ = !MultiBlockTransformCrossesHorizontalBoundary( |
741 | 0 | *ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks); |
742 | | // Current entropies aggregated on NxN resolution. |
743 | 0 | float entropy[2][2] = {}; |
744 | 0 | for (size_t dy = 0; dy < blocks; ++dy) { |
745 | 0 | for (size_t dx = 0; dx < blocks; ++dx) { |
746 | 0 | entropy[dy / blocks_half][dx / blocks_half] += |
747 | 0 | entropy_estimate[(cy + dy) * 8 + (cx + dx)]; |
748 | 0 | } |
749 | 0 | } |
750 | 0 | float entropy_JXK_left = std::numeric_limits<float>::max(); |
751 | 0 | float entropy_JXK_right = std::numeric_limits<float>::max(); |
752 | 0 | float entropy_KXJ_top = std::numeric_limits<float>::max(); |
753 | 0 | float entropy_KXJ_bottom = std::numeric_limits<float>::max(); |
754 | 0 | float entropy_JXJ = std::numeric_limits<float>::max(); |
755 | 0 | if (allow_JXK) { |
756 | 0 | if (row0[bx + cx + 0].Strategy() != acs_rawJXK) { |
757 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
758 | 0 | acsJXK, entropy_mul_JXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, |
759 | 0 | cmap_factors, block, scratch_space, quantized, entropy_JXK_left)); |
760 | 0 | } |
761 | 0 | if (row0[bx + cx + blocks_half].Strategy() != acs_rawJXK) { |
762 | 0 | JXL_RETURN_IF_ERROR( |
763 | 0 | EstimateEntropy(acsJXK, entropy_mul_JXK, (bx + cx + blocks_half) * 8, |
764 | 0 | (by + cy + 0) * 8, config, cmap_factors, block, |
765 | 0 | scratch_space, quantized, entropy_JXK_right)); |
766 | 0 | } |
767 | 0 | } |
768 | 0 | if (allow_KXJ) { |
769 | 0 | if (row0[bx + cx].Strategy() != acs_rawKXJ) { |
770 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
771 | 0 | acsKXJ, entropy_mul_JXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, |
772 | 0 | cmap_factors, block, scratch_space, quantized, entropy_KXJ_top)); |
773 | 0 | } |
774 | 0 | if (row1[bx + cx].Strategy() != acs_rawKXJ) { |
775 | 0 | JXL_RETURN_IF_ERROR( |
776 | 0 | EstimateEntropy(acsKXJ, entropy_mul_JXK, (bx + cx + 0) * 8, |
777 | 0 | (by + cy + blocks_half) * 8, config, cmap_factors, |
778 | 0 | block, scratch_space, quantized, entropy_KXJ_bottom)); |
779 | 0 | } |
780 | 0 | } |
781 | 0 | if (allow_square_transform) { |
782 | | // We control the exploration of the square transform separately so that |
783 | | // we can turn it off at high decoding speeds for 32x32, but still allow |
784 | | // exploring 16x32 and 32x16. |
785 | 0 | JXL_RETURN_IF_ERROR(EstimateEntropy( |
786 | 0 | acsJXJ, entropy_mul_JXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, |
787 | 0 | cmap_factors, block, scratch_space, quantized, entropy_JXJ)); |
788 | 0 | } |
789 | | |
790 | | // Test if this block should have JXK or KXJ transforms, |
791 | | // because it can have only one or the other. |
792 | 0 | float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) + |
793 | 0 | std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]); |
794 | 0 | float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) + |
795 | 0 | std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]); |
796 | 0 | if (entropy_JXJ < costJxN && entropy_JXJ < costNxJ) { |
797 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ)); |
798 | 0 | SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate); |
799 | 0 | } else if (costJxN < costNxJ) { |
800 | 0 | if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) { |
801 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_rawJXK)); |
802 | 0 | SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left, |
803 | 0 | entropy_estimate); |
804 | 0 | } |
805 | 0 | if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) { |
806 | 0 | JXL_RETURN_IF_ERROR( |
807 | 0 | ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK)); |
808 | 0 | SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK, |
809 | 0 | entropy_JXK_right, entropy_estimate); |
810 | 0 | } |
811 | 0 | } else { |
812 | 0 | if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) { |
813 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ)); |
814 | 0 | SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top, |
815 | 0 | entropy_estimate); |
816 | 0 | } |
817 | 0 | if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) { |
818 | 0 | JXL_RETURN_IF_ERROR( |
819 | 0 | ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ)); |
820 | 0 | SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ, |
821 | 0 | entropy_KXJ_bottom, entropy_estimate); |
822 | 0 | } |
823 | 0 | } |
824 | 0 | return true; |
825 | 0 | } |
826 | | |
827 | | Status ProcessRectACS(const CompressParams& cparams, const ACSConfig& config, |
828 | | const Rect& rect, const ColorCorrelationMap& cmap, |
829 | | float* JXL_RESTRICT block, |
830 | | uint32_t* JXL_RESTRICT quantized, |
831 | 0 | AcStrategyImage* ac_strategy) { |
832 | | // Main philosophy here: |
833 | | // 1. First find best 8x8 transform for each area. |
834 | | // 2. Merging them into larger transforms where possibly, but |
835 | | // starting from the smallest transforms (16x8 and 8x16). |
836 | | // Additional complication: 16x8 and 8x16 are considered |
837 | | // simultaneously and fairly against each other. |
838 | | // We are looking at 64x64 squares since the Y-to-X and Y-to-B |
839 | | // maps happen to be at that resolution, and having |
840 | | // integral transforms cross these boundaries leads to |
841 | | // additional complications. |
842 | 0 | const float butteraugli_target = cparams.butteraugli_distance; |
843 | 0 | float* JXL_RESTRICT scratch_space = block + 3 * AcStrategy::kMaxCoeffArea; |
844 | 0 | size_t bx = rect.x0(); |
845 | 0 | size_t by = rect.y0(); |
846 | 0 | JXL_ENSURE(rect.xsize() <= 8); |
847 | 0 | JXL_ENSURE(rect.ysize() <= 8); |
848 | 0 | size_t tx = bx / kColorTileDimInBlocks; |
849 | 0 | size_t ty = by / kColorTileDimInBlocks; |
850 | 0 | const float cmap_factors[3] = { |
851 | 0 | cmap.base().YtoXRatio(cmap.ytox_map.ConstRow(ty)[tx]), |
852 | 0 | 0.0f, |
853 | 0 | cmap.base().YtoBRatio(cmap.ytob_map.ConstRow(ty)[tx]), |
854 | 0 | }; |
855 | 0 | if (cparams.speed_tier > SpeedTier::kHare) return true; |
856 | | // First compute the best 8x8 transform for each square. Later, we do not |
857 | | // experiment with different combinations, but only use the best of the 8x8s |
858 | | // when DCT8X8 is specified in the tree search. |
859 | | // 8x8 transforms have 10 variants, but every larger transform is just a DCT. |
860 | 0 | float entropy_estimate[64] = {}; |
861 | | // Favor all 8x8 transforms (against 16x8 and larger transforms)) at |
862 | | // low butteraugli_target distances. |
863 | 0 | static const float k8x8mul1 = -0.4; |
864 | 0 | static const float k8x8mul2 = 1.0; |
865 | 0 | static const float k8x8base = 1.4; |
866 | 0 | const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base); |
867 | 0 | for (size_t iy = 0; iy < rect.ysize(); iy++) { |
868 | 0 | for (size_t ix = 0; ix < rect.xsize(); ix++) { |
869 | 0 | float entropy = 0.0; |
870 | 0 | AcStrategyType best_of_8x8s; |
871 | 0 | JXL_RETURN_IF_ERROR(FindBest8x8Transform( |
872 | 0 | 8 * (bx + ix), 8 * (by + iy), static_cast<int>(cparams.speed_tier), |
873 | 0 | butteraugli_target, config, cmap_factors, ac_strategy, block, |
874 | 0 | scratch_space, quantized, &entropy, best_of_8x8s)); |
875 | 0 | JXL_RETURN_IF_ERROR(ac_strategy->Set(bx + ix, by + iy, best_of_8x8s)); |
876 | 0 | entropy_estimate[iy * 8 + ix] = entropy * mul8x8; |
877 | 0 | } |
878 | 0 | } |
879 | | // Merge when a larger transform is better than the previously |
880 | | // searched best combination of 8x8 transforms. |
881 | 0 | struct MergeTry { |
882 | 0 | AcStrategyType type; |
883 | 0 | uint8_t priority; |
884 | 0 | uint8_t decoding_speed_tier_max_limit; |
885 | 0 | uint8_t encoding_speed_tier_max_limit; |
886 | 0 | float entropy_mul; |
887 | 0 | }; |
888 | | // These numbers need to be figured out manually and looking at |
889 | | // ringing next to sky etc. Optimization will find smaller numbers |
890 | | // and produce more ringing than is ideal. Larger numbers will |
891 | | // help stop ringing. |
892 | 0 | const float entropy_mul16X8 = 1.21; |
893 | 0 | const float entropy_mul16X16 = 1.34; |
894 | 0 | const float entropy_mul16X32 = 1.49; |
895 | 0 | const float entropy_mul32X32 = 1.48; |
896 | 0 | const float entropy_mul64X32 = 2.25; |
897 | 0 | const float entropy_mul64X64 = 2.25; |
898 | | // TODO(jyrki): Consider this feedback in further changes: |
899 | | // Also effectively when the multipliers for smaller blocks are |
900 | | // below 1, this raises the bar for the bigger blocks even higher |
901 | | // in that sense these constants are not independent (e.g. changing |
902 | | // the constant for DCT16x32 by -5% (making it more likely) also |
903 | | // means that DCT32x32 becomes harder to do when starting from |
904 | | // two DCT16x32s). It might be better to make them more independent, |
905 | | // e.g. by not applying the multiplier when storing the new entropy |
906 | | // estimates in TryMergeToACSCandidate(). |
907 | 0 | const MergeTry kTransformsForMerge[9] = { |
908 | 0 | {AcStrategyType::DCT16X8, 2, 4, 5, entropy_mul16X8}, |
909 | 0 | {AcStrategyType::DCT8X16, 2, 4, 5, entropy_mul16X8}, |
910 | | // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its |
911 | | // subdivisions. {AcStrategyType::DCT16X16, 3, entropy_mul16X16}, |
912 | 0 | {AcStrategyType::DCT16X32, 4, 4, 4, entropy_mul16X32}, |
913 | 0 | {AcStrategyType::DCT32X16, 4, 4, 4, entropy_mul16X32}, |
914 | | // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its |
915 | | // subdivisions. {AcStrategyType::DCT32X32, 5, 1, 5, |
916 | | // 0.9822994906548809f}, |
917 | 0 | {AcStrategyType::DCT64X32, 6, 1, 3, entropy_mul64X32}, |
918 | 0 | {AcStrategyType::DCT32X64, 6, 1, 3, entropy_mul64X32}, |
919 | | // {AcStrategyType::DCT64X64, 8, 1, 3, 2.0846542128012948f}, |
920 | 0 | }; |
921 | | /* |
922 | | These sizes not yet included in merge heuristic: |
923 | | set(AcStrategyType::DCT32X8, 0.0f, 2.261390410971102f); |
924 | | set(AcStrategyType::DCT8X32, 0.0f, 2.261390410971102f); |
925 | | set(AcStrategyType::DCT128X128, 0.0f, 1.0f); |
926 | | set(AcStrategyType::DCT128X64, 0.0f, 0.73f); |
927 | | set(AcStrategyType::DCT64X128, 0.0f, 0.73f); |
928 | | set(AcStrategyType::DCT256X256, 0.0f, 1.0f); |
929 | | set(AcStrategyType::DCT256X128, 0.0f, 0.73f); |
930 | | set(AcStrategyType::DCT128X256, 0.0f, 0.73f); |
931 | | */ |
932 | | |
933 | | // Priority is a tricky kludge to avoid collisions so that transforms |
934 | | // don't overlap. |
935 | 0 | uint8_t priority[64] = {}; |
936 | 0 | bool enable_32x32 = cparams.decoding_speed_tier < 4; |
937 | 0 | for (auto mt : kTransformsForMerge) { |
938 | 0 | if (mt.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) { |
939 | 0 | continue; |
940 | 0 | } |
941 | 0 | AcStrategy acs = AcStrategy::FromRawStrategy(mt.type); |
942 | |
|
943 | 0 | for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize(); |
944 | 0 | cy += acs.covered_blocks_y()) { |
945 | 0 | for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize(); |
946 | 0 | cx += acs.covered_blocks_x()) { |
947 | 0 | if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) { |
948 | 0 | if (cparams.decoding_speed_tier < 4 && |
949 | 0 | mt.type == AcStrategyType::DCT32X64) { |
950 | | // We handle both DCT8X16 and DCT16X8 at the same time. |
951 | 0 | if ((cy | cx) % 8 == 0) { |
952 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
953 | 0 | 8, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
954 | 0 | mt.entropy_mul, entropy_mul64X64, entropy_estimate, block, |
955 | 0 | scratch_space, quantized)); |
956 | 0 | } |
957 | 0 | continue; |
958 | 0 | } else if (mt.type == AcStrategyType::DCT32X16) { |
959 | | // We handled both DCT8X16 and DCT16X8 at the same time, |
960 | | // and that is above. The last column and last row, |
961 | | // when the last column or last row is odd numbered, |
962 | | // are still handled by TryMergeAcs. |
963 | 0 | continue; |
964 | 0 | } |
965 | 0 | } |
966 | 0 | if ((mt.type == AcStrategyType::DCT16X32 && cy % 4 != 0) || |
967 | 0 | (mt.type == AcStrategyType::DCT32X16 && cx % 4 != 0)) { |
968 | | // already covered by FindBest32X32 |
969 | 0 | continue; |
970 | 0 | } |
971 | | |
972 | 0 | if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) { |
973 | 0 | if (mt.type == AcStrategyType::DCT16X32) { |
974 | | // We handle both DCT8X16 and DCT16X8 at the same time. |
975 | 0 | if ((cy | cx) % 4 == 0) { |
976 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
977 | 0 | 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, |
978 | 0 | ac_strategy, mt.entropy_mul, entropy_mul32X32, |
979 | 0 | entropy_estimate, block, scratch_space, quantized)); |
980 | 0 | } |
981 | 0 | continue; |
982 | 0 | } else if (mt.type == AcStrategyType::DCT32X16) { |
983 | | // We handled both DCT8X16 and DCT16X8 at the same time, |
984 | | // and that is above. The last column and last row, |
985 | | // when the last column or last row is odd numbered, |
986 | | // are still handled by TryMergeAcs. |
987 | 0 | continue; |
988 | 0 | } |
989 | 0 | } |
990 | 0 | if ((mt.type == AcStrategyType::DCT16X32 && cy % 4 != 0) || |
991 | 0 | (mt.type == AcStrategyType::DCT32X16 && cx % 4 != 0)) { |
992 | | // already covered by FindBest32X32 |
993 | 0 | continue; |
994 | 0 | } |
995 | 0 | if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) { |
996 | 0 | if (mt.type == AcStrategyType::DCT8X16) { |
997 | | // We handle both DCT8X16 and DCT16X8 at the same time. |
998 | 0 | if ((cy | cx) % 2 == 0) { |
999 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
1000 | 0 | 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
1001 | 0 | mt.entropy_mul, entropy_mul16X16, entropy_estimate, block, |
1002 | 0 | scratch_space, quantized)); |
1003 | 0 | } |
1004 | 0 | continue; |
1005 | 0 | } else if (mt.type == AcStrategyType::DCT16X8) { |
1006 | | // We handled both DCT8X16 and DCT16X8 at the same time, |
1007 | | // and that is above. The last column and last row, |
1008 | | // when the last column or last row is odd numbered, |
1009 | | // are still handled by TryMergeAcs. |
1010 | 0 | continue; |
1011 | 0 | } |
1012 | 0 | } |
1013 | 0 | if ((mt.type == AcStrategyType::DCT8X16 && cy % 2 == 1) || |
1014 | 0 | (mt.type == AcStrategyType::DCT16X8 && cx % 2 == 1)) { |
1015 | | // already covered by FindBestFirstLevelDivisionForSquare |
1016 | 0 | continue; |
1017 | 0 | } |
1018 | | // All other merge sizes are handled here. |
1019 | | // Some of the DCT16X8s and DCT8X16s will still leak through here |
1020 | | // when there is an odd number of 8x8 blocks, then the last row |
1021 | | // and column will get their DCT16X8s and DCT8X16s through the |
1022 | | // normal integral transform merging process. |
1023 | 0 | JXL_RETURN_IF_ERROR( |
1024 | 0 | TryMergeAcs(mt.type, bx, by, cx, cy, config, cmap_factors, |
1025 | 0 | ac_strategy, mt.entropy_mul, mt.priority, &priority[0], |
1026 | 0 | entropy_estimate, block, scratch_space, quantized)); |
1027 | 0 | } |
1028 | 0 | } |
1029 | 0 | } |
1030 | 0 | if (cparams.speed_tier >= SpeedTier::kHare) { |
1031 | 0 | return true; |
1032 | 0 | } |
1033 | | // Here we still try to do some non-aligned matching, find a few more |
1034 | | // 16X8, 8X16 and 16X16s between the non-2-aligned blocks. |
1035 | 0 | for (size_t cy = 0; cy + 1 < rect.ysize(); ++cy) { |
1036 | 0 | for (size_t cx = 0; cx + 1 < rect.xsize(); ++cx) { |
1037 | 0 | if ((cy | cx) % 2 != 0) { |
1038 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
1039 | 0 | 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
1040 | 0 | entropy_mul16X8, entropy_mul16X16, entropy_estimate, block, |
1041 | 0 | scratch_space, quantized)); |
1042 | 0 | } |
1043 | 0 | } |
1044 | 0 | } |
1045 | | // Non-aligned matching for 32X32, 16X32 and 32X16. |
1046 | 0 | size_t step = cparams.speed_tier >= SpeedTier::kTortoise ? 2 : 1; |
1047 | 0 | for (size_t cy = 0; cy + 3 < rect.ysize(); cy += step) { |
1048 | 0 | for (size_t cx = 0; cx + 3 < rect.xsize(); cx += step) { |
1049 | 0 | if ((cy | cx) % 4 == 0) { |
1050 | 0 | continue; // Already tried with loop above (DCT16X32 case). |
1051 | 0 | } |
1052 | 0 | JXL_RETURN_IF_ERROR(FindBestFirstLevelDivisionForSquare( |
1053 | 0 | 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, ac_strategy, |
1054 | 0 | entropy_mul16X32, entropy_mul32X32, entropy_estimate, block, |
1055 | 0 | scratch_space, quantized)); |
1056 | 0 | } |
1057 | 0 | } |
1058 | 0 | return true; |
1059 | 0 | } |
1060 | | |
1061 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
1062 | | } // namespace HWY_NAMESPACE |
1063 | | } // namespace jxl |
1064 | | HWY_AFTER_NAMESPACE(); |
1065 | | |
1066 | | #if HWY_ONCE |
1067 | | namespace jxl { |
1068 | | HWY_EXPORT(ProcessRectACS); |
1069 | | |
1070 | | Status AcStrategyHeuristics::Init(const Image3F& src, const Rect& rect_in, |
1071 | | const ImageF& quant_field, const ImageF& mask, |
1072 | | const ImageF& mask1x1, |
1073 | 0 | DequantMatrices* matrices) { |
1074 | 0 | config.dequant = matrices; |
1075 | |
|
1076 | 0 | if (cparams.speed_tier >= SpeedTier::kCheetah) { |
1077 | 0 | JXL_RETURN_IF_ERROR( |
1078 | 0 | matrices->EnsureComputed(memory_manager, 1)); // DCT8 only |
1079 | 0 | } else { |
1080 | 0 | uint32_t acs_mask = 0; |
1081 | | // All transforms up to 64x64. |
1082 | 0 | for (size_t i = 0; i < static_cast<size_t>(AcStrategyType::DCT128X128); |
1083 | 0 | i++) { |
1084 | 0 | acs_mask |= (1 << i); |
1085 | 0 | } |
1086 | 0 | JXL_RETURN_IF_ERROR(matrices->EnsureComputed(memory_manager, acs_mask)); |
1087 | 0 | } |
1088 | | |
1089 | | // Image row pointers and strides. |
1090 | 0 | config.quant_field_row = quant_field.Row(0); |
1091 | 0 | config.quant_field_stride = quant_field.PixelsPerRow(); |
1092 | 0 | if (mask.xsize() > 0 && mask.ysize() > 0) { |
1093 | 0 | config.masking_field_row = mask.Row(0); |
1094 | 0 | config.masking_field_stride = mask.PixelsPerRow(); |
1095 | 0 | } |
1096 | 0 | config.mask1x1_xsize = mask1x1.xsize(); |
1097 | 0 | if (mask1x1.xsize() > 0 && mask1x1.ysize() > 0) { |
1098 | 0 | config.masking1x1_field_row = mask1x1.Row(0); |
1099 | 0 | config.masking1x1_field_stride = mask1x1.PixelsPerRow(); |
1100 | 0 | } |
1101 | |
|
1102 | 0 | config.src_rows[0] = rect_in.ConstPlaneRow(src, 0, 0); |
1103 | 0 | config.src_rows[1] = rect_in.ConstPlaneRow(src, 1, 0); |
1104 | 0 | config.src_rows[2] = rect_in.ConstPlaneRow(src, 2, 0); |
1105 | 0 | config.src_stride = src.PixelsPerRow(); |
1106 | | |
1107 | | // Entropy estimate is composed of two factors: |
1108 | | // - estimate of the number of bits that will be used by the block |
1109 | | // - information loss due to quantization |
1110 | | // The following constant controls the relative weights of these components. |
1111 | 0 | config.info_loss_multiplier = 1.2; |
1112 | 0 | config.zeros_mul = 9.3089059022677905; |
1113 | 0 | config.cost_delta = 10.833273317067883; |
1114 | |
|
1115 | 0 | static const float kBias = 0.13731742964354549; |
1116 | 0 | const float ratio = (cparams.butteraugli_distance + kBias) / (1.0f + kBias); |
1117 | |
|
1118 | 0 | static const float kPow1 = 0.33677806662454718; |
1119 | 0 | static const float kPow2 = 0.50990926717963703; |
1120 | 0 | static const float kPow3 = 0.36702940662370243; |
1121 | 0 | config.info_loss_multiplier *= std::pow(ratio, kPow1); |
1122 | 0 | config.zeros_mul *= std::pow(ratio, kPow2); |
1123 | 0 | config.cost_delta *= std::pow(ratio, kPow3); |
1124 | 0 | return true; |
1125 | 0 | } |
1126 | | |
1127 | 0 | Status AcStrategyHeuristics::PrepareForThreads(std::size_t num_threads) { |
1128 | 0 | const size_t dct_scratch_size = |
1129 | 0 | 3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim; |
1130 | 0 | mem_per_thread = 6 * AcStrategy::kMaxCoeffArea + dct_scratch_size; |
1131 | 0 | size_t mem_bytes = num_threads * mem_per_thread * sizeof(float); |
1132 | 0 | JXL_ASSIGN_OR_RETURN(mem, AlignedMemory::Create(memory_manager, mem_bytes)); |
1133 | 0 | qmem_per_thread = AcStrategy::kMaxCoeffArea; |
1134 | 0 | size_t qmem_bytes = num_threads * qmem_per_thread * sizeof(uint32_t); |
1135 | 0 | JXL_ASSIGN_OR_RETURN(qmem, AlignedMemory::Create(memory_manager, qmem_bytes)); |
1136 | 0 | return true; |
1137 | 0 | } |
1138 | | |
1139 | | Status AcStrategyHeuristics::ProcessRect(const Rect& rect, |
1140 | | const ColorCorrelationMap& cmap, |
1141 | | AcStrategyImage* ac_strategy, |
1142 | 0 | size_t thread) { |
1143 | | // In Cheetah mode, use DCT8 everywhere and uniform quantization. |
1144 | 0 | if (cparams.speed_tier >= SpeedTier::kCheetah) { |
1145 | 0 | ac_strategy->FillDCT8(rect); |
1146 | 0 | return true; |
1147 | 0 | } |
1148 | 0 | return HWY_DYNAMIC_DISPATCH(ProcessRectACS)( |
1149 | 0 | cparams, config, rect, cmap, |
1150 | 0 | mem.address<float>() + thread * mem_per_thread, |
1151 | 0 | qmem.address<uint32_t>() + thread * qmem_per_thread, ac_strategy); |
1152 | 0 | } |
1153 | | |
1154 | | Status AcStrategyHeuristics::Finalize(const FrameDimensions& frame_dim, |
1155 | | const AcStrategyImage& ac_strategy, |
1156 | 0 | AuxOut* aux_out) { |
1157 | | // Accounting and debug output. |
1158 | 0 | if (aux_out != nullptr) { |
1159 | 0 | aux_out->num_small_blocks = |
1160 | 0 | ac_strategy.CountBlocks(AcStrategyType::IDENTITY) + |
1161 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT2X2) + |
1162 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT4X4); |
1163 | 0 | aux_out->num_dct4x8_blocks = |
1164 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT4X8) + |
1165 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT8X4); |
1166 | 0 | aux_out->num_afv_blocks = ac_strategy.CountBlocks(AcStrategyType::AFV0) + |
1167 | 0 | ac_strategy.CountBlocks(AcStrategyType::AFV1) + |
1168 | 0 | ac_strategy.CountBlocks(AcStrategyType::AFV2) + |
1169 | 0 | ac_strategy.CountBlocks(AcStrategyType::AFV3); |
1170 | 0 | aux_out->num_dct8_blocks = ac_strategy.CountBlocks(AcStrategyType::DCT); |
1171 | 0 | aux_out->num_dct8x16_blocks = |
1172 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT8X16) + |
1173 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT16X8); |
1174 | 0 | aux_out->num_dct8x32_blocks = |
1175 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT8X32) + |
1176 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X8); |
1177 | 0 | aux_out->num_dct16_blocks = |
1178 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT16X16); |
1179 | 0 | aux_out->num_dct16x32_blocks = |
1180 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT16X32) + |
1181 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X16); |
1182 | 0 | aux_out->num_dct32_blocks = |
1183 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X32); |
1184 | 0 | aux_out->num_dct32x64_blocks = |
1185 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT32X64) + |
1186 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT64X32); |
1187 | 0 | aux_out->num_dct64_blocks = |
1188 | 0 | ac_strategy.CountBlocks(AcStrategyType::DCT64X64); |
1189 | 0 | } |
1190 | |
|
1191 | 0 | if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(cparams)) { |
1192 | 0 | JXL_RETURN_IF_ERROR(DumpAcStrategy(ac_strategy, frame_dim.xsize, |
1193 | 0 | frame_dim.ysize, "ac_strategy", aux_out, |
1194 | 0 | cparams)); |
1195 | 0 | } |
1196 | 0 | return true; |
1197 | 0 | } |
1198 | | |
1199 | | } // namespace jxl |
1200 | | #endif // HWY_ONCE |