/src/libjxl/lib/jxl/enc_modular.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_modular.h" |
7 | | |
8 | | #include <stddef.h> |
9 | | #include <stdint.h> |
10 | | |
11 | | #include <array> |
12 | | #include <atomic> |
13 | | #include <limits> |
14 | | #include <queue> |
15 | | #include <utility> |
16 | | #include <vector> |
17 | | |
18 | | #include "lib/jxl/aux_out.h" |
19 | | #include "lib/jxl/base/compiler_specific.h" |
20 | | #include "lib/jxl/base/padded_bytes.h" |
21 | | #include "lib/jxl/base/printf_macros.h" |
22 | | #include "lib/jxl/base/status.h" |
23 | | #include "lib/jxl/compressed_dc.h" |
24 | | #include "lib/jxl/dec_ans.h" |
25 | | #include "lib/jxl/enc_bit_writer.h" |
26 | | #include "lib/jxl/enc_cluster.h" |
27 | | #include "lib/jxl/enc_params.h" |
28 | | #include "lib/jxl/enc_patch_dictionary.h" |
29 | | #include "lib/jxl/enc_quant_weights.h" |
30 | | #include "lib/jxl/frame_header.h" |
31 | | #include "lib/jxl/gaborish.h" |
32 | | #include "lib/jxl/modular/encoding/context_predict.h" |
33 | | #include "lib/jxl/modular/encoding/enc_debug_tree.h" |
34 | | #include "lib/jxl/modular/encoding/enc_encoding.h" |
35 | | #include "lib/jxl/modular/encoding/encoding.h" |
36 | | #include "lib/jxl/modular/encoding/ma_common.h" |
37 | | #include "lib/jxl/modular/modular_image.h" |
38 | | #include "lib/jxl/modular/options.h" |
39 | | #include "lib/jxl/modular/transform/enc_transform.h" |
40 | | #include "lib/jxl/toc.h" |
41 | | |
42 | | namespace jxl { |
43 | | |
44 | | namespace { |
// Default quantization factors for Squeeze residuals.
// The values below correspond to -Q 50; other quality settings simply scale
// them (results are rounded down and obviously cannot get below 1).

// Knob for the overall quality range: decrease this number for higher
// quality.
constexpr float squeeze_quality_factor = 0.35;

// Knob for the balance between luma (or anything non-chroma) and chroma:
// decrease this number for higher quality luma.
constexpr float squeeze_luma_factor = 1.1;

constexpr float squeeze_quality_factor_xyb = 2.4f;

// One row of 16 factors per XYB channel.
constexpr float squeeze_xyb_qtable[3][16] = {
    // Y
    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
     0.08, 0.04, 0.02, 0.01, 0.005},
    // X
    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5},
    // B-Y
    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5},
};

constexpr float squeeze_luma_qtable[16] = {
    163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
    0.64,   0.32,  0.16,  0.08,  0.04,  0.02, 0.01, 0.005};

// For 8-bit input, the range of YCoCg chroma is -255..255, so basically this
// does 4:2:0 subsampling (the two most fine grained layers get quantized
// away).
constexpr float squeeze_chroma_qtable[16] = {
    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
72 | | |
73 | | // `cutoffs` must be sorted. |
74 | | Tree MakeFixedTree(int property, const std::vector<int32_t>& cutoffs, |
75 | 92 | Predictor pred, size_t num_pixels) { |
76 | 92 | size_t log_px = CeilLog2Nonzero(num_pixels); |
77 | 92 | size_t min_gap = 0; |
78 | | // Reduce fixed tree height when encoding small images. |
79 | 92 | if (log_px < 14) { |
80 | 92 | min_gap = 8 * (14 - log_px); |
81 | 92 | } |
82 | 92 | Tree tree; |
83 | 92 | struct NodeInfo { |
84 | 92 | size_t begin, end, pos; |
85 | 92 | }; |
86 | 92 | std::queue<NodeInfo> q; |
87 | | // Leaf IDs will be set by roundtrip decoding the tree. |
88 | 92 | tree.push_back(PropertyDecisionNode::Leaf(pred)); |
89 | 92 | q.push(NodeInfo{0, cutoffs.size(), 0}); |
90 | 184 | while (!q.empty()) { |
91 | 92 | NodeInfo info = q.front(); |
92 | 92 | q.pop(); |
93 | 92 | if (info.begin + min_gap >= info.end) continue; |
94 | 0 | uint32_t split = (info.begin + info.end) / 2; |
95 | 0 | tree[info.pos] = |
96 | 0 | PropertyDecisionNode::Split(property, cutoffs[split], tree.size()); |
97 | 0 | q.push(NodeInfo{split + 1, info.end, tree.size()}); |
98 | 0 | tree.push_back(PropertyDecisionNode::Leaf(pred)); |
99 | 0 | q.push(NodeInfo{info.begin, split, tree.size()}); |
100 | 0 | tree.push_back(PropertyDecisionNode::Leaf(pred)); |
101 | 0 | } |
102 | 92 | return tree; |
103 | 92 | } |
104 | | |
105 | 184 | Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) { |
106 | 184 | if (tree_kind == ModularOptions::TreeKind::kJpegTranscodeACMeta || |
107 | 184 | tree_kind == ModularOptions::TreeKind::kTrivialTreeNoPredictor) { |
108 | | // All the data is 0, so no need for a fancy tree. |
109 | 0 | return {PropertyDecisionNode::Leaf(Predictor::Zero)}; |
110 | 0 | } |
111 | 184 | if (tree_kind == ModularOptions::TreeKind::kFalconACMeta) { |
112 | | // All the data is 0 except the quant field. TODO(veluca): make that 0 too. |
113 | 0 | return {PropertyDecisionNode::Leaf(Predictor::Left)}; |
114 | 0 | } |
115 | 184 | if (tree_kind == ModularOptions::TreeKind::kACMeta) { |
116 | | // Small image. |
117 | 92 | if (total_pixels < 1024) { |
118 | 92 | return {PropertyDecisionNode::Leaf(Predictor::Left)}; |
119 | 92 | } |
120 | 0 | Tree tree; |
121 | | // 0: c > 1 |
122 | 0 | tree.push_back(PropertyDecisionNode::Split(0, 1, 1)); |
123 | | // 1: c > 2 |
124 | 0 | tree.push_back(PropertyDecisionNode::Split(0, 2, 3)); |
125 | | // 2: c > 0 |
126 | 0 | tree.push_back(PropertyDecisionNode::Split(0, 0, 5)); |
127 | | // 3: EPF control field (all 0 or 4), top > 0 |
128 | 0 | tree.push_back(PropertyDecisionNode::Split(6, 0, 21)); |
129 | | // 4: ACS+QF, y > 0 |
130 | 0 | tree.push_back(PropertyDecisionNode::Split(2, 0, 7)); |
131 | | // 5: CfL x |
132 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient)); |
133 | | // 6: CfL b |
134 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient)); |
135 | | // 7: QF: split according to the left quant value. |
136 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 5, 9)); |
137 | | // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5, large |
138 | | // rectangular 6-11, 8x8 12+), according to previous ACS value. |
139 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 5, 15)); |
140 | | // QF |
141 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 11, 11)); |
142 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 3, 13)); |
143 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); |
144 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); |
145 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); |
146 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); |
147 | | // ACS |
148 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 11, 17)); |
149 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 3, 19)); |
150 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
151 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
152 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
153 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
154 | | // EPF, left > 0 |
155 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 0, 23)); |
156 | 0 | tree.push_back(PropertyDecisionNode::Split(7, 0, 25)); |
157 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
158 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
159 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
160 | 0 | tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); |
161 | 0 | return tree; |
162 | 92 | } |
163 | 92 | if (tree_kind == ModularOptions::TreeKind::kWPFixedDC) { |
164 | 92 | std::vector<int32_t> cutoffs = { |
165 | 92 | -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15, |
166 | 92 | -11, -7, -4, -3, -1, 0, 1, 3, 5, 7, 11, |
167 | 92 | 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500}; |
168 | 92 | return MakeFixedTree(kWPProp, cutoffs, Predictor::Weighted, total_pixels); |
169 | 92 | } |
170 | 0 | if (tree_kind == ModularOptions::TreeKind::kGradientFixedDC) { |
171 | 0 | std::vector<int32_t> cutoffs = { |
172 | 0 | -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15, |
173 | 0 | -11, -7, -4, -3, -1, 0, 1, 3, 5, 7, 11, |
174 | 0 | 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500}; |
175 | 0 | return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient, |
176 | 0 | total_pixels); |
177 | 0 | } |
178 | 0 | JXL_ABORT("Unreachable"); |
179 | 0 | return {}; |
180 | 0 | } |
181 | | |
182 | | // Merges the trees in `trees` using nodes that decide on stream_id, as defined |
183 | | // by `tree_splits`. |
184 | | void MergeTrees(const std::vector<Tree>& trees, |
185 | | const std::vector<size_t>& tree_splits, size_t begin, |
186 | 276 | size_t end, Tree* tree) { |
187 | 276 | JXL_ASSERT(trees.size() + 1 == tree_splits.size()); |
188 | 276 | JXL_ASSERT(end > begin); |
189 | 276 | JXL_ASSERT(end <= trees.size()); |
190 | 276 | if (end == begin + 1) { |
191 | | // Insert the tree, adding the opportune offset to all child nodes. |
192 | | // This will make the leaf IDs wrong, but subsequent roundtripping will fix |
193 | | // them. |
194 | 184 | size_t sz = tree->size(); |
195 | 184 | tree->insert(tree->end(), trees[begin].begin(), trees[begin].end()); |
196 | 368 | for (size_t i = sz; i < tree->size(); i++) { |
197 | 184 | (*tree)[i].lchild += sz; |
198 | 184 | (*tree)[i].rchild += sz; |
199 | 184 | } |
200 | 184 | return; |
201 | 184 | } |
202 | 92 | size_t mid = (begin + end) / 2; |
203 | 92 | size_t splitval = tree_splits[mid] - 1; |
204 | 92 | size_t cur = tree->size(); |
205 | 92 | tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1); |
206 | 92 | (*tree)[cur].lchild = tree->size(); |
207 | 92 | MergeTrees(trees, tree_splits, mid, end, tree); |
208 | 92 | (*tree)[cur].rchild = tree->size(); |
209 | 92 | MergeTrees(trees, tree_splits, begin, mid, tree); |
210 | 92 | } |
211 | | |
212 | 0 | void QuantizeChannel(Channel& ch, const int q) { |
213 | 0 | if (q == 1) return; |
214 | 0 | for (size_t y = 0; y < ch.plane.ysize(); y++) { |
215 | 0 | pixel_type* row = ch.plane.Row(y); |
216 | 0 | for (size_t x = 0; x < ch.plane.xsize(); x++) { |
217 | 0 | if (row[x] < 0) { |
218 | 0 | row[x] = -((-row[x] + q / 2) / q) * q; |
219 | 0 | } else { |
220 | 0 | row[x] = ((row[x] + q / 2) / q) * q; |
221 | 0 | } |
222 | 0 | } |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | | // convert binary32 float that corresponds to custom [bits]-bit float (with |
227 | | // [exp_bits] exponent bits) to a [bits]-bit integer representation that should |
228 | | // fit in pixel_type |
229 | | Status float_to_int(const float* const row_in, pixel_type* const row_out, |
230 | | size_t xsize, unsigned int bits, unsigned int exp_bits, |
231 | 0 | bool fp, double dfactor) { |
232 | 0 | JXL_ASSERT(sizeof(pixel_type) * 8 >= bits); |
233 | 0 | if (!fp) { |
234 | 0 | if (bits > 22) { |
235 | 0 | for (size_t x = 0; x < xsize; ++x) { |
236 | 0 | row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5); |
237 | 0 | } |
238 | 0 | } else { |
239 | 0 | float factor = dfactor; |
240 | 0 | for (size_t x = 0; x < xsize; ++x) { |
241 | 0 | row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f); |
242 | 0 | } |
243 | 0 | } |
244 | 0 | return true; |
245 | 0 | } |
246 | 0 | if (bits == 32 && fp) { |
247 | 0 | JXL_ASSERT(exp_bits == 8); |
248 | 0 | memcpy((void*)row_out, (const void*)row_in, 4 * xsize); |
249 | 0 | return true; |
250 | 0 | } |
251 | | |
252 | 0 | int exp_bias = (1 << (exp_bits - 1)) - 1; |
253 | 0 | int max_exp = (1 << exp_bits) - 1; |
254 | 0 | uint32_t sign = (1u << (bits - 1)); |
255 | 0 | int mant_bits = bits - exp_bits - 1; |
256 | 0 | int mant_shift = 23 - mant_bits; |
257 | 0 | for (size_t x = 0; x < xsize; ++x) { |
258 | 0 | uint32_t f; |
259 | 0 | memcpy(&f, &row_in[x], 4); |
260 | 0 | int signbit = (f >> 31); |
261 | 0 | f &= 0x7fffffff; |
262 | 0 | if (f == 0) { |
263 | 0 | row_out[x] = (signbit ? sign : 0); |
264 | 0 | continue; |
265 | 0 | } |
266 | 0 | int exp = (f >> 23) - 127; |
267 | 0 | if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed"); |
268 | 0 | int mantissa = (f & 0x007fffff); |
269 | | // broke up the binary32 into its parts, now reassemble into |
270 | | // arbitrary float |
271 | 0 | exp += exp_bias; |
272 | 0 | if (exp < 0) { // will become a subnormal number |
273 | | // add implicit leading 1 to mantissa |
274 | 0 | mantissa |= 0x00800000; |
275 | 0 | if (exp < -mant_bits) { |
276 | 0 | return JXL_FAILURE( |
277 | 0 | "Invalid float number: %g cannot be represented with %i " |
278 | 0 | "exp_bits and %i mant_bits (exp %i)", |
279 | 0 | row_in[x], exp_bits, mant_bits, exp); |
280 | 0 | } |
281 | 0 | mantissa >>= 1 - exp; |
282 | 0 | exp = 0; |
283 | 0 | } |
284 | | // exp should be representable in exp_bits, otherwise input was |
285 | | // invalid |
286 | 0 | if (exp > max_exp) return JXL_FAILURE("Invalid float exponent"); |
287 | 0 | if (mantissa & ((1 << mant_shift) - 1)) { |
288 | 0 | return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x], |
289 | 0 | mantissa); |
290 | 0 | } |
291 | 0 | mantissa >>= mant_shift; |
292 | 0 | f = (signbit ? sign : 0); |
293 | 0 | f |= (exp << mant_bits); |
294 | 0 | f |= mantissa; |
295 | 0 | row_out[x] = (pixel_type)f; |
296 | 0 | } |
297 | 0 | return true; |
298 | 0 | } |
299 | | } // namespace |
300 | | |
// Sets up the per-frame modular encoder state: copies `cparams_orig` into
// `cparams_`, resolves defaults (responsive, predictor, splitting heuristics)
// based on speed tier / decoding speed tier / losslessness, computes
// `tree_splits_`, and allocates one image and one options entry per stream.
ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header,
                                         const CompressParams& cparams_orig)
    : frame_dim_(frame_header.ToFrameDimensions()), cparams_(cparams_orig) {
  size_t num_streams =
      ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);
  // For lossless, the decoding speed tier restricts the tree mode / predictor.
  if (cparams_.IsLossless()) {
    switch (cparams_.decoding_speed_tier) {
      case 0:
        break;
      case 1:
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
        break;
      case 2: {
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
        cparams_.options.predictor = Predictor::Gradient;
        break;
      }
      case 3: {  // LZ77, no Gradient.
        // NOTE(review): the predictor is set to Gradient below, so the
        // "no Gradient" wording above looks stale - confirm.
        cparams_.options.nb_repeats = 0;
        cparams_.options.predictor = Predictor::Gradient;
        break;
      }
      default: {  // LZ77, no predictor.
        cparams_.options.nb_repeats = 0;
        cparams_.options.predictor = Predictor::Zero;
        break;
      }
    }
  }
  // NOTE(review): `responsive` may still be negative ("unset") at this point,
  // which counts as true here; the default is only resolved further down -
  // confirm that this ordering is intended.
  if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive &&
      cparams_.IsLossless()) {
    cparams_.options.tree_kind =
        ModularOptions::TreeKind::kTrivialTreeNoPredictor;
    cparams_.options.nb_repeats = 0;
  }
  stream_images_.resize(num_streams);

  // use a sensible default if nothing explicit is specified:
  // Squeeze for lossy, no squeeze for lossless
  if (cparams_.responsive < 0) {
    if (cparams_.IsLossless()) {
      cparams_.responsive = 0;
    } else {
      cparams_.responsive = 1;
    }
  }

  // Node threshold for the splitting heuristics depends on the speed tier.
  if (cparams_.speed_tier > SpeedTier::kWombat) {
    cparams_.options.splitting_heuristics_node_threshold = 192;
  } else {
    cparams_.options.splitting_heuristics_node_threshold = 96;
  }
  {
    // Set properties.
    std::vector<uint32_t> prop_order;
    if (cparams_.responsive) {
      // Properties in order of their likelihood of being useful for Squeeze
      // residuals.
      prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
    } else {
      // Same, but for the non-Squeeze case.
      prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
    }
    // The speed tier decides how many of the ordered properties are used and
    // how many distinct values per property are considered.
    switch (cparams_.speed_tier) {
      case SpeedTier::kSquirrel:
        cparams_.options.splitting_heuristics_properties.assign(
            prop_order.begin(), prop_order.begin() + 8);
        cparams_.options.max_property_values = 32;
        break;
      case SpeedTier::kKitten:
        cparams_.options.splitting_heuristics_properties.assign(
            prop_order.begin(), prop_order.begin() + 10);
        cparams_.options.max_property_values = 64;
        break;
      case SpeedTier::kTortoise:
        cparams_.options.splitting_heuristics_properties = prop_order;
        cparams_.options.max_property_values = 256;
        break;
      default:
        cparams_.options.splitting_heuristics_properties.assign(
            prop_order.begin(), prop_order.begin() + 6);
        cparams_.options.max_property_values = 16;
        break;
    }
    if (cparams_.speed_tier > SpeedTier::kTortoise) {
      // Gradient in previous channels.
      for (int i = 0; i < cparams_.options.max_properties; i++) {
        cparams_.options.splitting_heuristics_properties.push_back(
            kNumNonrefProperties + i * 4 + 3);
      }
    } else {
      // All the extra properties in Tortoise mode.
      for (int i = 0; i < cparams_.options.max_properties * 4; i++) {
        cparams_.options.splitting_heuristics_properties.push_back(
            kNumNonrefProperties + i);
      }
    }
  }

  // A predictor value of -1 means that no predictor was chosen explicitly.
  if (cparams_.options.predictor == static_cast<Predictor>(-1)) {
    // no explicit predictor(s) given, set a good default
    if ((cparams_.speed_tier <= SpeedTier::kTortoise ||
         cparams_.modular_mode == false) &&
        cparams_.IsLossless() && cparams_.responsive == false) {
      // TODO(veluca): allow all predictors that don't break residual
      // multipliers in lossy mode.
      cparams_.options.predictor = Predictor::Variable;
    } else if (cparams_.responsive || cparams_.lossy_palette) {
      // zero predictor for Squeeze residues and lossy palette
      cparams_.options.predictor = Predictor::Zero;
    } else if (!cparams_.IsLossless()) {
      // If not responsive and lossy. TODO(veluca): use near_lossless instead?
      cparams_.options.predictor = Predictor::Gradient;
    } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
      // try median and weighted predictor for anything else
      cparams_.options.predictor = Predictor::Best;
    } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
      // just weighted predictor in falcon mode
      cparams_.options.predictor = Predictor::Weighted;
    } else if (cparams_.speed_tier > SpeedTier::kFalcon) {
      // just gradient predictor in thunder mode
      cparams_.options.predictor = Predictor::Gradient;
    }
  } else {
    // An explicit predictor was requested: remember it in delta_pred_; with
    // lossy palette the option itself is then reset to Zero.
    delta_pred_ = cparams_.options.predictor;
    if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero;
  }
  // In lossy mode, Weighted / Variable / Best are replaced by Zero.
  if (!cparams_.IsLossless()) {
    if (cparams_.options.predictor == Predictor::Weighted ||
        cparams_.options.predictor == Predictor::Variable ||
        cparams_.options.predictor == Predictor::Best)
      cparams_.options.predictor = Predictor::Zero;
  }
  // tree_splits_ lists stream-id boundaries: it starts at 0, ends at
  // num_streams and, when not in modular mode, additionally contains the first
  // stream id of each of the stream categories pushed below.
  tree_splits_.push_back(0);
  if (cparams_.modular_mode == false) {
    cparams_.options.fast_decode_multiplier = 1.0f;
    tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::QuantTable(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
    ac_metadata_size.resize(frame_dim_.num_dc_groups);
    extra_dc_precision.resize(frame_dim_.num_dc_groups);
  }
  tree_splits_.push_back(num_streams);
  cparams_.options.max_chan_size = frame_dim_.group_dim;
  cparams_.options.group_dim = frame_dim_.group_dim;

  // TODO(veluca): figure out how to use different predictor sets per channel.
  // Every stream starts out with a copy of the options computed above.
  stream_options_.resize(num_streams, cparams_.options);
}
452 | | |
453 | | bool do_transform(Image& image, const Transform& tr, |
454 | | const weighted::Header& wp_header, |
455 | 0 | jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) { |
456 | 0 | Transform t = tr; |
457 | 0 | bool did_it = true; |
458 | 0 | if (force_jxlart) { |
459 | 0 | if (!t.MetaApply(image)) return false; |
460 | 0 | } else { |
461 | 0 | did_it = TransformForward(t, image, wp_header, pool); |
462 | 0 | } |
463 | 0 | if (did_it) image.transform.push_back(t); |
464 | 0 | return did_it; |
465 | 0 | } |
466 | | |
467 | | Status ModularFrameEncoder::ComputeEncodingData( |
468 | | const FrameHeader& frame_header, const ImageMetadata& metadata, |
469 | | Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels, |
470 | | PassesEncoderState* JXL_RESTRICT enc_state, const JxlCmsInterface& cms, |
471 | 92 | ThreadPool* pool, AuxOut* aux_out, bool do_color) { |
472 | 92 | JXL_DEBUG_V(6, "Computing modular encoding data for frame %s", |
473 | 92 | frame_header.DebugString().c_str()); |
474 | | |
475 | 92 | if (do_color && frame_header.loop_filter.gab) { |
476 | 0 | GaborishInverse(color, 0.9908511000000001f, pool); |
477 | 0 | } |
478 | | |
479 | 92 | if (do_color && metadata.bit_depth.bits_per_sample <= 16 && |
480 | 92 | cparams_.speed_tier < SpeedTier::kCheetah && |
481 | 92 | cparams_.decoding_speed_tier < 2) { |
482 | 0 | FindBestPatchDictionary(*color, enc_state, cms, nullptr, aux_out, |
483 | 0 | cparams_.color_transform == ColorTransform::kXYB); |
484 | 0 | PatchDictionaryEncoder::SubtractFrom( |
485 | 0 | enc_state->shared.image_features.patches, color); |
486 | 0 | } |
487 | | |
488 | | // Convert ImageBundle to modular Image object |
489 | 92 | const size_t xsize = frame_dim_.xsize; |
490 | 92 | const size_t ysize = frame_dim_.ysize; |
491 | | |
492 | 92 | int nb_chans = 3; |
493 | 92 | if (metadata.color_encoding.IsGray() && |
494 | 92 | cparams_.color_transform == ColorTransform::kNone) { |
495 | 0 | nb_chans = 1; |
496 | 0 | } |
497 | 92 | if (!do_color) nb_chans = 0; |
498 | | |
499 | 92 | nb_chans += extra_channels.size(); |
500 | | |
501 | 92 | bool fp = metadata.bit_depth.floating_point_sample && |
502 | 92 | cparams_.color_transform != ColorTransform::kXYB; |
503 | | |
504 | | // bits_per_sample is just metadata for XYB images. |
505 | 92 | if (metadata.bit_depth.bits_per_sample >= 32 && do_color && |
506 | 92 | cparams_.color_transform != ColorTransform::kXYB) { |
507 | 0 | if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { |
508 | 0 | return JXL_FAILURE("uint32_t not supported in enc_modular"); |
509 | 0 | } else if (metadata.bit_depth.bits_per_sample > 32) { |
510 | 0 | return JXL_FAILURE("bits_per_sample > 32 not supported"); |
511 | 0 | } |
512 | 0 | } |
513 | | |
514 | | // in the non-float case, there is an implicit 0 sign bit |
515 | 92 | int max_bitdepth = |
516 | 92 | do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0; |
517 | 92 | Image& gi = stream_images_[0]; |
518 | 92 | gi = Image(xsize, ysize, metadata.bit_depth.bits_per_sample, nb_chans); |
519 | 92 | int c = 0; |
520 | 92 | if (cparams_.color_transform == ColorTransform::kXYB && |
521 | 92 | cparams_.modular_mode == true) { |
522 | 0 | float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f}; |
523 | 0 | if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) { |
524 | | // quantize XYB here and then treat it as a lossless image |
525 | 0 | enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance); |
526 | 0 | enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); |
527 | 0 | enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); |
528 | 0 | cparams_.butteraugli_distance = 0; |
529 | 0 | } |
530 | 0 | if (cparams_.manual_xyb_factors.size() == 3) { |
531 | 0 | DequantMatricesSetCustomDC(&enc_state->shared.matrices, |
532 | 0 | cparams_.manual_xyb_factors.data()); |
533 | | // TODO(jon): update max_bitdepth in this case |
534 | 0 | } else { |
535 | 0 | DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors); |
536 | 0 | max_bitdepth = 12; |
537 | 0 | } |
538 | 0 | } |
539 | 92 | pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0; |
540 | 92 | if (do_color) { |
541 | 0 | for (; c < 3; c++) { |
542 | 0 | if (metadata.color_encoding.IsGray() && |
543 | 0 | cparams_.color_transform == ColorTransform::kNone && |
544 | 0 | c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0)) |
545 | 0 | continue; |
546 | 0 | int c_out = c; |
547 | | // XYB is encoded as YX(B-Y) |
548 | 0 | if (cparams_.color_transform == ColorTransform::kXYB && c < 2) |
549 | 0 | c_out = 1 - c_out; |
550 | 0 | double factor = maxval; |
551 | 0 | if (cparams_.color_transform == ColorTransform::kXYB) |
552 | 0 | factor = enc_state->shared.matrices.InvDCQuant(c); |
553 | 0 | if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) { |
554 | 0 | JXL_ASSERT(!fp); |
555 | 0 | for (size_t y = 0; y < ysize; ++y) { |
556 | 0 | const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); |
557 | 0 | pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); |
558 | 0 | pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y); |
559 | 0 | for (size_t x = 0; x < xsize; ++x) { |
560 | 0 | row_out[x] = row_in[x] * factor + 0.5f; |
561 | 0 | row_out[x] -= row_Y[x]; |
562 | | // zero the lsb of B |
563 | 0 | row_out[x] = row_out[x] / 2 * 2; |
564 | 0 | } |
565 | 0 | } |
566 | 0 | } else { |
567 | 0 | int bits = metadata.bit_depth.bits_per_sample; |
568 | 0 | int exp_bits = metadata.bit_depth.exponent_bits_per_sample; |
569 | 0 | gi.channel[c_out].hshift = |
570 | 0 | enc_state->shared.frame_header.chroma_subsampling.HShift(c); |
571 | 0 | gi.channel[c_out].vshift = |
572 | 0 | enc_state->shared.frame_header.chroma_subsampling.VShift(c); |
573 | 0 | size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift); |
574 | 0 | size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift); |
575 | 0 | gi.channel[c_out].shrink(xsize_shifted, ysize_shifted); |
576 | 0 | std::atomic<bool> has_error{false}; |
577 | 0 | JXL_RETURN_IF_ERROR(RunOnPool( |
578 | 0 | pool, 0, ysize_shifted, ThreadPool::NoInit, |
579 | 0 | [&](const int task, const int thread) { |
580 | 0 | const size_t y = task; |
581 | 0 | const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); |
582 | 0 | pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); |
583 | 0 | if (!float_to_int(row_in, row_out, xsize_shifted, bits, exp_bits, |
584 | 0 | fp, factor)) { |
585 | 0 | has_error = true; |
586 | 0 | }; |
587 | 0 | }, |
588 | 0 | "float2int")); |
589 | 0 | if (has_error) { |
590 | 0 | return JXL_FAILURE("Error in float to integer conversion"); |
591 | 0 | } |
592 | 0 | } |
593 | 0 | } |
594 | 0 | if (metadata.color_encoding.IsGray() && |
595 | 0 | cparams_.color_transform == ColorTransform::kNone) |
596 | 0 | c = 1; |
597 | 0 | } |
598 | | |
599 | 92 | for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) { |
600 | 0 | const ExtraChannelInfo& eci = metadata.extra_channel_info[ec]; |
601 | 0 | size_t ecups = frame_header.extra_channel_upsampling[ec]; |
602 | 0 | gi.channel[c].shrink(DivCeil(frame_dim_.xsize_upsampled, ecups), |
603 | 0 | DivCeil(frame_dim_.ysize_upsampled, ecups)); |
604 | 0 | gi.channel[c].hshift = gi.channel[c].vshift = |
605 | 0 | CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); |
606 | |
|
607 | 0 | int bits = eci.bit_depth.bits_per_sample; |
608 | 0 | int exp_bits = eci.bit_depth.exponent_bits_per_sample; |
609 | 0 | bool fp = eci.bit_depth.floating_point_sample; |
610 | 0 | double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1)); |
611 | 0 | if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1); |
612 | 0 | std::atomic<bool> has_error{false}; |
613 | 0 | JXL_RETURN_IF_ERROR(RunOnPool( |
614 | 0 | pool, 0, gi.channel[c].plane.ysize(), ThreadPool::NoInit, |
615 | 0 | [&](const int task, const int thread) { |
616 | 0 | const size_t y = task; |
617 | 0 | const float* const JXL_RESTRICT row_in = extra_channels[ec].Row(y); |
618 | 0 | pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y); |
619 | 0 | if (!float_to_int(row_in, row_out, gi.channel[c].plane.xsize(), bits, |
620 | 0 | exp_bits, fp, factor)) { |
621 | 0 | has_error = true; |
622 | 0 | }; |
623 | 0 | }, |
624 | 0 | "float2int")); |
625 | 0 | if (has_error) return JXL_FAILURE("Error in float to integer conversion"); |
626 | 0 | } |
627 | 92 | JXL_ASSERT(c == nb_chans); |
628 | | |
629 | 92 | int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32); |
630 | 92 | if (max_bitdepth > level_max_bitdepth) |
631 | 0 | return JXL_FAILURE( |
632 | 92 | "Bitdepth too high for level %i (need %i bits, have only %i in this " |
633 | 92 | "level)", |
634 | 92 | cparams_.level, max_bitdepth, level_max_bitdepth); |
635 | | |
636 | | // Set options and apply transformations |
637 | | |
638 | 92 | if (cparams_.butteraugli_distance > 0) { |
639 | 92 | if (cparams_.palette_colors != 0) { |
640 | 92 | JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms"); |
641 | 92 | } |
642 | 92 | if (cparams_.color_transform == ColorTransform::kXYB) { |
643 | 92 | cparams_.channel_colors_pre_transform_percent = 0; |
644 | 92 | } |
645 | 92 | cparams_.channel_colors_percent = 0; |
646 | 92 | cparams_.palette_colors = 0; |
647 | 92 | cparams_.lossy_palette = false; |
648 | 92 | } |
649 | | |
650 | | // if few colors, do all-channel palette before trying channel palette |
651 | | // Logic is as follows: |
652 | | // - if you can make a palette with few colors (arbitrary threshold: 200), |
653 | | // then you can also make channel palettes, but they will just be extra |
654 | | // signaling cost for almost no benefit |
655 | | // - if the palette needs more colors, then channel palette might help to |
656 | | // reduce palette signaling cost |
657 | 92 | if (cparams_.palette_colors != 0 && |
658 | 92 | cparams_.speed_tier < SpeedTier::kFalcon) { |
659 | | // all-channel palette (e.g. RGBA) |
660 | 0 | if (gi.channel.size() > 1) { |
661 | 0 | Transform maybe_palette(TransformId::kPalette); |
662 | 0 | maybe_palette.begin_c = gi.nb_meta_channels; |
663 | 0 | maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; |
664 | 0 | maybe_palette.nb_colors = |
665 | 0 | std::min(std::min(200, (int)(xsize * ysize / 8)), |
666 | 0 | std::abs(cparams_.palette_colors) / 16); |
667 | 0 | maybe_palette.ordered_palette = cparams_.palette_colors >= 0; |
668 | 0 | maybe_palette.lossy_palette = false; |
669 | 0 | do_transform(gi, maybe_palette, weighted::Header(), pool); |
670 | 0 | } |
671 | 0 | } |
672 | | |
673 | | // Global channel palette |
674 | 92 | if (cparams_.channel_colors_pre_transform_percent > 0 && |
675 | 92 | !cparams_.lossy_palette && |
676 | 92 | (cparams_.speed_tier <= SpeedTier::kThunder || |
677 | 0 | (do_color && metadata.bit_depth.bits_per_sample > 8))) { |
678 | | // single channel palette (like FLIF's ChannelCompact) |
679 | 0 | size_t nb_channels = gi.channel.size() - gi.nb_meta_channels; |
680 | 0 | int orig_bitdepth = max_bitdepth; |
681 | 0 | max_bitdepth = 0; |
682 | 0 | for (size_t i = 0; i < nb_channels; i++) { |
683 | 0 | int32_t min, max; |
684 | 0 | compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); |
685 | 0 | int64_t colors = max - min + 1; |
686 | 0 | JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); |
687 | 0 | Transform maybe_palette_1(TransformId::kPalette); |
688 | 0 | maybe_palette_1.begin_c = i + gi.nb_meta_channels; |
689 | 0 | maybe_palette_1.num_c = 1; |
690 | | // simple heuristic: if less than X percent of the values in the range |
691 | | // actually occur, it is probably worth it to do a compaction |
692 | | // (but only if the channel palette is less than 6% the size of the |
693 | | // image itself) |
694 | 0 | maybe_palette_1.nb_colors = std::min( |
695 | 0 | (int)(xsize * ysize / 16), |
696 | 0 | (int)(cparams_.channel_colors_pre_transform_percent / 100. * colors)); |
697 | 0 | if (do_transform(gi, maybe_palette_1, weighted::Header(), pool)) { |
698 | | // effective bit depth is lower, adjust quantization accordingly |
699 | 0 | compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); |
700 | 0 | if (max < maxval) maxval = max; |
701 | 0 | int ch_bitdepth = |
702 | 0 | (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0); |
703 | 0 | if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth; |
704 | 0 | } else |
705 | 0 | max_bitdepth = orig_bitdepth; |
706 | 0 | } |
707 | 0 | } |
708 | | |
709 | | // Global palette |
710 | 92 | if ((cparams_.palette_colors != 0 || cparams_.lossy_palette) && |
711 | 92 | cparams_.speed_tier < SpeedTier::kFalcon) { |
712 | | // all-channel palette (e.g. RGBA) |
713 | 0 | if (gi.channel.size() - gi.nb_meta_channels > 1) { |
714 | 0 | Transform maybe_palette(TransformId::kPalette); |
715 | 0 | maybe_palette.begin_c = gi.nb_meta_channels; |
716 | 0 | maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; |
717 | 0 | maybe_palette.nb_colors = |
718 | 0 | std::min((int)(xsize * ysize / 8), std::abs(cparams_.palette_colors)); |
719 | 0 | maybe_palette.ordered_palette = cparams_.palette_colors >= 0; |
720 | 0 | maybe_palette.lossy_palette = |
721 | 0 | (cparams_.lossy_palette && maybe_palette.num_c == 3); |
722 | 0 | if (maybe_palette.lossy_palette) { |
723 | 0 | maybe_palette.predictor = delta_pred_; |
724 | 0 | } |
725 | | // TODO(veluca): use a custom weighted header if using the weighted |
726 | | // predictor. |
727 | 0 | do_transform(gi, maybe_palette, weighted::Header(), pool, |
728 | 0 | cparams_.options.zero_tokens); |
729 | 0 | } |
730 | | // all-minus-one-channel palette (RGB with separate alpha, or CMY with |
731 | | // separate K) |
732 | 0 | if (gi.channel.size() - gi.nb_meta_channels > 3) { |
733 | 0 | Transform maybe_palette_3(TransformId::kPalette); |
734 | 0 | maybe_palette_3.begin_c = gi.nb_meta_channels; |
735 | 0 | maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1; |
736 | 0 | maybe_palette_3.nb_colors = |
737 | 0 | std::min((int)(xsize * ysize / 8), std::abs(cparams_.palette_colors)); |
738 | 0 | maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; |
739 | 0 | maybe_palette_3.lossy_palette = cparams_.lossy_palette; |
740 | 0 | if (maybe_palette_3.lossy_palette) { |
741 | 0 | maybe_palette_3.predictor = delta_pred_; |
742 | 0 | } |
743 | 0 | do_transform(gi, maybe_palette_3, weighted::Header(), pool, |
744 | 0 | cparams_.options.zero_tokens); |
745 | 0 | } |
746 | 0 | } |
747 | | |
748 | | // don't do an RCT if we're short on bits |
749 | 92 | if (cparams_.color_transform == ColorTransform::kNone && do_color && |
750 | 92 | gi.channel.size() - gi.nb_meta_channels >= 3 && |
751 | 92 | max_bitdepth + 1 < level_max_bitdepth) { |
752 | 0 | if (cparams_.colorspace < 0 && |
753 | 0 | (!cparams_.IsLossless() || cparams_.speed_tier > SpeedTier::kHare)) { |
754 | 0 | Transform ycocg{TransformId::kRCT}; |
755 | 0 | ycocg.rct_type = 6; |
756 | 0 | ycocg.begin_c = gi.nb_meta_channels; |
757 | 0 | do_transform(gi, ycocg, weighted::Header(), pool); |
758 | 0 | max_bitdepth++; |
759 | 0 | } else if (cparams_.colorspace > 0) { |
760 | 0 | Transform sg(TransformId::kRCT); |
761 | 0 | sg.begin_c = gi.nb_meta_channels; |
762 | 0 | sg.rct_type = cparams_.colorspace; |
763 | 0 | do_transform(gi, sg, weighted::Header(), pool); |
764 | 0 | max_bitdepth++; |
765 | 0 | } |
766 | 0 | } |
767 | | |
768 | | // don't do squeeze if we don't have some spare bits |
769 | 92 | if (cparams_.responsive && !gi.channel.empty() && |
770 | 92 | max_bitdepth + 2 < level_max_bitdepth) { |
771 | 0 | Transform t(TransformId::kSqueeze); |
772 | 0 | t.squeezes = cparams_.squeezes; |
773 | 0 | do_transform(gi, t, weighted::Header(), pool); |
774 | 0 | max_bitdepth += 2; |
775 | 0 | } |
776 | | |
777 | 92 | if (max_bitdepth + 1 > level_max_bitdepth) { |
778 | | // force no group RCTs if we don't have a spare bit |
779 | 0 | cparams_.colorspace = 0; |
780 | 0 | } |
781 | 92 | JXL_ASSERT(max_bitdepth <= level_max_bitdepth); |
782 | | |
783 | 92 | std::vector<uint32_t> quants; |
784 | | |
785 | 92 | if (cparams_.butteraugli_distance > 0) { |
786 | 92 | quants.resize(gi.channel.size(), 1); |
787 | 92 | float quality = 0.25f * cparams_.butteraugli_distance; |
788 | 92 | JXL_DEBUG_V(2, |
789 | 92 | "Adding quantization constants corresponding to distance %.3f ", |
790 | 92 | quality); |
791 | 92 | if (!cparams_.responsive) { |
792 | 0 | JXL_DEBUG_V(1, |
793 | 0 | "Warning: lossy compression without Squeeze " |
794 | 0 | "transform is just color quantization."); |
795 | 0 | quality *= 0.1f; |
796 | 0 | } |
797 | 92 | if (cparams_.color_transform != ColorTransform::kXYB) { |
798 | 0 | quality *= maxval / 255.f; |
799 | 0 | } |
800 | 92 | if (cparams_.options.nb_repeats == 0) { |
801 | 0 | return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!"); |
802 | 0 | } |
803 | 92 | for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) { |
804 | 0 | Channel& ch = gi.channel[i]; |
805 | 0 | int shift = ch.hshift + ch.vshift; // number of pixel halvings |
806 | 0 | if (shift > 16) shift = 16; |
807 | 0 | if (shift > 0) shift--; |
808 | 0 | int q; |
809 | | // assuming default Squeeze here |
810 | 0 | int component = |
811 | 0 | (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans); |
812 | | // last 4 channels are final chroma residuals |
813 | 0 | if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) { |
814 | 0 | component = 1; |
815 | 0 | } |
816 | 0 | if (cparams_.color_transform == ColorTransform::kXYB && component < 3) { |
817 | 0 | q = quality * squeeze_quality_factor_xyb * |
818 | 0 | squeeze_xyb_qtable[component][shift]; |
819 | 0 | } else { |
820 | 0 | if (cparams_.colorspace != 0 && component > 0 && component < 3) { |
821 | 0 | q = quality * squeeze_quality_factor * squeeze_chroma_qtable[shift]; |
822 | 0 | } else { |
823 | 0 | q = quality * squeeze_quality_factor * squeeze_luma_factor * |
824 | 0 | squeeze_luma_qtable[shift]; |
825 | 0 | } |
826 | 0 | } |
827 | 0 | if (q < 1) q = 1; |
828 | 0 | QuantizeChannel(gi.channel[i], q); |
829 | 0 | quants[i] = q; |
830 | 0 | } |
831 | 92 | } |
832 | | |
833 | | // Fill other groups. |
834 | 92 | struct GroupParams { |
835 | 92 | Rect rect; |
836 | 92 | int minShift; |
837 | 92 | int maxShift; |
838 | 92 | ModularStreamId id; |
839 | 92 | }; |
840 | 92 | std::vector<GroupParams> stream_params; |
841 | | |
842 | 92 | stream_options_[0] = cparams_.options; |
843 | | |
844 | | // DC |
845 | 184 | for (size_t group_id = 0; group_id < frame_dim_.num_dc_groups; group_id++) { |
846 | 92 | const size_t gx = group_id % frame_dim_.xsize_dc_groups; |
847 | 92 | const size_t gy = group_id / frame_dim_.xsize_dc_groups; |
848 | 92 | const Rect rect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim, |
849 | 92 | frame_dim_.dc_group_dim, frame_dim_.dc_group_dim); |
850 | | // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim |
851 | | // maxShift==1000 is infinity |
852 | 92 | stream_params.push_back( |
853 | 92 | GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(group_id)}); |
854 | 92 | } |
855 | | // AC global -> nothing. |
856 | | // AC |
857 | 184 | for (size_t group_id = 0; group_id < frame_dim_.num_groups; group_id++) { |
858 | 92 | const size_t gx = group_id % frame_dim_.xsize_groups; |
859 | 92 | const size_t gy = group_id / frame_dim_.xsize_groups; |
860 | 92 | const Rect mrect(gx * frame_dim_.group_dim, gy * frame_dim_.group_dim, |
861 | 92 | frame_dim_.group_dim, frame_dim_.group_dim); |
862 | 184 | for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses(); |
863 | 92 | i++) { |
864 | 92 | int maxShift, minShift; |
865 | 92 | frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift); |
866 | 92 | stream_params.push_back(GroupParams{ |
867 | 92 | mrect, minShift, maxShift, ModularStreamId::ModularAC(group_id, i)}); |
868 | 92 | } |
869 | 92 | } |
870 | | // if there's only one group, everything ends up in GlobalModular |
871 | | // in that case, also try RCTs/WP params for the one group |
872 | 92 | if (stream_params.size() == 2) { |
873 | 92 | stream_params.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000, |
874 | 92 | ModularStreamId::Global()}); |
875 | 92 | } |
876 | 92 | gi_channel_.resize(stream_images_.size()); |
877 | | |
878 | 92 | JXL_RETURN_IF_ERROR(RunOnPool( |
879 | 92 | pool, 0, stream_params.size(), ThreadPool::NoInit, |
880 | 92 | [&](const uint32_t i, size_t /* thread */) { |
881 | 92 | stream_options_[stream_params[i].id.ID(frame_dim_)] = cparams_.options; |
882 | 92 | JXL_CHECK(PrepareStreamParams( |
883 | 92 | stream_params[i].rect, cparams_, stream_params[i].minShift, |
884 | 92 | stream_params[i].maxShift, stream_params[i].id, do_color)); |
885 | 92 | }, |
886 | 92 | "ChooseParams")); |
887 | 92 | { |
888 | | // Clear out channels that have been copied to groups. |
889 | 92 | Image& full_image = stream_images_[0]; |
890 | 92 | size_t c = full_image.nb_meta_channels; |
891 | 92 | for (; c < full_image.channel.size(); c++) { |
892 | 0 | Channel& fc = full_image.channel[c]; |
893 | 0 | if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; |
894 | 0 | } |
895 | 92 | for (; c < full_image.channel.size(); c++) { |
896 | 0 | full_image.channel[c].plane = ImageI(); |
897 | 0 | } |
898 | 92 | } |
899 | | |
900 | 92 | if (!quants.empty()) { |
901 | 0 | for (uint32_t stream_id = 0; stream_id < stream_images_.size(); |
902 | 0 | stream_id++) { |
903 | | // skip non-modular stream_ids |
904 | 0 | if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; |
905 | 0 | const Image& image = stream_images_[stream_id]; |
906 | 0 | const ModularOptions& options = stream_options_[stream_id]; |
907 | 0 | for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) { |
908 | 0 | if (i >= image.nb_meta_channels && |
909 | 0 | (image.channel[i].w > options.max_chan_size || |
910 | 0 | image.channel[i].h > options.max_chan_size)) { |
911 | 0 | continue; |
912 | 0 | } |
913 | 0 | if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; |
914 | 0 | size_t ch_id = stream_id == 0 |
915 | 0 | ? i |
916 | 0 | : gi_channel_[stream_id][i - image.nb_meta_channels]; |
917 | 0 | uint32_t q = quants[ch_id]; |
918 | | // Inform the tree splitting heuristics that each channel in each group |
919 | | // used this quantization factor. This will produce a tree with the |
920 | | // given multipliers. |
921 | 0 | if (multiplier_info_.empty() || |
922 | 0 | multiplier_info_.back().range[1][0] != stream_id || |
923 | 0 | multiplier_info_.back().multiplier != q) { |
924 | 0 | StaticPropRange range; |
925 | 0 | range[0] = {{i, i + 1}}; |
926 | 0 | range[1] = {{stream_id, stream_id + 1}}; |
927 | 0 | multiplier_info_.push_back({range, (uint32_t)q}); |
928 | 0 | } else { |
929 | | // Previous channel in the same group had the same quantization |
930 | | // factor. Don't provide two different ranges, as that creates |
931 | | // unnecessary nodes. |
932 | 0 | multiplier_info_.back().range[0][1] = i + 1; |
933 | 0 | } |
934 | 0 | } |
935 | 0 | } |
936 | | // Merge group+channel settings that have the same channels and quantization |
937 | | // factors, to avoid unnecessary nodes. |
938 | 0 | std::sort(multiplier_info_.begin(), multiplier_info_.end(), |
939 | 0 | [](ModularMultiplierInfo a, ModularMultiplierInfo b) { |
940 | 0 | return std::make_tuple(a.range, a.multiplier) < |
941 | 0 | std::make_tuple(b.range, b.multiplier); |
942 | 0 | }); |
943 | 0 | size_t new_num = 1; |
944 | 0 | for (size_t i = 1; i < multiplier_info_.size(); i++) { |
945 | 0 | ModularMultiplierInfo& prev = multiplier_info_[new_num - 1]; |
946 | 0 | ModularMultiplierInfo& cur = multiplier_info_[i]; |
947 | 0 | if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier && |
948 | 0 | prev.range[1][1] == cur.range[1][0]) { |
949 | 0 | prev.range[1][1] = cur.range[1][1]; |
950 | 0 | } else { |
951 | 0 | multiplier_info_[new_num++] = multiplier_info_[i]; |
952 | 0 | } |
953 | 0 | } |
954 | 0 | multiplier_info_.resize(new_num); |
955 | 0 | } |
956 | | |
957 | 92 | JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0])); |
958 | | |
959 | 92 | return PrepareEncoding(frame_header, pool, enc_state->heuristics.get(), |
960 | 92 | aux_out); |
961 | 92 | } |
962 | | |
963 | | Status ModularFrameEncoder::PrepareEncoding(const FrameHeader& frame_header, |
964 | | ThreadPool* pool, |
965 | | EncoderHeuristics* heuristics, |
966 | 92 | AuxOut* aux_out) { |
967 | 92 | if (!tree_.empty()) return true; |
968 | | |
969 | | // Compute tree. |
970 | 92 | size_t num_streams = stream_images_.size(); |
971 | 92 | stream_headers_.resize(num_streams); |
972 | 92 | tokens_.resize(num_streams); |
973 | | |
974 | 92 | if (heuristics->CustomFixedTreeLossless(frame_dim_, &tree_)) { |
975 | | // Using a fixed tree. |
976 | 92 | } else if (cparams_.speed_tier < SpeedTier::kFalcon || |
977 | 92 | !cparams_.modular_mode) { |
978 | | // Avoid creating a tree with leaves that don't correspond to any pixels. |
979 | 92 | std::vector<size_t> useful_splits; |
980 | 92 | useful_splits.reserve(tree_splits_.size()); |
981 | 644 | for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) { |
982 | 552 | bool has_pixels = false; |
983 | 552 | size_t start = tree_splits_[chunk]; |
984 | 552 | size_t stop = tree_splits_[chunk + 1]; |
985 | 2.57k | for (size_t i = start; i < stop; i++) { |
986 | 2.02k | if (!stream_images_[i].empty()) has_pixels = true; |
987 | 2.02k | } |
988 | 552 | if (has_pixels) { |
989 | 184 | useful_splits.push_back(tree_splits_[chunk]); |
990 | 184 | } |
991 | 552 | } |
992 | | // Don't do anything if modular mode does not have any pixels in this image |
993 | 92 | if (useful_splits.empty()) return true; |
994 | 92 | useful_splits.push_back(tree_splits_.back()); |
995 | | |
996 | 92 | std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT; |
997 | | |
998 | 92 | std::vector<Tree> trees(useful_splits.size() - 1); |
999 | 92 | JXL_RETURN_IF_ERROR(RunOnPool( |
1000 | 92 | pool, 0, useful_splits.size() - 1, ThreadPool::NoInit, |
1001 | 92 | [&](const uint32_t chunk, size_t /* thread */) { |
1002 | | // TODO(veluca): parallelize more. |
1003 | 92 | size_t total_pixels = 0; |
1004 | 92 | uint32_t start = useful_splits[chunk]; |
1005 | 92 | uint32_t stop = useful_splits[chunk + 1]; |
1006 | 92 | while (start < stop && stream_images_[start].empty()) ++start; |
1007 | 92 | while (start < stop && stream_images_[stop - 1].empty()) --stop; |
1008 | 92 | uint32_t max_c = 0; |
1009 | 92 | if (stream_options_[start].tree_kind != |
1010 | 92 | ModularOptions::TreeKind::kLearn) { |
1011 | 92 | for (size_t i = start; i < stop; i++) { |
1012 | 92 | for (const Channel& ch : stream_images_[i].channel) { |
1013 | 92 | total_pixels += ch.w * ch.h; |
1014 | 92 | } |
1015 | 92 | } |
1016 | 92 | trees[chunk] = |
1017 | 92 | PredefinedTree(stream_options_[start].tree_kind, total_pixels); |
1018 | 92 | return; |
1019 | 92 | } |
1020 | 92 | TreeSamples tree_samples; |
1021 | 92 | if (!tree_samples.SetPredictor(stream_options_[start].predictor, |
1022 | 92 | stream_options_[start].wp_tree_mode)) { |
1023 | 92 | invalid_force_wp.test_and_set(std::memory_order_acq_rel); |
1024 | 92 | return; |
1025 | 92 | } |
1026 | 92 | if (!tree_samples.SetProperties( |
1027 | 92 | stream_options_[start].splitting_heuristics_properties, |
1028 | 92 | stream_options_[start].wp_tree_mode)) { |
1029 | 92 | invalid_force_wp.test_and_set(std::memory_order_acq_rel); |
1030 | 92 | return; |
1031 | 92 | } |
1032 | 92 | std::vector<pixel_type> pixel_samples; |
1033 | 92 | std::vector<pixel_type> diff_samples; |
1034 | 92 | std::vector<uint32_t> group_pixel_count; |
1035 | 92 | std::vector<uint32_t> channel_pixel_count; |
1036 | 92 | for (size_t i = start; i < stop; i++) { |
1037 | 92 | max_c = std::max<uint32_t>(stream_images_[i].channel.size(), max_c); |
1038 | 92 | CollectPixelSamples(stream_images_[i], stream_options_[i], i, |
1039 | 92 | group_pixel_count, channel_pixel_count, |
1040 | 92 | pixel_samples, diff_samples); |
1041 | 92 | } |
1042 | 92 | StaticPropRange range; |
1043 | 92 | range[0] = {{0, max_c}}; |
1044 | 92 | range[1] = {{start, stop}}; |
1045 | 92 | auto local_multiplier_info = multiplier_info_; |
1046 | | |
1047 | 92 | tree_samples.PreQuantizeProperties( |
1048 | 92 | range, local_multiplier_info, group_pixel_count, |
1049 | 92 | channel_pixel_count, pixel_samples, diff_samples, |
1050 | 92 | stream_options_[start].max_property_values); |
1051 | 92 | for (size_t i = start; i < stop; i++) { |
1052 | 92 | JXL_CHECK(ModularGenericCompress( |
1053 | 92 | stream_images_[i], stream_options_[i], /*writer=*/nullptr, |
1054 | 92 | /*aux_out=*/nullptr, 0, i, &tree_samples, &total_pixels)); |
1055 | 92 | } |
1056 | | |
1057 | | // TODO(veluca): parallelize more. |
1058 | 92 | trees[chunk] = |
1059 | 92 | LearnTree(std::move(tree_samples), total_pixels, |
1060 | 92 | stream_options_[start], local_multiplier_info, range); |
1061 | 92 | }, |
1062 | 92 | "LearnTrees")); |
1063 | 92 | if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) { |
1064 | 0 | return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}"); |
1065 | 0 | } |
1066 | 92 | tree_.clear(); |
1067 | 92 | MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_); |
1068 | 92 | } else { |
1069 | | // Fixed tree. |
1070 | 0 | size_t total_pixels = 0; |
1071 | 0 | for (const Image& img : stream_images_) { |
1072 | 0 | for (const Channel& ch : img.channel) { |
1073 | 0 | total_pixels += ch.w * ch.h; |
1074 | 0 | } |
1075 | 0 | } |
1076 | 0 | if (cparams_.speed_tier <= SpeedTier::kFalcon) { |
1077 | 0 | tree_ = |
1078 | 0 | PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels); |
1079 | 0 | } else if (cparams_.speed_tier <= SpeedTier::kThunder) { |
1080 | 0 | tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC, |
1081 | 0 | total_pixels); |
1082 | 0 | } else { |
1083 | 0 | tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)}; |
1084 | 0 | } |
1085 | 0 | } |
1086 | 92 | tree_tokens_.resize(1); |
1087 | 92 | tree_tokens_[0].clear(); |
1088 | 92 | Tree decoded_tree; |
1089 | 92 | TokenizeTree(tree_, &tree_tokens_[0], &decoded_tree); |
1090 | 92 | JXL_ASSERT(tree_.size() == decoded_tree.size()); |
1091 | 92 | tree_ = std::move(decoded_tree); |
1092 | | |
1093 | 92 | if (WantDebugOutput(aux_out)) { |
1094 | 0 | if (frame_header.dc_level > 0) { |
1095 | 0 | PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" + |
1096 | 0 | std::to_string(frame_header.dc_level) + "_tree"); |
1097 | 0 | } else { |
1098 | 0 | PrintTree(tree_, aux_out->debug_prefix + "/global_tree"); |
1099 | 0 | } |
1100 | 0 | } |
1101 | | |
1102 | 92 | image_widths_.resize(num_streams); |
1103 | 92 | JXL_RETURN_IF_ERROR(RunOnPool( |
1104 | 92 | pool, 0, num_streams, ThreadPool::NoInit, |
1105 | 92 | [&](const uint32_t stream_id, size_t /* thread */) { |
1106 | 92 | AuxOut my_aux_out; |
1107 | 92 | if (aux_out) { |
1108 | 92 | my_aux_out.dump_image = aux_out->dump_image; |
1109 | 92 | my_aux_out.debug_prefix = aux_out->debug_prefix; |
1110 | 92 | } |
1111 | 92 | tokens_[stream_id].clear(); |
1112 | 92 | JXL_CHECK(ModularGenericCompress( |
1113 | 92 | stream_images_[stream_id], stream_options_[stream_id], |
1114 | 92 | /*writer=*/nullptr, &my_aux_out, 0, stream_id, |
1115 | 92 | /*tree_samples=*/nullptr, |
1116 | 92 | /*total_pixels=*/nullptr, |
1117 | 92 | /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id], |
1118 | 92 | /*tokens=*/&tokens_[stream_id], |
1119 | 92 | /*widths=*/&image_widths_[stream_id])); |
1120 | 92 | }, |
1121 | 92 | "ComputeTokens")); |
1122 | 92 | return true; |
1123 | 92 | } |
1124 | | |
1125 | | Status ModularFrameEncoder::EncodeGlobalInfo(BitWriter* writer, |
1126 | 92 | AuxOut* aux_out) { |
1127 | 92 | BitWriter::Allotment allotment(writer, 1); |
1128 | | // If we are using brotli, or not using modular mode. |
1129 | 92 | if (tree_tokens_.empty() || tree_tokens_[0].empty()) { |
1130 | 0 | writer->Write(1, 0); |
1131 | 0 | ReclaimAndCharge(writer, &allotment, kLayerModularTree, aux_out); |
1132 | 0 | return true; |
1133 | 0 | } |
1134 | 92 | writer->Write(1, 1); |
1135 | 92 | ReclaimAndCharge(writer, &allotment, kLayerModularTree, aux_out); |
1136 | | |
1137 | | // Write tree |
1138 | 92 | HistogramParams params; |
1139 | 92 | if (cparams_.speed_tier > SpeedTier::kKitten) { |
1140 | 92 | params.clustering = HistogramParams::ClusteringType::kFast; |
1141 | 92 | params.ans_histogram_strategy = |
1142 | 92 | cparams_.speed_tier > SpeedTier::kThunder |
1143 | 92 | ? HistogramParams::ANSHistogramStrategy::kFast |
1144 | 92 | : HistogramParams::ANSHistogramStrategy::kApproximate; |
1145 | 92 | params.lz77_method = |
1146 | 92 | cparams_.decoding_speed_tier >= 3 && cparams_.modular_mode |
1147 | 92 | ? (cparams_.speed_tier >= SpeedTier::kFalcon |
1148 | 0 | ? HistogramParams::LZ77Method::kRLE |
1149 | 0 | : HistogramParams::LZ77Method::kLZ77) |
1150 | 92 | : HistogramParams::LZ77Method::kNone; |
1151 | | // Near-lossless DC, as well as modular mode, require choosing hybrid uint |
1152 | | // more carefully. |
1153 | 92 | if ((!extra_dc_precision.empty() && extra_dc_precision[0] != 0) || |
1154 | 92 | (cparams_.modular_mode && cparams_.speed_tier < SpeedTier::kCheetah)) { |
1155 | 0 | params.uint_method = HistogramParams::HybridUintMethod::kFast; |
1156 | 92 | } else { |
1157 | 92 | params.uint_method = HistogramParams::HybridUintMethod::kNone; |
1158 | 92 | } |
1159 | 92 | } else if (cparams_.speed_tier <= SpeedTier::kTortoise) { |
1160 | 0 | params.lz77_method = HistogramParams::LZ77Method::kOptimal; |
1161 | 0 | } else { |
1162 | 0 | params.lz77_method = HistogramParams::LZ77Method::kLZ77; |
1163 | 0 | } |
1164 | 92 | if (cparams_.decoding_speed_tier >= 1) { |
1165 | 0 | params.max_histograms = 12; |
1166 | 0 | } |
1167 | 92 | if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive) { |
1168 | 0 | params.lz77_method = cparams_.speed_tier >= SpeedTier::kCheetah |
1169 | 0 | ? HistogramParams::LZ77Method::kRLE |
1170 | 0 | : cparams_.speed_tier >= SpeedTier::kKitten |
1171 | 0 | ? HistogramParams::LZ77Method::kLZ77 |
1172 | 0 | : HistogramParams::LZ77Method::kOptimal; |
1173 | 0 | } |
1174 | 92 | if (cparams_.decoding_speed_tier >= 2 && cparams_.responsive) { |
1175 | 0 | params.uint_method = HistogramParams::HybridUintMethod::k000; |
1176 | 0 | params.force_huffman = true; |
1177 | 0 | } |
1178 | 92 | BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens_, &code_, |
1179 | 92 | &context_map_, writer, kLayerModularTree, aux_out); |
1180 | 92 | WriteTokens(tree_tokens_[0], code_, context_map_, writer, kLayerModularTree, |
1181 | 92 | aux_out); |
1182 | 92 | params.image_widths = image_widths_; |
1183 | | // Write histograms. |
1184 | 92 | BuildAndEncodeHistograms(params, (tree_.size() + 1) / 2, tokens_, &code_, |
1185 | 92 | &context_map_, writer, kLayerModularGlobal, aux_out); |
1186 | 92 | return true; |
1187 | 92 | } |
1188 | | |
1189 | | Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out, |
1190 | | size_t layer, |
1191 | 460 | const ModularStreamId& stream) { |
1192 | 460 | size_t stream_id = stream.ID(frame_dim_); |
1193 | 460 | if (stream_images_[stream_id].channel.empty()) { |
1194 | 276 | return true; // Image with no channels, header never gets decoded. |
1195 | 276 | } |
1196 | 184 | JXL_RETURN_IF_ERROR( |
1197 | 184 | Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out)); |
1198 | 184 | WriteTokens(tokens_[stream_id], code_, context_map_, writer, layer, aux_out); |
1199 | 184 | return true; |
1200 | 184 | } |
1201 | | |
1202 | | namespace { |
1203 | 0 | float EstimateWPCost(const Image& img, size_t i) { |
1204 | 0 | size_t extra_bits = 0; |
1205 | 0 | float histo_cost = 0; |
1206 | 0 | HybridUintConfig config; |
1207 | 0 | int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31, |
1208 | 0 | -23, -15, -11, -7, -4, -3, -1, 0, 1, |
1209 | 0 | 3, 5, 7, 11, 15, 23, 31, 47, 63, |
1210 | 0 | 95, 127, 191, 255, 392, 500}; |
1211 | 0 | constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; |
1212 | 0 | Histogram histo[nc] = {}; |
1213 | 0 | weighted::Header wp_header; |
1214 | 0 | PredictorMode(i, &wp_header); |
1215 | 0 | for (const Channel& ch : img.channel) { |
1216 | 0 | const intptr_t onerow = ch.plane.PixelsPerRow(); |
1217 | 0 | weighted::State wp_state(wp_header, ch.w, ch.h); |
1218 | 0 | Properties properties(1); |
1219 | 0 | for (size_t y = 0; y < ch.h; y++) { |
1220 | 0 | const pixel_type* JXL_RESTRICT r = ch.Row(y); |
1221 | 0 | for (size_t x = 0; x < ch.w; x++) { |
1222 | 0 | size_t offset = 0; |
1223 | 0 | pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); |
1224 | 0 | pixel_type_w top = (y ? *(r + x - onerow) : left); |
1225 | 0 | pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); |
1226 | 0 | pixel_type_w topright = |
1227 | 0 | (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top); |
1228 | 0 | pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top); |
1229 | 0 | pixel_type guess = wp_state.Predict</*compute_properties=*/true>( |
1230 | 0 | x, y, ch.w, top, left, topright, topleft, toptop, &properties, |
1231 | 0 | offset); |
1232 | 0 | size_t ctx = 0; |
1233 | 0 | for (int c : cutoffs) { |
1234 | 0 | ctx += c >= properties[0]; |
1235 | 0 | } |
1236 | 0 | pixel_type res = r[x] - guess; |
1237 | 0 | uint32_t token, nbits, bits; |
1238 | 0 | config.Encode(PackSigned(res), &token, &nbits, &bits); |
1239 | 0 | histo[ctx].Add(token); |
1240 | 0 | extra_bits += nbits; |
1241 | 0 | wp_state.UpdateErrors(r[x], x, y, ch.w); |
1242 | 0 | } |
1243 | 0 | } |
1244 | 0 | for (size_t h = 0; h < nc; h++) { |
1245 | 0 | histo_cost += histo[h].ShannonEntropy(); |
1246 | 0 | histo[h].Clear(); |
1247 | 0 | } |
1248 | 0 | } |
1249 | 0 | return histo_cost + extra_bits; |
1250 | 0 | } |
1251 | | |
1252 | 0 | float EstimateCost(const Image& img) { |
1253 | | // TODO(veluca): consider SIMDfication of this code. |
1254 | 0 | size_t extra_bits = 0; |
1255 | 0 | float histo_cost = 0; |
1256 | 0 | HybridUintConfig config; |
1257 | 0 | uint32_t cutoffs[] = {0, 1, 3, 5, 7, 11, 15, 23, 31, |
1258 | 0 | 47, 63, 95, 127, 191, 255, 392, 500}; |
1259 | 0 | constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; |
1260 | 0 | Histogram histo[nc] = {}; |
1261 | 0 | for (const Channel& ch : img.channel) { |
1262 | 0 | const intptr_t onerow = ch.plane.PixelsPerRow(); |
1263 | 0 | for (size_t y = 0; y < ch.h; y++) { |
1264 | 0 | const pixel_type* JXL_RESTRICT r = ch.Row(y); |
1265 | 0 | for (size_t x = 0; x < ch.w; x++) { |
1266 | 0 | pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); |
1267 | 0 | pixel_type_w top = (y ? *(r + x - onerow) : left); |
1268 | 0 | pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); |
1269 | 0 | size_t maxdiff = std::max(std::max(left, top), topleft) - |
1270 | 0 | std::min(std::min(left, top), topleft); |
1271 | 0 | size_t ctx = 0; |
1272 | 0 | for (uint32_t c : cutoffs) { |
1273 | 0 | ctx += c > maxdiff; |
1274 | 0 | } |
1275 | 0 | pixel_type res = r[x] - ClampedGradient(top, left, topleft); |
1276 | 0 | uint32_t token, nbits, bits; |
1277 | 0 | config.Encode(PackSigned(res), &token, &nbits, &bits); |
1278 | 0 | histo[ctx].Add(token); |
1279 | 0 | extra_bits += nbits; |
1280 | 0 | } |
1281 | 0 | } |
1282 | 0 | for (size_t h = 0; h < nc; h++) { |
1283 | 0 | histo_cost += histo[h].ShannonEntropy(); |
1284 | 0 | histo[h].Clear(); |
1285 | 0 | } |
1286 | 0 | } |
1287 | 0 | return histo_cost + extra_bits; |
1288 | 0 | } |
1289 | | |
1290 | | } // namespace |
1291 | | |
1292 | | Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect, |
1293 | | const CompressParams& cparams_, |
1294 | | int minShift, int maxShift, |
1295 | | const ModularStreamId& stream, |
1296 | 276 | bool do_color) { |
1297 | 276 | size_t stream_id = stream.ID(frame_dim_); |
1298 | 276 | Image& full_image = stream_images_[0]; |
1299 | 276 | const size_t xsize = rect.xsize(); |
1300 | 276 | const size_t ysize = rect.ysize(); |
1301 | 276 | Image& gi = stream_images_[stream_id]; |
1302 | 276 | if (stream_id > 0) { |
1303 | 184 | gi = Image(xsize, ysize, full_image.bitdepth, 0); |
1304 | | // start at the first bigger-than-frame_dim.group_dim non-metachannel |
1305 | 184 | size_t c = full_image.nb_meta_channels; |
1306 | 184 | for (; c < full_image.channel.size(); c++) { |
1307 | 0 | Channel& fc = full_image.channel[c]; |
1308 | 0 | if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; |
1309 | 0 | } |
1310 | 184 | for (; c < full_image.channel.size(); c++) { |
1311 | 0 | Channel& fc = full_image.channel[c]; |
1312 | 0 | int shift = std::min(fc.hshift, fc.vshift); |
1313 | 0 | if (shift > maxShift) continue; |
1314 | 0 | if (shift < minShift) continue; |
1315 | 0 | Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, |
1316 | 0 | rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); |
1317 | 0 | if (r.xsize() == 0 || r.ysize() == 0) continue; |
1318 | 0 | gi_channel_[stream_id].push_back(c); |
1319 | 0 | Channel gc(r.xsize(), r.ysize()); |
1320 | 0 | gc.hshift = fc.hshift; |
1321 | 0 | gc.vshift = fc.vshift; |
1322 | 0 | for (size_t y = 0; y < r.ysize(); ++y) { |
1323 | 0 | memcpy(gc.Row(y), r.ConstRow(fc.plane, y), |
1324 | 0 | r.xsize() * sizeof(pixel_type)); |
1325 | 0 | } |
1326 | 0 | gi.channel.emplace_back(std::move(gc)); |
1327 | 0 | } |
1328 | | |
1329 | 184 | if (gi.channel.empty()) return true; |
1330 | | // Do some per-group transforms |
1331 | | |
1332 | | // Local palette |
1333 | | // TODO(veluca): make this work with quantize-after-prediction in lossy |
1334 | | // mode. |
1335 | 0 | if (cparams_.butteraugli_distance == 0.f && cparams_.palette_colors != 0 && |
1336 | 0 | cparams_.speed_tier < SpeedTier::kCheetah) { |
1337 | | // all-channel palette (e.g. RGBA) |
1338 | 0 | if (gi.channel.size() - gi.nb_meta_channels > 1) { |
1339 | 0 | Transform maybe_palette(TransformId::kPalette); |
1340 | 0 | maybe_palette.begin_c = gi.nb_meta_channels; |
1341 | 0 | maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; |
1342 | 0 | maybe_palette.nb_colors = std::abs(cparams_.palette_colors); |
1343 | 0 | maybe_palette.ordered_palette = cparams_.palette_colors >= 0; |
1344 | 0 | do_transform(gi, maybe_palette, weighted::Header()); |
1345 | 0 | } |
1346 | | // all-minus-one-channel palette (RGB with separate alpha, or CMY with |
1347 | | // separate K) |
1348 | 0 | if (gi.channel.size() - gi.nb_meta_channels > 3) { |
1349 | 0 | Transform maybe_palette_3(TransformId::kPalette); |
1350 | 0 | maybe_palette_3.begin_c = gi.nb_meta_channels; |
1351 | 0 | maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1; |
1352 | 0 | maybe_palette_3.nb_colors = std::abs(cparams_.palette_colors); |
1353 | 0 | maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; |
1354 | 0 | maybe_palette_3.lossy_palette = cparams_.lossy_palette; |
1355 | 0 | if (maybe_palette_3.lossy_palette) { |
1356 | 0 | maybe_palette_3.predictor = Predictor::Weighted; |
1357 | 0 | } |
1358 | 0 | do_transform(gi, maybe_palette_3, weighted::Header()); |
1359 | 0 | } |
1360 | 0 | } |
1361 | | |
1362 | | // Local channel palette |
1363 | 0 | if (cparams_.channel_colors_percent > 0 && |
1364 | 0 | cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette && |
1365 | 0 | cparams_.speed_tier < SpeedTier::kCheetah && |
1366 | 0 | !(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) { |
1367 | | // single channel palette (like FLIF's ChannelCompact) |
1368 | 0 | size_t nb_channels = gi.channel.size() - gi.nb_meta_channels; |
1369 | 0 | for (size_t i = 0; i < nb_channels; i++) { |
1370 | 0 | int32_t min, max; |
1371 | 0 | compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); |
1372 | 0 | int colors = max - min + 1; |
1373 | 0 | JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); |
1374 | 0 | Transform maybe_palette_1(TransformId::kPalette); |
1375 | 0 | maybe_palette_1.begin_c = i + gi.nb_meta_channels; |
1376 | 0 | maybe_palette_1.num_c = 1; |
1377 | | // simple heuristic: if less than X percent of the values in the range |
1378 | | // actually occur, it is probably worth it to do a compaction |
1379 | | // (but only if the channel palette is less than 80% the size of the |
1380 | | // image itself) |
1381 | 0 | maybe_palette_1.nb_colors = |
1382 | 0 | std::min((int)(xsize * ysize * 0.8), |
1383 | 0 | (int)(cparams_.channel_colors_percent / 100. * colors)); |
1384 | 0 | do_transform(gi, maybe_palette_1, weighted::Header()); |
1385 | 0 | } |
1386 | 0 | } |
1387 | 0 | } |
1388 | | |
1389 | | // lossless and no specific color transform specified: try Nothing, YCoCg, |
1390 | | // and 17 RCTs |
1391 | 92 | if (cparams_.color_transform == ColorTransform::kNone && |
1392 | 92 | cparams_.IsLossless() && cparams_.colorspace < 0 && |
1393 | 92 | gi.channel.size() - gi.nb_meta_channels >= 3 && |
1394 | 92 | cparams_.responsive == false && do_color && |
1395 | 92 | cparams_.speed_tier <= SpeedTier::kHare) { |
1396 | 0 | Transform sg(TransformId::kRCT); |
1397 | 0 | sg.begin_c = gi.nb_meta_channels; |
1398 | 0 | size_t nb_rcts_to_try = 0; |
1399 | 0 | switch (cparams_.speed_tier) { |
1400 | 0 | case SpeedTier::kLightning: |
1401 | 0 | case SpeedTier::kThunder: |
1402 | 0 | case SpeedTier::kFalcon: |
1403 | 0 | case SpeedTier::kCheetah: |
1404 | 0 | nb_rcts_to_try = 0; // Just do global YCoCg |
1405 | 0 | break; |
1406 | 0 | case SpeedTier::kHare: |
1407 | 0 | nb_rcts_to_try = 4; |
1408 | 0 | break; |
1409 | 0 | case SpeedTier::kWombat: |
1410 | 0 | nb_rcts_to_try = 5; |
1411 | 0 | break; |
1412 | 0 | case SpeedTier::kSquirrel: |
1413 | 0 | nb_rcts_to_try = 7; |
1414 | 0 | break; |
1415 | 0 | case SpeedTier::kKitten: |
1416 | 0 | nb_rcts_to_try = 9; |
1417 | 0 | break; |
1418 | 0 | case SpeedTier::kTortoise: |
1419 | 0 | nb_rcts_to_try = 19; |
1420 | 0 | break; |
1421 | 0 | } |
1422 | 0 | float best_cost = std::numeric_limits<float>::max(); |
1423 | 0 | size_t best_rct = 0; |
1424 | | // These should be 19 actually different transforms; the remaining ones |
1425 | | // are equivalent to one of these (note that the first two are do-nothing |
1426 | | // and YCoCg) modulo channel reordering (which only matters in the case of |
1427 | | // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR) |
1428 | 0 | for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5, |
1429 | 0 | 5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4, |
1430 | 0 | 1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4, |
1431 | 0 | 4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) { |
1432 | 0 | if (nb_rcts_to_try == 0) break; |
1433 | 0 | sg.rct_type = i; |
1434 | 0 | nb_rcts_to_try--; |
1435 | 0 | if (do_transform(gi, sg, weighted::Header())) { |
1436 | 0 | float cost = EstimateCost(gi); |
1437 | 0 | if (cost < best_cost) { |
1438 | 0 | best_rct = i; |
1439 | 0 | best_cost = cost; |
1440 | 0 | } |
1441 | 0 | Transform t = gi.transform.back(); |
1442 | 0 | JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr)); |
1443 | 0 | gi.transform.pop_back(); |
1444 | 0 | } |
1445 | 0 | } |
1446 | | // Apply the best RCT to the image for future encoding. |
1447 | 0 | sg.rct_type = best_rct; |
1448 | 0 | do_transform(gi, sg, weighted::Header()); |
1449 | 92 | } else { |
1450 | | // No need to try anything, just use the default options. |
1451 | 92 | } |
1452 | 92 | size_t nb_wp_modes = 1; |
1453 | 92 | if (cparams_.speed_tier <= SpeedTier::kTortoise) { |
1454 | 0 | nb_wp_modes = 5; |
1455 | 92 | } else if (cparams_.speed_tier <= SpeedTier::kKitten) { |
1456 | 0 | nb_wp_modes = 2; |
1457 | 0 | } |
1458 | 92 | if (nb_wp_modes > 1 && |
1459 | 92 | (stream_options_[stream_id].predictor == Predictor::Weighted || |
1460 | 0 | stream_options_[stream_id].predictor == Predictor::Best || |
1461 | 0 | stream_options_[stream_id].predictor == Predictor::Variable)) { |
1462 | 0 | float best_cost = std::numeric_limits<float>::max(); |
1463 | 0 | stream_options_[stream_id].wp_mode = 0; |
1464 | 0 | for (size_t i = 0; i < nb_wp_modes; i++) { |
1465 | 0 | float cost = EstimateWPCost(gi, i); |
1466 | 0 | if (cost < best_cost) { |
1467 | 0 | best_cost = cost; |
1468 | 0 | stream_options_[stream_id].wp_mode = i; |
1469 | 0 | } |
1470 | 0 | } |
1471 | 0 | } |
1472 | 92 | return true; |
1473 | 92 | } |
1474 | | |
1475 | | constexpr float q_deadzone = 0.62f; |
1476 | | int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y, |
1477 | | size_t w, weighted::State* wp_state, float value, |
1478 | 0 | float inv_factor) { |
1479 | 0 | float svalue = value * inv_factor; |
1480 | 0 | PredictionResult pred = |
1481 | 0 | PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state); |
1482 | 0 | svalue -= pred.guess; |
1483 | 0 | if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; |
1484 | 0 | int residual = roundf(svalue); |
1485 | 0 | if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; |
1486 | 0 | return residual + pred.guess; |
1487 | 0 | } |
1488 | | |
1489 | | int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x, |
1490 | 0 | size_t y, size_t w, float value, float inv_factor) { |
1491 | 0 | float svalue = value * inv_factor; |
1492 | 0 | PredictionResult pred = |
1493 | 0 | PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient); |
1494 | 0 | svalue -= pred.guess; |
1495 | 0 | if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; |
1496 | 0 | int residual = roundf(svalue); |
1497 | 0 | if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; |
1498 | 0 | return residual + pred.guess; |
1499 | 0 | } |
1500 | | |
1501 | | void ModularFrameEncoder::AddVarDCTDC(const Image3F& dc, size_t group_index, |
1502 | | bool nl_dc, PassesEncoderState* enc_state, |
1503 | 92 | bool jpeg_transcode) { |
1504 | 92 | const Rect r = enc_state->shared.DCGroupRect(group_index); |
1505 | 92 | extra_dc_precision[group_index] = nl_dc ? 1 : 0; |
1506 | 92 | float mul = 1 << extra_dc_precision[group_index]; |
1507 | | |
1508 | 92 | size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_); |
1509 | 92 | stream_options_[stream_id].max_chan_size = 0xFFFFFF; |
1510 | 92 | stream_options_[stream_id].predictor = Predictor::Weighted; |
1511 | 92 | stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly; |
1512 | 92 | if (cparams_.speed_tier >= SpeedTier::kSquirrel) { |
1513 | 92 | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC; |
1514 | 92 | } |
1515 | 92 | if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) { |
1516 | 0 | stream_options_[stream_id].predictor = |
1517 | 0 | (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable |
1518 | 0 | : Predictor::Best); |
1519 | 0 | stream_options_[stream_id].wp_tree_mode = |
1520 | 0 | ModularOptions::TreeMode::kDefault; |
1521 | 0 | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; |
1522 | 0 | } |
1523 | 92 | if (cparams_.decoding_speed_tier >= 1) { |
1524 | 0 | stream_options_[stream_id].tree_kind = |
1525 | 0 | ModularOptions::TreeKind::kGradientFixedDC; |
1526 | 0 | } |
1527 | | |
1528 | 92 | stream_images_[stream_id] = Image(r.xsize(), r.ysize(), 8, 3); |
1529 | 92 | if (nl_dc && stream_options_[stream_id].tree_kind == |
1530 | 0 | ModularOptions::TreeKind::kGradientFixedDC) { |
1531 | 0 | JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444()); |
1532 | 0 | for (size_t c : {1, 0, 2}) { |
1533 | 0 | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1534 | 0 | float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; |
1535 | 0 | float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; |
1536 | 0 | for (size_t y = 0; y < r.ysize(); y++) { |
1537 | 0 | int32_t* quant_row = |
1538 | 0 | stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); |
1539 | 0 | size_t stride = stream_images_[stream_id] |
1540 | 0 | .channel[c < 2 ? c ^ 1 : c] |
1541 | 0 | .plane.PixelsPerRow(); |
1542 | 0 | const float* row = r.ConstPlaneRow(dc, c, y); |
1543 | 0 | if (c == 1) { |
1544 | 0 | for (size_t x = 0; x < r.xsize(); x++) { |
1545 | 0 | quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y, |
1546 | 0 | r.xsize(), row[x], inv_factor); |
1547 | 0 | } |
1548 | 0 | } else { |
1549 | 0 | int32_t* quant_row_y = |
1550 | 0 | stream_images_[stream_id].channel[0].plane.Row(y); |
1551 | 0 | for (size_t x = 0; x < r.xsize(); x++) { |
1552 | 0 | quant_row[x] = QuantizeGradient( |
1553 | 0 | quant_row, stride, c, x, y, r.xsize(), |
1554 | 0 | row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); |
1555 | 0 | } |
1556 | 0 | } |
1557 | 0 | } |
1558 | 0 | } |
1559 | 92 | } else if (nl_dc) { |
1560 | 0 | JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444()); |
1561 | 0 | for (size_t c : {1, 0, 2}) { |
1562 | 0 | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1563 | 0 | float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; |
1564 | 0 | float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; |
1565 | 0 | weighted::Header header; |
1566 | 0 | weighted::State wp_state(header, r.xsize(), r.ysize()); |
1567 | 0 | for (size_t y = 0; y < r.ysize(); y++) { |
1568 | 0 | int32_t* quant_row = |
1569 | 0 | stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); |
1570 | 0 | size_t stride = stream_images_[stream_id] |
1571 | 0 | .channel[c < 2 ? c ^ 1 : c] |
1572 | 0 | .plane.PixelsPerRow(); |
1573 | 0 | const float* row = r.ConstPlaneRow(dc, c, y); |
1574 | 0 | if (c == 1) { |
1575 | 0 | for (size_t x = 0; x < r.xsize(); x++) { |
1576 | 0 | quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(), |
1577 | 0 | &wp_state, row[x], inv_factor); |
1578 | 0 | wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); |
1579 | 0 | } |
1580 | 0 | } else { |
1581 | 0 | int32_t* quant_row_y = |
1582 | 0 | stream_images_[stream_id].channel[0].plane.Row(y); |
1583 | 0 | for (size_t x = 0; x < r.xsize(); x++) { |
1584 | 0 | quant_row[x] = QuantizeWP( |
1585 | 0 | quant_row, stride, c, x, y, r.xsize(), &wp_state, |
1586 | 0 | row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); |
1587 | 0 | wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); |
1588 | 0 | } |
1589 | 0 | } |
1590 | 0 | } |
1591 | 0 | } |
1592 | 92 | } else if (enc_state->shared.frame_header.chroma_subsampling.Is444()) { |
1593 | 276 | for (size_t c : {1, 0, 2}) { |
1594 | 276 | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1595 | 276 | float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; |
1596 | 276 | float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; |
1597 | 552 | for (size_t y = 0; y < r.ysize(); y++) { |
1598 | 276 | int32_t* quant_row = |
1599 | 276 | stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); |
1600 | 276 | const float* row = r.ConstPlaneRow(dc, c, y); |
1601 | 276 | if (c == 1) { |
1602 | 184 | for (size_t x = 0; x < r.xsize(); x++) { |
1603 | 92 | quant_row[x] = roundf(row[x] * inv_factor); |
1604 | 92 | } |
1605 | 184 | } else { |
1606 | 184 | int32_t* quant_row_y = |
1607 | 184 | stream_images_[stream_id].channel[0].plane.Row(y); |
1608 | 368 | for (size_t x = 0; x < r.xsize(); x++) { |
1609 | 184 | quant_row[x] = |
1610 | 184 | roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) * |
1611 | 184 | inv_factor); |
1612 | 184 | } |
1613 | 184 | } |
1614 | 276 | } |
1615 | 276 | } |
1616 | 92 | } else { |
1617 | 0 | for (size_t c : {1, 0, 2}) { |
1618 | 0 | Rect rect( |
1619 | 0 | r.x0() >> enc_state->shared.frame_header.chroma_subsampling.HShift(c), |
1620 | 0 | r.y0() >> enc_state->shared.frame_header.chroma_subsampling.VShift(c), |
1621 | 0 | r.xsize() >> |
1622 | 0 | enc_state->shared.frame_header.chroma_subsampling.HShift(c), |
1623 | 0 | r.ysize() >> |
1624 | 0 | enc_state->shared.frame_header.chroma_subsampling.VShift(c)); |
1625 | 0 | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1626 | 0 | size_t ys = rect.ysize(); |
1627 | 0 | size_t xs = rect.xsize(); |
1628 | 0 | Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c]; |
1629 | 0 | ch.w = xs; |
1630 | 0 | ch.h = ys; |
1631 | 0 | ch.shrink(); |
1632 | 0 | for (size_t y = 0; y < ys; y++) { |
1633 | 0 | int32_t* quant_row = ch.plane.Row(y); |
1634 | 0 | const float* row = rect.ConstPlaneRow(dc, c, y); |
1635 | 0 | for (size_t x = 0; x < xs; x++) { |
1636 | 0 | quant_row[x] = roundf(row[x] * inv_factor); |
1637 | 0 | } |
1638 | 0 | } |
1639 | 0 | } |
1640 | 0 | } |
1641 | | |
1642 | 92 | DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc, |
1643 | 92 | stream_images_[stream_id], enc_state->shared.quantizer.MulDC(), |
1644 | 92 | 1.0 / mul, enc_state->shared.cmap.DCFactors(), |
1645 | 92 | enc_state->shared.frame_header.chroma_subsampling, |
1646 | 92 | enc_state->shared.block_ctx_map); |
1647 | 92 | } |
1648 | | |
1649 | | void ModularFrameEncoder::AddACMetadata(size_t group_index, bool jpeg_transcode, |
1650 | 92 | PassesEncoderState* enc_state) { |
1651 | 92 | const Rect r = enc_state->shared.DCGroupRect(group_index); |
1652 | 92 | size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_); |
1653 | 92 | stream_options_[stream_id].max_chan_size = 0xFFFFFF; |
1654 | 92 | stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP; |
1655 | 92 | if (jpeg_transcode) { |
1656 | 0 | stream_options_[stream_id].tree_kind = |
1657 | 0 | ModularOptions::TreeKind::kJpegTranscodeACMeta; |
1658 | 92 | } else if (cparams_.speed_tier >= SpeedTier::kFalcon) { |
1659 | 0 | stream_options_[stream_id].tree_kind = |
1660 | 0 | ModularOptions::TreeKind::kFalconACMeta; |
1661 | 92 | } else if (cparams_.speed_tier > SpeedTier::kKitten) { |
1662 | 92 | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta; |
1663 | 92 | } |
1664 | | // If we are using a non-constant CfL field, and are in a slow enough mode, |
1665 | | // re-enable tree computation for it. |
1666 | 92 | if (cparams_.speed_tier < SpeedTier::kSquirrel && |
1667 | 92 | cparams_.force_cfl_jpeg_recompression) { |
1668 | 0 | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; |
1669 | 0 | } |
1670 | | // YToX, YToB, ACS + QF, EPF |
1671 | 92 | Image& image = stream_images_[stream_id]; |
1672 | 92 | image = Image(r.xsize(), r.ysize(), 8, 4); |
1673 | 92 | static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); |
1674 | 92 | Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); |
1675 | 92 | image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3); |
1676 | 92 | image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3); |
1677 | 92 | image.channel[2] = Channel(r.xsize() * r.ysize(), 2, 0, 0); |
1678 | 92 | ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map, |
1679 | 92 | Rect(image.channel[0].plane), &image.channel[0].plane); |
1680 | 92 | ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map, |
1681 | 92 | Rect(image.channel[1].plane), &image.channel[1].plane); |
1682 | 92 | size_t num = 0; |
1683 | 184 | for (size_t y = 0; y < r.ysize(); y++) { |
1684 | 92 | AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y); |
1685 | 92 | const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y); |
1686 | 92 | const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y); |
1687 | 92 | int32_t* out_acs = image.channel[2].plane.Row(0); |
1688 | 92 | int32_t* out_qf = image.channel[2].plane.Row(1); |
1689 | 92 | int32_t* row_out_epf = image.channel[3].plane.Row(y); |
1690 | 184 | for (size_t x = 0; x < r.xsize(); x++) { |
1691 | 92 | row_out_epf[x] = row_epf[x]; |
1692 | 92 | if (!row_acs[x].IsFirstBlock()) continue; |
1693 | 92 | out_acs[num] = row_acs[x].RawStrategy(); |
1694 | 92 | out_qf[num] = row_qf[x] - 1; |
1695 | 92 | num++; |
1696 | 92 | } |
1697 | 92 | } |
1698 | 92 | image.channel[2].w = num; |
1699 | 92 | ac_metadata_size[group_index] = num; |
1700 | 92 | } |
1701 | | |
1702 | | void ModularFrameEncoder::EncodeQuantTable( |
1703 | | size_t size_x, size_t size_y, BitWriter* writer, |
1704 | | const QuantEncoding& encoding, size_t idx, |
1705 | 0 | ModularFrameEncoder* modular_frame_encoder) { |
1706 | 0 | JXL_ASSERT(encoding.qraw.qtable != nullptr); |
1707 | 0 | JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size()); |
1708 | 0 | JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer)); |
1709 | 0 | if (modular_frame_encoder) { |
1710 | 0 | JXL_CHECK(modular_frame_encoder->EncodeStream( |
1711 | 0 | writer, nullptr, 0, ModularStreamId::QuantTable(idx))); |
1712 | 0 | return; |
1713 | 0 | } |
1714 | 0 | Image image(size_x, size_y, 8, 3); |
1715 | 0 | for (size_t c = 0; c < 3; c++) { |
1716 | 0 | for (size_t y = 0; y < size_y; y++) { |
1717 | 0 | int32_t* JXL_RESTRICT row = image.channel[c].Row(y); |
1718 | 0 | for (size_t x = 0; x < size_x; x++) { |
1719 | 0 | row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x]; |
1720 | 0 | } |
1721 | 0 | } |
1722 | 0 | } |
1723 | 0 | ModularOptions cfopts; |
1724 | 0 | JXL_CHECK(ModularGenericCompress(image, cfopts, writer)); |
1725 | 0 | } |
1726 | | |
1727 | | void ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y, |
1728 | | const QuantEncoding& encoding, |
1729 | 0 | size_t idx) { |
1730 | 0 | size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim_); |
1731 | 0 | JXL_ASSERT(encoding.qraw.qtable != nullptr); |
1732 | 0 | JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size()); |
1733 | 0 | Image& image = stream_images_[stream_id]; |
1734 | 0 | image = Image(size_x, size_y, 8, 3); |
1735 | 0 | for (size_t c = 0; c < 3; c++) { |
1736 | 0 | for (size_t y = 0; y < size_y; y++) { |
1737 | 0 | int32_t* JXL_RESTRICT row = image.channel[c].Row(y); |
1738 | 0 | for (size_t x = 0; x < size_x; x++) { |
1739 | 0 | row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x]; |
1740 | 0 | } |
1741 | 0 | } |
1742 | 0 | } |
1743 | 0 | } |
1744 | | } // namespace jxl |