/src/libjxl/lib/jxl/enc_modular.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_modular.h" |
7 | | |
8 | | #include <jxl/cms_interface.h> |
9 | | #include <jxl/memory_manager.h> |
10 | | #include <jxl/types.h> |
11 | | |
12 | | #include <algorithm> |
13 | | #include <array> |
14 | | #include <cmath> |
15 | | #include <cstddef> |
16 | | #include <cstdint> |
17 | | #include <cstdlib> |
18 | | #include <cstring> |
19 | | #include <limits> |
20 | | #include <memory> |
21 | | #include <tuple> |
22 | | #include <utility> |
23 | | #include <vector> |
24 | | |
25 | | #include "lib/jxl/ac_strategy.h" |
26 | | #include "lib/jxl/base/bits.h" |
27 | | #include "lib/jxl/base/common.h" |
28 | | #include "lib/jxl/base/compiler_specific.h" |
29 | | #include "lib/jxl/base/data_parallel.h" |
30 | | #include "lib/jxl/base/printf_macros.h" |
31 | | #include "lib/jxl/base/rect.h" |
32 | | #include "lib/jxl/base/status.h" |
33 | | #include "lib/jxl/chroma_from_luma.h" |
34 | | #include "lib/jxl/common.h" |
35 | | #include "lib/jxl/compressed_dc.h" |
36 | | #include "lib/jxl/dec_ans.h" |
37 | | #include "lib/jxl/dec_modular.h" |
38 | | #include "lib/jxl/enc_ans.h" |
39 | | #include "lib/jxl/enc_ans_params.h" |
40 | | #include "lib/jxl/enc_aux_out.h" |
41 | | #include "lib/jxl/enc_bit_writer.h" |
42 | | #include "lib/jxl/enc_cache.h" |
43 | | #include "lib/jxl/enc_fields.h" |
44 | | #include "lib/jxl/enc_gaborish.h" |
45 | | #include "lib/jxl/enc_modular_simd.h" |
46 | | #include "lib/jxl/enc_params.h" |
47 | | #include "lib/jxl/enc_patch_dictionary.h" |
48 | | #include "lib/jxl/enc_quant_weights.h" |
49 | | #include "lib/jxl/fields.h" |
50 | | #include "lib/jxl/frame_dimensions.h" |
51 | | #include "lib/jxl/frame_header.h" |
52 | | #include "lib/jxl/image.h" |
53 | | #include "lib/jxl/image_metadata.h" |
54 | | #include "lib/jxl/image_ops.h" |
55 | | #include "lib/jxl/memory_manager_internal.h" |
56 | | #include "lib/jxl/modular/encoding/context_predict.h" |
57 | | #include "lib/jxl/modular/encoding/dec_ma.h" |
58 | | #include "lib/jxl/modular/encoding/enc_encoding.h" |
59 | | #include "lib/jxl/modular/encoding/enc_ma.h" |
60 | | #include "lib/jxl/modular/encoding/encoding.h" |
61 | | #include "lib/jxl/modular/encoding/ma_common.h" |
62 | | #include "lib/jxl/modular/modular_image.h" |
63 | | #include "lib/jxl/modular/options.h" |
64 | | #include "lib/jxl/modular/transform/enc_rct.h" |
65 | | #include "lib/jxl/modular/transform/enc_transform.h" |
66 | | #include "lib/jxl/modular/transform/squeeze.h" |
67 | | #include "lib/jxl/modular/transform/squeeze_params.h" |
68 | | #include "lib/jxl/modular/transform/transform.h" |
69 | | #include "lib/jxl/pack_signed.h" |
70 | | #include "lib/jxl/passes_state.h" |
71 | | #include "lib/jxl/quant_weights.h" |
72 | | #include "modular/options.h" |
73 | | |
74 | | namespace jxl { |
75 | | |
76 | | namespace { |
77 | | // constexpr bool kPrintTree = false; |
78 | | |
// Default quantization factors for Squeeze.
// The tables below are tuned for -Q 50; other qualities simply scale the
// factors (results are rounded down and can never get below 1).

// Tweaks the overall quality range: decrease this number for higher quality.
constexpr float squeeze_quality_factor = 0.35;
// Tweaks the balance between luma (or anything non-chroma) and chroma:
// decrease this number for higher quality luma.
constexpr float squeeze_luma_factor = 1.1;
constexpr float squeeze_quality_factor_xyb = 4.0f;
constexpr float squeeze_quality_factor_y = 1.5f;

// Per-channel factors for XYB data; one row per channel.
constexpr float squeeze_xyb_qtable[3][16] = {
    // Y
    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
     0.08, 0.04, 0.02, 0.01, 0.005},
    // X
    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5},
    // B-Y
    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5},
};

// Factors for luma (or any non-chroma channel).
constexpr float squeeze_luma_qtable[16] = {
    163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
    0.64,   0.32,  0.16,  0.08,  0.04,  0.02, 0.01, 0.005};

// For 8-bit input, the range of YCoCg chroma is -255..255, so basically this
// does 4:2:0 subsampling (the two most fine grained layers get quantized
// away).
constexpr float squeeze_chroma_qtable[16] = {
    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
108 | | |
109 | | // Merges the trees in `trees` using nodes that decide on stream_id, as defined |
110 | | // by `tree_splits`. |
111 | | Status MergeTrees(const std::vector<Tree>& trees, |
112 | | const std::vector<size_t>& tree_splits, size_t begin, |
113 | 2.33k | size_t end, Tree* tree) { |
114 | 2.33k | JXL_ENSURE(trees.size() + 1 == tree_splits.size()); |
115 | 2.33k | JXL_ENSURE(end > begin); |
116 | 2.33k | JXL_ENSURE(end <= trees.size()); |
117 | 2.33k | if (end == begin + 1) { |
118 | | // Insert the tree, adding the opportune offset to all child nodes. |
119 | | // This will make the leaf IDs wrong, but subsequent roundtripping will fix |
120 | | // them. |
121 | 1.70k | size_t sz = tree->size(); |
122 | 1.70k | tree->insert(tree->end(), trees[begin].begin(), trees[begin].end()); |
123 | 57.0k | for (size_t i = sz; i < tree->size(); i++) { |
124 | 55.3k | (*tree)[i].lchild += sz; |
125 | 55.3k | (*tree)[i].rchild += sz; |
126 | 55.3k | } |
127 | 1.70k | return true; |
128 | 1.70k | } |
129 | 633 | size_t mid = (begin + end) / 2; |
130 | 633 | size_t splitval = tree_splits[mid] - 1; |
131 | 633 | size_t cur = tree->size(); |
132 | 633 | tree->emplace_back(1 /*stream_id*/, static_cast<int>(splitval), 0, 0, |
133 | 633 | Predictor::Zero, 0, 1); |
134 | 633 | (*tree)[cur].lchild = tree->size(); |
135 | 633 | JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, mid, end, tree)); |
136 | 633 | (*tree)[cur].rchild = tree->size(); |
137 | 633 | JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, begin, mid, tree)); |
138 | 633 | return true; |
139 | 633 | } |
140 | | |
141 | 6.52k | void QuantizeChannel(Channel& ch, const int q) { |
142 | 6.52k | if (q == 1) return; |
143 | 233k | for (size_t y = 0; y < ch.plane.ysize(); y++) { |
144 | 230k | pixel_type* row = ch.plane.Row(y); |
145 | 5.34M | for (size_t x = 0; x < ch.plane.xsize(); x++) { |
146 | 5.11M | if (row[x] < 0) { |
147 | 1.15M | row[x] = -((-row[x] + q / 2) / q) * q; |
148 | 3.96M | } else { |
149 | 3.96M | row[x] = ((row[x] + q / 2) / q) * q; |
150 | 3.96M | } |
151 | 5.11M | } |
152 | 230k | } |
153 | 2.76k | } |
154 | | |
155 | | // convert binary32 float that corresponds to custom [bits]-bit float (with |
156 | | // [exp_bits] exponent bits) to a [bits]-bit integer representation that should |
157 | | // fit in pixel_type |
158 | | Status float_to_int(const float* const row_in, pixel_type* const row_out, |
159 | | size_t xsize, unsigned int bits, unsigned int exp_bits, |
160 | 2.43M | bool fp, double dfactor) { |
161 | 2.43M | JXL_ENSURE(sizeof(pixel_type) * 8 >= bits); |
162 | 2.50M | if (!fp) { |
163 | 2.50M | if (bits > 22) { |
164 | 0 | for (size_t x = 0; x < xsize; ++x) { |
165 | 0 | row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5); |
166 | 0 | } |
167 | 2.50M | } else { |
168 | 2.50M | float factor = dfactor; |
169 | 111M | for (size_t x = 0; x < xsize; ++x) { |
170 | 108M | row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f); |
171 | 108M | } |
172 | 2.50M | } |
173 | 2.50M | return true; |
174 | 2.50M | } |
175 | 18.4E | if (bits == 32 && fp) { |
176 | 0 | JXL_ENSURE(exp_bits == 8); |
177 | 0 | memcpy(static_cast<void*>(row_out), static_cast<const void*>(row_in), |
178 | 0 | 4 * xsize); |
179 | 0 | return true; |
180 | 0 | } |
181 | | |
182 | 18.4E | JXL_ENSURE(bits > 0); |
183 | 18.4E | int exp_bias = (1 << (exp_bits - 1)) - 1; |
184 | 18.4E | int max_exp = (1 << exp_bits) - 1; |
185 | 18.4E | uint32_t sign = (1u << (bits - 1)); |
186 | 18.4E | int mant_bits = bits - exp_bits - 1; |
187 | 18.4E | int mant_shift = 23 - mant_bits; |
188 | 18.4E | for (size_t x = 0; x < xsize; ++x) { |
189 | 0 | uint32_t f; |
190 | 0 | memcpy(&f, &row_in[x], 4); |
191 | 0 | int signbit = (f >> 31); |
192 | 0 | f &= 0x7fffffff; |
193 | 0 | if (f == 0) { |
194 | 0 | row_out[x] = (signbit ? sign : 0); |
195 | 0 | continue; |
196 | 0 | } |
197 | 0 | int exp = (f >> 23) - 127; |
198 | 0 | int mantissa = (f & 0x007fffff); |
199 | | // broke up the binary32 into its parts, now reassemble into |
200 | | // arbitrary float |
201 | 0 | if (exp == 128) { |
202 | | // NaN or infinity |
203 | 0 | f = (signbit ? sign : 0); |
204 | 0 | f |= ((1 << exp_bits) - 1) << mant_bits; |
205 | 0 | f |= mantissa >> mant_shift; |
206 | 0 | row_out[x] = static_cast<pixel_type>(f); |
207 | 0 | continue; |
208 | 0 | } |
209 | 0 | exp += exp_bias; |
210 | 0 | if (exp <= 0) { // will become a subnormal number |
211 | | // add implicit leading 1 to mantissa |
212 | 0 | mantissa |= 0x00800000; |
213 | 0 | if (exp < -mant_bits) { |
214 | 0 | return JXL_FAILURE( |
215 | 0 | "Invalid float number: %g cannot be represented with %i " |
216 | 0 | "exp_bits and %i mant_bits (exp %i)", |
217 | 0 | row_in[x], exp_bits, mant_bits, exp); |
218 | 0 | } |
219 | 0 | mantissa >>= 1 - exp; |
220 | 0 | exp = 0; |
221 | 0 | } |
222 | | // exp should be representable in exp_bits, otherwise input was |
223 | | // invalid; max_exp is NaN or infinity |
224 | 0 | if (exp >= max_exp) return JXL_FAILURE("Invalid float exponent"); |
225 | 0 | if (mantissa & ((1 << mant_shift) - 1)) { |
226 | 0 | return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x], |
227 | 0 | mantissa); |
228 | 0 | } |
229 | 0 | mantissa >>= mant_shift; |
230 | 0 | f = (signbit ? sign : 0); |
231 | 0 | f |= (exp << mant_bits); |
232 | 0 | f |= mantissa; |
233 | 0 | row_out[x] = static_cast<pixel_type>(f); |
234 | 0 | } |
235 | 18.4E | return true; |
236 | 18.4E | } |
237 | | |
238 | 352 | float EstimateWPCost(const Image& img, size_t i) { |
239 | 352 | size_t extra_bits = 0; |
240 | 352 | float histo_cost = 0; |
241 | 352 | HybridUintConfig config; |
242 | 352 | int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31, |
243 | 352 | -23, -15, -11, -7, -4, -3, -1, 0, 1, |
244 | 352 | 3, 5, 7, 11, 15, 23, 31, 47, 63, |
245 | 352 | 95, 127, 191, 255, 392, 500}; |
246 | 352 | constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; |
247 | 352 | Histogram histo[nc] = {}; |
248 | 352 | weighted::Header wp_header; |
249 | 352 | PredictorMode(i, &wp_header); |
250 | 672 | for (const Channel& ch : img.channel) { |
251 | 672 | const ptrdiff_t onerow = ch.plane.PixelsPerRow(); |
252 | 672 | weighted::State wp_state(wp_header, ch.w, ch.h); |
253 | 672 | Properties properties(1); |
254 | 20.4k | for (size_t y = 0; y < ch.h; y++) { |
255 | 19.7k | const pixel_type* JXL_RESTRICT r = ch.Row(y); |
256 | 117k | for (size_t x = 0; x < ch.w; x++) { |
257 | 97.9k | size_t offset = 0; |
258 | 18.4E | pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); |
259 | 97.9k | pixel_type_w top = (y ? *(r + x - onerow) : left); |
260 | 97.9k | pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); |
261 | 97.9k | pixel_type_w topright = |
262 | 97.9k | (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top); |
263 | 97.9k | pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top); |
264 | 97.9k | pixel_type guess = wp_state.Predict</*compute_properties=*/true>( |
265 | 97.9k | x, y, ch.w, top, left, topright, topleft, toptop, &properties, |
266 | 97.9k | offset); |
267 | 97.9k | size_t ctx = 0; |
268 | 2.92M | for (int c : cutoffs) { |
269 | 2.92M | ctx += (c >= properties[0]) ? 1 : 0; |
270 | 2.92M | } |
271 | 97.9k | pixel_type res = r[x] - guess; |
272 | 97.9k | uint32_t token; |
273 | 97.9k | uint32_t nbits; |
274 | 97.9k | uint32_t bits; |
275 | 97.9k | config.Encode(PackSigned(res), &token, &nbits, &bits); |
276 | 97.9k | histo[ctx].Add(token); |
277 | 97.9k | extra_bits += nbits; |
278 | 97.9k | wp_state.UpdateErrors(r[x], x, y, ch.w); |
279 | 97.9k | } |
280 | 19.7k | } |
281 | 22.6k | for (auto& h : histo) { |
282 | 22.6k | histo_cost += h.ShannonEntropy(); |
283 | 22.6k | h.Clear(); |
284 | 22.6k | } |
285 | 672 | } |
286 | 352 | return histo_cost + extra_bits; |
287 | 352 | } |
288 | | |
289 | | bool do_transform(Image& image, const Transform& tr, |
290 | | const weighted::Header& wp_header, |
291 | 19.3k | jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) { |
292 | 19.3k | Transform t = tr; |
293 | 19.3k | bool did_it = true; |
294 | 19.3k | if (force_jxlart) { |
295 | 0 | if (!t.MetaApply(image)) return false; |
296 | 19.3k | } else { |
297 | 19.3k | did_it = TransformForward(t, image, wp_header, pool); |
298 | 19.3k | } |
299 | 19.3k | if (did_it) image.transform.push_back(t); |
300 | 19.3k | return did_it; |
301 | 19.3k | } |
302 | | |
303 | | StatusOr<bool> maybe_do_transform(Image& image, const Transform& tr, |
304 | | const CompressParams& cparams, |
305 | | const weighted::Header& wp_header, |
306 | | float cost_before, |
307 | | jxl::ThreadPool* pool = nullptr, |
308 | 18.3k | bool force_jxlart = false) { |
309 | 18.3k | if (force_jxlart || cparams.speed_tier >= SpeedTier::kSquirrel) { |
310 | 14.4k | return do_transform(image, tr, wp_header, pool, force_jxlart); |
311 | 14.4k | } |
312 | 3.90k | bool did_it = do_transform(image, tr, wp_header, pool); |
313 | 3.90k | if (did_it) { |
314 | 812 | JXL_ASSIGN_OR_RETURN(float cost_after, EstimateCost(image)); |
315 | 812 | JXL_DEBUG_V(7, "Cost before: %f cost after: %f", cost_before, cost_after); |
316 | 812 | if (cost_after > cost_before) { |
317 | 333 | Transform t = image.transform.back(); |
318 | 333 | if (!t.Inverse(image, wp_header, pool)) { |
319 | 0 | return false; |
320 | 0 | } |
321 | 333 | image.transform.pop_back(); |
322 | 333 | did_it = false; |
323 | 333 | } |
324 | 812 | } |
325 | 3.90k | return did_it; |
326 | 3.90k | } |
327 | | |
328 | | Status try_palettes(Image& gi, int& max_bitdepth, int& maxval, |
329 | | const CompressParams& cparams_, |
330 | | float channel_colors_percent, |
331 | 11.1k | jxl::ThreadPool* pool = nullptr) { |
332 | 11.1k | float cost_before = 0.f; |
333 | 11.1k | size_t did_palette = 0; |
334 | 11.1k | float nb_pixels = gi.channel[0].w * gi.channel[0].h; |
335 | 11.1k | int nb_chans = gi.channel.size() - gi.nb_meta_channels; |
336 | | // arbitrary estimate: 4.8 bpp for 8-bit RGB |
337 | 11.1k | float arbitrary_bpp_estimate = 0.2f * gi.bitdepth * nb_chans; |
338 | | |
339 | 11.1k | if (cparams_.palette_colors != 0 || cparams_.lossy_palette) { |
340 | | // when not estimating, assume some arbitrary bpp |
341 | 10.2k | if (cparams_.speed_tier <= SpeedTier::kSquirrel) { |
342 | 2.86k | JXL_ASSIGN_OR_RETURN(cost_before, EstimateCost(gi)); |
343 | 7.42k | } else { |
344 | 7.42k | cost_before = nb_pixels * arbitrary_bpp_estimate; |
345 | 7.42k | } |
346 | | // all-channel palette (e.g. RGBA) |
347 | 10.2k | if (nb_chans > 1) { |
348 | 5.50k | Transform maybe_palette(TransformId::kPalette); |
349 | 5.50k | maybe_palette.begin_c = gi.nb_meta_channels; |
350 | 5.50k | maybe_palette.num_c = nb_chans; |
351 | | // Heuristic choice of max colors for a palette: |
352 | | // max_colors = nb_pixels * estimated_bpp_without_palette * 0.0005 + |
353 | | // + nb_pixels / 128 + 128 |
354 | | // (estimated_bpp_without_palette = cost_before / nb_pixels) |
355 | | // Rationale: small image with large palette is not effective; |
356 | | // also if the entropy (estimated bpp) is low (e.g. mostly solid/gradient |
357 | | // areas), palette is less useful and may even be counterproductive. |
358 | 5.50k | maybe_palette.nb_colors = std::min( |
359 | 5.50k | static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128), |
360 | 5.50k | std::abs(cparams_.palette_colors)); |
361 | 5.50k | maybe_palette.ordered_palette = cparams_.palette_colors >= 0; |
362 | 5.50k | maybe_palette.lossy_palette = |
363 | 5.50k | (cparams_.lossy_palette && maybe_palette.num_c == 3); |
364 | 5.50k | if (maybe_palette.lossy_palette) { |
365 | 58 | maybe_palette.predictor = Predictor::Average4; |
366 | 58 | } |
367 | | // TODO(veluca): use a custom weighted header if using the weighted |
368 | | // predictor. |
369 | 5.50k | JXL_ASSIGN_OR_RETURN( |
370 | 5.50k | did_palette, |
371 | 5.50k | maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header(), |
372 | 5.50k | cost_before, pool, cparams_.options.zero_tokens)); |
373 | 5.50k | } |
374 | | // all-minus-one-channel palette (RGB with separate alpha, or CMY with |
375 | | // separate K) |
376 | 10.2k | if (!did_palette && nb_chans > 3) { |
377 | 1.56k | Transform maybe_palette_3(TransformId::kPalette); |
378 | 1.56k | maybe_palette_3.begin_c = gi.nb_meta_channels; |
379 | 1.56k | maybe_palette_3.num_c = nb_chans - 1; |
380 | 1.56k | maybe_palette_3.nb_colors = std::min( |
381 | 1.56k | static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128), |
382 | 1.56k | std::abs(cparams_.palette_colors)); |
383 | 1.56k | maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; |
384 | 1.56k | maybe_palette_3.lossy_palette = cparams_.lossy_palette; |
385 | 1.56k | if (maybe_palette_3.lossy_palette) { |
386 | 4 | maybe_palette_3.predictor = Predictor::Average4; |
387 | 4 | } |
388 | 1.56k | JXL_ASSIGN_OR_RETURN( |
389 | 1.56k | did_palette, |
390 | 1.56k | maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header(), |
391 | 1.56k | cost_before, pool, cparams_.options.zero_tokens)); |
392 | 1.56k | } |
393 | 10.2k | } |
394 | | |
395 | 11.1k | if (channel_colors_percent > 0) { |
396 | | // single channel palette (like FLIF's ChannelCompact) |
397 | 8.27k | size_t nb_channels = gi.channel.size() - gi.nb_meta_channels - did_palette; |
398 | 8.27k | int orig_bitdepth = max_bitdepth; |
399 | 8.27k | max_bitdepth = 0; |
400 | 8.27k | if (nb_channels > 0 && (did_palette || cost_before == 0)) { |
401 | 104 | if (cparams_.speed_tier < SpeedTier::kSquirrel) { |
402 | 15 | JXL_ASSIGN_OR_RETURN(cost_before, EstimateCost(gi)); |
403 | 89 | } else { |
404 | 89 | cost_before = 0; |
405 | 89 | } |
406 | 104 | } |
407 | 19.6k | for (size_t i = did_palette; i < nb_channels + did_palette; i++) { |
408 | 11.3k | int32_t min; |
409 | 11.3k | int32_t max; |
410 | 11.3k | compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); |
411 | 11.3k | int64_t colors = static_cast<int64_t>(max) - min + 1; |
412 | 11.3k | JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); |
413 | 11.3k | Transform maybe_palette_1(TransformId::kPalette); |
414 | 11.3k | maybe_palette_1.begin_c = i + gi.nb_meta_channels; |
415 | 11.3k | maybe_palette_1.num_c = 1; |
416 | | // simple heuristic: if less than X percent of the values in the range |
417 | | // actually occur, it is probably worth it to do a compaction |
418 | | // (but only if the channel palette is less than 6% the size of the |
419 | | // image itself) |
420 | 11.3k | maybe_palette_1.nb_colors = |
421 | 11.3k | std::min(static_cast<int>(nb_pixels / 16), |
422 | 11.3k | static_cast<int>(channel_colors_percent / 100. * colors)); |
423 | 11.3k | JXL_ASSIGN_OR_RETURN( |
424 | 11.3k | bool did_ch_palette, |
425 | 11.3k | maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header(), |
426 | 11.3k | cost_before, pool)); |
427 | 11.3k | if (did_ch_palette) { |
428 | | // effective bit depth is lower, adjust quantization accordingly |
429 | 2.07k | compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); |
430 | 2.07k | if (max < maxval) maxval = max; |
431 | 2.07k | int ch_bitdepth = |
432 | 2.07k | (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0); |
433 | 2.07k | if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth; |
434 | 9.29k | } else { |
435 | 9.29k | max_bitdepth = orig_bitdepth; |
436 | 9.29k | } |
437 | 11.3k | } |
438 | 8.27k | } |
439 | 11.1k | return true; |
440 | 11.1k | } |
441 | | |
442 | | } // namespace |
443 | | |
444 | | StatusOr<std::unique_ptr<ModularFrameEncoder>> ModularFrameEncoder::Create( |
445 | | JxlMemoryManager* memory_manager, const FrameHeader& frame_header, |
446 | 2.54k | const CompressParams& cparams_orig, bool streaming_mode) { |
447 | 2.54k | auto self = std::unique_ptr<ModularFrameEncoder>( |
448 | 2.54k | new ModularFrameEncoder(memory_manager)); |
449 | 2.54k | JXL_RETURN_IF_ERROR(self->Init(frame_header, cparams_orig, streaming_mode)); |
450 | 2.54k | return self; |
451 | 2.54k | } |
452 | | |
453 | | ModularFrameEncoder::ModularFrameEncoder(JxlMemoryManager* memory_manager) |
454 | 2.54k | : memory_manager_(memory_manager) {} |
455 | | |
456 | | Status ModularFrameEncoder::Init(const FrameHeader& frame_header, |
457 | | const CompressParams& cparams_orig, |
458 | 2.54k | bool streaming_mode) { |
459 | 2.54k | frame_dim_ = frame_header.ToFrameDimensions(); |
460 | 2.54k | cparams_ = cparams_orig; |
461 | | |
462 | 2.54k | size_t num_streams = |
463 | 2.54k | ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes); |
464 | | |
465 | | // Progressive lossless only benefits from levels 2 and higher |
466 | | // Lower levels of faster decoding can outperform higher tiers |
467 | | // depending on the PC |
468 | 2.54k | if (cparams_.responsive == 1 && cparams_.IsLossless() && |
469 | 70 | cparams_.decoding_speed_tier == 1) { |
470 | 16 | cparams_.decoding_speed_tier = 2; |
471 | 16 | } |
472 | 2.54k | if (cparams_.responsive == 1 && cparams_.IsLossless()) { |
473 | | // RCT selection seems bugged with Squeeze, YCoCg works well. |
474 | 70 | if (cparams_.colorspace < 0) { |
475 | 12 | cparams_.colorspace = 6; |
476 | 12 | } |
477 | 70 | } |
478 | | |
479 | 2.54k | if (cparams_.ModularPartIsLossless()) { |
480 | 2.10k | const auto disable_wp = [this] () { |
481 | 463 | cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kNoWP; |
482 | 463 | if (cparams_.options.predictor == Predictor::Weighted) { |
483 | | // Predictor::Best turns to Predictor::Gradient anyways. |
484 | 6 | cparams_.options.predictor = Predictor::Gradient; |
485 | 6 | } |
486 | 463 | }; |
487 | 2.10k | switch (cparams_.decoding_speed_tier) { |
488 | 1.24k | case 0: |
489 | 1.24k | cparams_.options.fast_decode_multiplier = 1.001f; |
490 | 1.24k | break; |
491 | 234 | case 1: // No Weighted predictor |
492 | 234 | cparams_.options.fast_decode_multiplier = 1.005f; |
493 | 234 | disable_wp(); |
494 | 234 | break; |
495 | 229 | case 2: { // No Weighted predictor and Group size 0 defined in |
496 | | // enc_frame.cc |
497 | 229 | cparams_.options.fast_decode_multiplier = 1.015f; |
498 | 229 | disable_wp(); |
499 | 229 | break; |
500 | 0 | } |
501 | 229 | case 3: { // Gradient only, Group size 0, and Fast MA tree |
502 | 229 | cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly; |
503 | 229 | cparams_.options.predictor = Predictor::Gradient; |
504 | 229 | break; |
505 | 0 | } |
506 | 174 | default: { // Gradient only, Group size 0, and No MA tree |
507 | 174 | cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly; |
508 | 174 | cparams_.options.predictor = Predictor::Gradient; |
509 | 174 | cparams_.options.nb_repeats = 0; |
510 | | // Disabling MA Trees sometimes doesn't increase decode speed |
511 | | // depending on PC |
512 | 174 | break; |
513 | 0 | } |
514 | 2.10k | } |
515 | 2.10k | } |
516 | | |
517 | 165k | for (size_t i = 0; i < num_streams; ++i) { |
518 | 162k | stream_images_.emplace_back(memory_manager_); |
519 | 162k | } |
520 | | |
521 | | // use a sensible default if nothing explicit is specified: |
522 | | // Squeeze for lossy, no squeeze for lossless |
523 | 2.54k | if (cparams_.responsive < 0) { |
524 | 1.33k | if (cparams_.ModularPartIsLossless()) { |
525 | 1.24k | cparams_.responsive = 0; |
526 | 1.24k | } else { |
527 | 92 | cparams_.responsive = 1; |
528 | 92 | } |
529 | 1.33k | } |
530 | | |
531 | 2.54k | cparams_.options.splitting_heuristics_node_threshold = |
532 | 2.54k | 75 + 14 * static_cast<int>(cparams_.speed_tier) + |
533 | 2.54k | 10 * cparams_.decoding_speed_tier; |
534 | | |
535 | 2.54k | { |
536 | | // Set properties. |
537 | 2.54k | std::vector<uint32_t> prop_order; |
538 | 2.54k | if (cparams_.responsive) { |
539 | | // Properties in order of their likelihood of being useful for Squeeze |
540 | | // residuals. |
541 | 634 | prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3}; |
542 | 1.90k | } else { |
543 | | // Same, but for the non-Squeeze case. |
544 | 1.90k | prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8}; |
545 | | // if few groups, don't use group as a property |
546 | 1.90k | if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise && |
547 | 988 | cparams_orig.ModularPartIsLossless()) { |
548 | 804 | prop_order.erase(prop_order.begin() + 1); |
549 | 804 | } |
550 | 1.90k | } |
551 | 2.54k | int max_properties = std::min<int>( |
552 | 2.54k | cparams_.options.max_properties, |
553 | 2.54k | static_cast<int>( |
554 | 2.54k | frame_header.nonserialized_metadata->m.num_extra_channels) + |
555 | 2.54k | (frame_header.encoding == FrameEncoding::kModular ? 2 : -1)); |
556 | 2.54k | switch (cparams_.speed_tier) { |
557 | 64 | case SpeedTier::kHare: |
558 | 64 | cparams_.options.splitting_heuristics_properties.assign( |
559 | 64 | prop_order.begin(), prop_order.begin() + 4); |
560 | 64 | cparams_.options.max_property_values = 48; |
561 | 64 | cparams_.options.nb_repeats *= 0.5f; |
562 | 64 | break; |
563 | 186 | case SpeedTier::kWombat: |
564 | 186 | cparams_.options.splitting_heuristics_properties.assign( |
565 | 186 | prop_order.begin(), prop_order.begin() + 5); |
566 | 186 | cparams_.options.max_property_values = 64; |
567 | 186 | cparams_.options.nb_repeats *= 0.7f; |
568 | 186 | break; |
569 | 486 | case SpeedTier::kSquirrel: |
570 | 486 | cparams_.options.splitting_heuristics_properties.assign( |
571 | 486 | prop_order.begin(), prop_order.begin() + 7); |
572 | 486 | cparams_.options.max_property_values = 96; |
573 | 486 | break; |
574 | 724 | case SpeedTier::kKitten: |
575 | 724 | cparams_.options.splitting_heuristics_properties.assign( |
576 | 724 | prop_order.begin(), prop_order.begin() + 10); |
577 | 724 | cparams_.options.max_property_values = 128; |
578 | 724 | cparams_.options.nb_repeats *= 1.1f; |
579 | 724 | break; |
580 | 0 | case SpeedTier::kGlacier: |
581 | 430 | case SpeedTier::kTortoise: |
582 | 430 | cparams_.options.splitting_heuristics_properties = prop_order; |
583 | 430 | cparams_.options.max_property_values = 256; |
584 | 430 | cparams_.options.nb_repeats *= 1.3f; |
585 | 430 | break; |
586 | 653 | default: |
587 | 653 | cparams_.options.splitting_heuristics_properties.assign( |
588 | 653 | prop_order.begin(), prop_order.begin() + 3); |
589 | 653 | cparams_.options.max_property_values = 32; |
590 | 653 | cparams_.options.nb_repeats *= 0.3f; |
591 | 653 | break; |
592 | 2.54k | } |
593 | 2.54k | if (cparams_.speed_tier > SpeedTier::kTortoise) { |
594 | | // Gradient in previous channels. |
595 | 3.13k | for (int i = 0; i < max_properties; i++) { |
596 | 1.01k | cparams_.options.splitting_heuristics_properties.push_back( |
597 | 1.01k | kNumNonrefProperties + i * 4 + 3); |
598 | 1.01k | } |
599 | 2.11k | } else { |
600 | | // All the extra properties in Tortoise mode. |
601 | 1.42k | for (int i = 0; i < max_properties * 4; i++) { |
602 | 992 | cparams_.options.splitting_heuristics_properties.push_back( |
603 | 992 | kNumNonrefProperties + i); |
604 | 992 | } |
605 | 430 | } |
606 | 2.54k | } |
607 | 0 | cparams_.options.nb_repeats = std::min(1.0f, cparams_.options.nb_repeats); |
608 | | |
609 | 2.54k | if ((cparams_.options.predictor == Predictor::Average0 || |
610 | 2.53k | cparams_.options.predictor == Predictor::Average1 || |
611 | 2.49k | cparams_.options.predictor == Predictor::Average2 || |
612 | 2.40k | cparams_.options.predictor == Predictor::Average3 || |
613 | 2.26k | cparams_.options.predictor == Predictor::Average4 || |
614 | 2.24k | cparams_.options.predictor == Predictor::Weighted) && |
615 | 338 | !cparams_.ModularPartIsLossless()) { |
616 | | // Lossy + Average/Weighted predictors does not work, so switch to default |
617 | | // predictors. |
618 | 26 | cparams_.options.predictor = kUndefinedPredictor; |
619 | 26 | } |
620 | | |
621 | 2.54k | if (cparams_.options.predictor == kUndefinedPredictor) { |
622 | | // no explicit predictor(s) given, set a good default |
623 | 918 | if ((cparams_.speed_tier <= SpeedTier::kGlacier || |
624 | 918 | cparams_.modular_mode == false) && |
625 | 646 | cparams_.IsLossless() && cparams_.responsive == JXL_FALSE) { |
626 | | // TODO(veluca): allow all predictors that don't break residual |
627 | | // multipliers in lossy mode. |
628 | 0 | cparams_.options.predictor = Predictor::Variable; |
629 | 918 | } else if (cparams_.responsive || cparams_.lossy_palette) { |
630 | | // zero predictor for Squeeze residues and lossy palette indices |
631 | | // TODO: Try adding 'Squeezed' predictor set, with the most |
632 | | // common predictors used by Variable in squeezed images, including none. |
633 | 162 | cparams_.options.predictor = Predictor::Zero; |
634 | 756 | } else if (!cparams_.IsLossless()) { |
635 | | // If not responsive and lossy. TODO(veluca): use near_lossless instead? |
636 | 614 | cparams_.options.predictor = Predictor::Gradient; |
637 | 614 | } else if (cparams_.speed_tier < SpeedTier::kFalcon) { |
638 | | // try median and weighted predictor for anything else |
639 | 100 | cparams_.options.predictor = Predictor::Best; |
640 | 100 | } else if (cparams_.speed_tier == SpeedTier::kFalcon) { |
641 | | // just weighted predictor in falcon mode |
642 | 6 | cparams_.options.predictor = Predictor::Weighted; |
643 | 36 | } else if (cparams_.speed_tier > SpeedTier::kFalcon) { |
644 | | // just gradient predictor in thunder mode |
645 | 36 | cparams_.options.predictor = Predictor::Gradient; |
646 | 36 | } |
647 | 1.62k | } else { |
648 | 1.62k | if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero; |
649 | 1.62k | } |
650 | 2.54k | if (!cparams_.ModularPartIsLossless()) { |
651 | 436 | if (cparams_.options.predictor == Predictor::Weighted || |
652 | 436 | cparams_.options.predictor == Predictor::Variable || |
653 | 418 | cparams_.options.predictor == Predictor::Best) |
654 | 18 | cparams_.options.predictor = Predictor::Zero; |
655 | 436 | } |
656 | 2.54k | tree_splits_.push_back(0); |
657 | 2.54k | if (cparams_.modular_mode == false) { |
658 | 1.63k | JXL_ASSIGN_OR_RETURN(ModularStreamId qt0, ModularStreamId::QuantTable(0)); |
659 | 1.63k | cparams_.options.fast_decode_multiplier = 1.0f; |
660 | 1.63k | tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_)); |
661 | 1.63k | tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_)); |
662 | 1.63k | tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_)); |
663 | 1.63k | tree_splits_.push_back(qt0.ID(frame_dim_)); |
664 | 1.63k | tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_)); |
665 | 1.63k | ac_metadata_size.resize(frame_dim_.num_dc_groups); |
666 | 1.63k | extra_dc_precision.resize(frame_dim_.num_dc_groups); |
667 | 1.63k | } |
668 | 2.54k | tree_splits_.push_back(num_streams); |
669 | 2.54k | cparams_.options.max_chan_size = frame_dim_.group_dim; |
670 | 2.54k | cparams_.options.group_dim = frame_dim_.group_dim; |
671 | | |
672 | | // TODO(veluca): figure out how to use different predictor sets per channel. |
673 | 2.54k | stream_options_.resize(num_streams, cparams_.options); |
674 | | |
675 | 2.54k | stream_options_[0] = cparams_.options; |
676 | 2.54k | if (cparams_.speed_tier == SpeedTier::kFalcon) { |
677 | 68 | stream_options_[0].tree_kind = ModularOptions::TreeKind::kWPFixedDC; |
678 | 2.47k | } else if (cparams_.speed_tier == SpeedTier::kThunder) { |
679 | 403 | stream_options_[0].tree_kind = ModularOptions::TreeKind::kGradientFixedDC; |
680 | 403 | } |
681 | 2.54k | stream_options_[0].histogram_params = |
682 | 2.54k | HistogramParams::ForModular(cparams_, {}, streaming_mode); |
683 | 2.54k | return true; |
684 | 2.54k | } |
685 | | |
686 | | Status ModularFrameEncoder::ComputeEncodingData( |
687 | | const FrameHeader& frame_header, const ImageMetadata& metadata, |
688 | | Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels, |
689 | | const Rect& group_rect, const FrameDimensions& patch_dim, |
690 | | const Rect& frame_area_rect, PassesEncoderState* JXL_RESTRICT enc_state, |
691 | | const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out, |
692 | 2.11k | bool do_color) { |
693 | 2.11k | JxlMemoryManager* memory_manager = enc_state->memory_manager(); |
694 | 2.11k | JXL_DEBUG_V(6, "Computing modular encoding data for frame %s", |
695 | 2.11k | frame_header.DebugString().c_str()); |
696 | | |
697 | 2.11k | bool groupwise = enc_state->streaming_mode; |
698 | | |
699 | 2.11k | if (do_color && frame_header.loop_filter.gab && !groupwise) { |
700 | 210 | float w = 0.9908511000000001f; |
701 | 210 | float weights[3] = {w, w, w}; |
702 | 210 | JXL_RETURN_IF_ERROR(GaborishInverse(color, Rect(*color), weights, pool)); |
703 | 210 | } |
704 | | |
705 | 2.11k | if (do_color && metadata.bit_depth.bits_per_sample <= 16 && |
706 | 1.98k | cparams_.speed_tier < SpeedTier::kCheetah && |
707 | 890 | cparams_.decoding_speed_tier < 2 && !groupwise) { |
708 | 336 | JXL_RETURN_IF_ERROR(FindBestPatchDictionary( |
709 | 336 | *color, enc_state, cms, nullptr, aux_out, |
710 | 336 | cparams_.color_transform == ColorTransform::kXYB)); |
711 | 336 | JXL_RETURN_IF_ERROR(PatchDictionaryEncoder::SubtractFrom( |
712 | 336 | enc_state->shared.image_features.patches, color)); |
713 | 336 | } |
714 | | |
715 | 2.11k | if (cparams_.custom_splines.HasAny()) { |
716 | 0 | PassesSharedState& shared = enc_state->shared; |
717 | 0 | ImageFeatures& image_features = shared.image_features; |
718 | 0 | image_features.splines = cparams_.custom_splines; |
719 | 0 | } |
720 | | |
721 | | // Convert ImageBundle to modular Image object |
722 | 2.11k | const size_t xsize = patch_dim.xsize; |
723 | 2.11k | const size_t ysize = patch_dim.ysize; |
724 | | |
725 | 2.11k | int nb_chans = 3; |
726 | 2.11k | if (metadata.color_encoding.IsGray() && |
727 | 660 | cparams_.color_transform == ColorTransform::kNone) { |
728 | 516 | nb_chans = 1; |
729 | 516 | } |
730 | 2.11k | if (!do_color) nb_chans = 0; |
731 | | |
732 | 2.11k | nb_chans += extra_channels.size(); |
733 | | |
734 | 2.11k | bool fp = metadata.bit_depth.floating_point_sample && |
735 | 0 | cparams_.color_transform != ColorTransform::kXYB; |
736 | | |
737 | | // bits_per_sample is just metadata for XYB images. |
738 | 2.11k | if (metadata.bit_depth.bits_per_sample >= 32 && do_color && |
739 | 0 | cparams_.color_transform != ColorTransform::kXYB) { |
740 | 0 | if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { |
741 | 0 | return JXL_FAILURE("uint32_t not supported in enc_modular"); |
742 | 0 | } else if (metadata.bit_depth.bits_per_sample > 32) { |
743 | 0 | return JXL_FAILURE("bits_per_sample > 32 not supported"); |
744 | 0 | } |
745 | 0 | } |
746 | | |
747 | | // in the non-float case, there is an implicit 0 sign bit |
748 | 2.11k | int max_bitdepth = |
749 | 2.11k | do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0; |
750 | 2.11k | Image& gi = stream_images_[0]; |
751 | 2.11k | JXL_ASSIGN_OR_RETURN( |
752 | 2.11k | gi, Image::Create(memory_manager, xsize, ysize, |
753 | 2.11k | metadata.bit_depth.bits_per_sample, nb_chans)); |
754 | 2.11k | int c = 0; |
755 | 2.11k | if (cparams_.color_transform == ColorTransform::kXYB && |
756 | 563 | cparams_.modular_mode == true) { |
757 | 436 | float enc_factors[3] = {65536.0f, 4096.0f, 4096.0f}; |
758 | 436 | if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) { |
759 | | // quantize XYB here and then treat it as a lossless image |
760 | 216 | enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance); |
761 | 216 | enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); |
762 | 216 | enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); |
763 | 216 | cparams_.butteraugli_distance = 0; |
764 | 216 | } |
765 | 436 | if (cparams_.manual_xyb_factors.size() == 3) { |
766 | 0 | JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC( |
767 | 0 | memory_manager, &enc_state->shared.matrices, |
768 | 0 | cparams_.manual_xyb_factors.data())); |
769 | | // TODO(jon): update max_bitdepth in this case |
770 | 436 | } else { |
771 | 436 | JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC( |
772 | 436 | memory_manager, &enc_state->shared.matrices, enc_factors)); |
773 | 436 | max_bitdepth = 12; |
774 | 436 | } |
775 | 436 | } |
776 | 2.11k | pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0; |
777 | 2.11k | if (do_color) { |
778 | 7.93k | for (; c < 3; c++) { |
779 | 5.95k | if (metadata.color_encoding.IsGray() && |
780 | 1.80k | cparams_.color_transform == ColorTransform::kNone && |
781 | 1.54k | c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0)) |
782 | 1.03k | continue; |
783 | 4.92k | int c_out = c; |
784 | | // XYB is encoded as YX(B-Y) |
785 | 4.92k | if (cparams_.color_transform == ColorTransform::kXYB && c < 2) |
786 | 872 | c_out = 1 - c_out; |
787 | 4.92k | double factor = maxval; |
788 | 4.92k | if (cparams_.color_transform == ColorTransform::kXYB) |
789 | 1.30k | factor = enc_state->shared.matrices.InvDCQuant(c); |
790 | 4.92k | if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) { |
791 | 436 | JXL_ENSURE(!fp); |
792 | 97.0k | for (size_t y = 0; y < ysize; ++y) { |
793 | 96.5k | const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); |
794 | 96.5k | pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); |
795 | 96.5k | pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y); |
796 | 5.25M | for (size_t x = 0; x < xsize; ++x) { |
797 | | // TODO(eustas): check if std::roundf is appropriate |
798 | 5.15M | row_out[x] = row_in[x] * factor + 0.5f; |
799 | 5.15M | row_out[x] -= row_Y[x]; |
800 | 5.15M | } |
801 | 96.5k | } |
802 | 4.48k | } else { |
803 | 4.48k | int bits = metadata.bit_depth.bits_per_sample; |
804 | 4.48k | int exp_bits = metadata.bit_depth.exponent_bits_per_sample; |
805 | 4.48k | gi.channel[c_out].hshift = frame_header.chroma_subsampling.HShift(c); |
806 | 4.48k | gi.channel[c_out].vshift = frame_header.chroma_subsampling.VShift(c); |
807 | 4.48k | size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift); |
808 | 4.48k | size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift); |
809 | 4.48k | JXL_RETURN_IF_ERROR( |
810 | 4.48k | gi.channel[c_out].shrink(xsize_shifted, ysize_shifted)); |
811 | 4.48k | const auto process_row = [&](const int task, |
812 | 2.20M | const int thread) -> Status { |
813 | 2.20M | const size_t y = task; |
814 | 2.20M | const float* const JXL_RESTRICT row_in = |
815 | 2.20M | color->PlaneRow(c, y + group_rect.y0()) + group_rect.x0(); |
816 | 2.20M | pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); |
817 | 2.20M | JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, xsize_shifted, bits, |
818 | 2.20M | exp_bits, fp, factor)); |
819 | 2.20M | return true; |
820 | 2.20M | }; |
821 | 4.48k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted, |
822 | 4.48k | ThreadPool::NoInit, process_row, |
823 | 4.48k | "float2int")); |
824 | 4.48k | } |
825 | 4.92k | } |
826 | 1.98k | if (metadata.color_encoding.IsGray() && |
827 | 600 | cparams_.color_transform == ColorTransform::kNone) |
828 | 516 | c = 1; |
829 | 1.98k | } |
830 | | |
831 | 2.94k | for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) { |
832 | 833 | const ExtraChannelInfo& eci = metadata.extra_channel_info[ec]; |
833 | 833 | size_t ecups = frame_header.extra_channel_upsampling[ec]; |
834 | 833 | JXL_RETURN_IF_ERROR( |
835 | 833 | gi.channel[c].shrink(DivCeil(patch_dim.xsize_upsampled, ecups), |
836 | 833 | DivCeil(patch_dim.ysize_upsampled, ecups))); |
837 | 833 | gi.channel[c].hshift = gi.channel[c].vshift = |
838 | 833 | CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); |
839 | | |
840 | 833 | int bits = eci.bit_depth.bits_per_sample; |
841 | 833 | int exp_bits = eci.bit_depth.exponent_bits_per_sample; |
842 | 833 | bool ec_fp = eci.bit_depth.floating_point_sample; |
843 | 833 | double factor = (ec_fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1)); |
844 | 833 | if (bits + (ec_fp ? 0 : 1) > max_bitdepth) { |
845 | 145 | max_bitdepth = bits + (ec_fp ? 0 : 1); |
846 | 145 | } |
847 | 567k | const auto process_row = [&](const int task, const int thread) -> Status { |
848 | 567k | const size_t y = task; |
849 | 567k | const float* const JXL_RESTRICT row_in = |
850 | 567k | extra_channels[ec].Row(y + group_rect.y0()) + group_rect.x0(); |
851 | 567k | pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y); |
852 | 567k | JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, |
853 | 567k | gi.channel[c].plane.xsize(), bits, |
854 | 567k | exp_bits, ec_fp, factor)); |
855 | 567k | return true; |
856 | 567k | }; |
857 | 833 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, gi.channel[c].plane.ysize(), |
858 | 833 | ThreadPool::NoInit, process_row, |
859 | 833 | "float2int")); |
860 | 833 | } |
861 | 2.11k | JXL_ENSURE(c == nb_chans); |
862 | | |
863 | 2.11k | int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32); |
864 | 2.11k | if (max_bitdepth > level_max_bitdepth) { |
865 | 0 | return JXL_FAILURE( |
866 | 0 | "Bitdepth too high for level %i (need %i bits, have only %i in this " |
867 | 0 | "level)", |
868 | 0 | cparams_.level, max_bitdepth, level_max_bitdepth); |
869 | 0 | } |
870 | | |
871 | | // Set options and apply transformations |
872 | 2.11k | if (!cparams_.ModularPartIsLossless()) { |
873 | 436 | if (cparams_.palette_colors != 0) { |
874 | 428 | JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms"); |
875 | 428 | } |
876 | 436 | if (cparams_.color_transform == ColorTransform::kXYB) { |
877 | 436 | cparams_.channel_colors_pre_transform_percent = 0; |
878 | 436 | } |
879 | 436 | cparams_.channel_colors_percent = 0; |
880 | 436 | cparams_.palette_colors = 0; |
881 | 436 | cparams_.lossy_palette = false; |
882 | 436 | } |
883 | | |
884 | | // Global palette transforms |
885 | 2.11k | float channel_colors_percent = 0; |
886 | 2.11k | if (!cparams_.lossy_palette && |
887 | 2.00k | (cparams_.speed_tier <= SpeedTier::kThunder || |
888 | 2.00k | (do_color && metadata.bit_depth.bits_per_sample > 8))) { |
889 | 2.00k | channel_colors_percent = cparams_.channel_colors_pre_transform_percent; |
890 | 2.00k | } |
891 | 2.11k | if (!groupwise) { |
892 | 916 | JXL_RETURN_IF_ERROR(try_palettes(gi, max_bitdepth, maxval, cparams_, |
893 | 916 | channel_colors_percent, pool)); |
894 | 916 | } |
895 | | |
896 | | // don't do an RCT if we're short on bits |
897 | 2.11k | if (cparams_.color_transform == ColorTransform::kNone && do_color && |
898 | 1.54k | gi.channel.size() - gi.nb_meta_channels >= 3 && |
899 | 813 | max_bitdepth + 1 < level_max_bitdepth) { |
900 | 813 | if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() || |
901 | 222 | cparams_.speed_tier > SpeedTier::kHare)) { |
902 | 79 | Transform ycocg{TransformId::kRCT}; |
903 | 79 | ycocg.rct_type = 6; |
904 | 79 | ycocg.begin_c = gi.nb_meta_channels; |
905 | 79 | do_transform(gi, ycocg, weighted::Header(), pool); |
906 | 79 | max_bitdepth++; |
907 | 734 | } else if (cparams_.colorspace > 0) { |
908 | 589 | Transform sg(TransformId::kRCT); |
909 | 589 | sg.begin_c = gi.nb_meta_channels; |
910 | 589 | sg.rct_type = cparams_.colorspace; |
911 | 589 | do_transform(gi, sg, weighted::Header(), pool); |
912 | 589 | max_bitdepth++; |
913 | 589 | } |
914 | 813 | } |
915 | | |
916 | 2.11k | if (cparams_.move_to_front_from_channel > 0) { |
917 | 0 | for (size_t tgt = 0; |
918 | 0 | tgt + cparams_.move_to_front_from_channel < gi.channel.size(); tgt++) { |
919 | 0 | size_t pos = cparams_.move_to_front_from_channel; |
920 | 0 | while (pos > 0) { |
921 | 0 | Transform move(TransformId::kRCT); |
922 | 0 | if (pos == 1) { |
923 | 0 | move.begin_c = tgt; |
924 | 0 | move.rct_type = 28; // RGB -> GRB |
925 | 0 | pos -= 1; |
926 | 0 | } else { |
927 | 0 | move.begin_c = tgt + pos - 2; |
928 | 0 | move.rct_type = 14; // RGB -> BRG |
929 | 0 | pos -= 2; |
930 | 0 | } |
931 | 0 | do_transform(gi, move, weighted::Header(), pool); |
932 | 0 | } |
933 | 0 | } |
934 | 0 | } |
935 | | |
936 | | // don't do squeeze if we don't have some spare bits |
937 | 2.11k | if (!groupwise && cparams_.responsive && !gi.channel.empty() && |
938 | 322 | max_bitdepth + 2 < level_max_bitdepth) { |
939 | 322 | Transform t(TransformId::kSqueeze); |
940 | | // Check if default squeeze parameters are ok. |
941 | 322 | std::vector<SqueezeParams> params; |
942 | 322 | DefaultSqueezeParameters(¶ms, gi); |
943 | | // If image is smaller than group_dim, then default squeeze parameters |
944 | | // are not going too far. Else, channel size don't turn zero. Thus we only |
945 | | // check if tile does not go to zero-dim. |
946 | 322 | size_t shift_cap = 7 + frame_header.group_size_shift; |
947 | 322 | size_t hshift = 0; |
948 | 322 | size_t vshift = 0; |
949 | 2.84k | for (size_t i = 0; i < params.size(); ++i) { |
950 | 2.62k | if (params[i].horizontal) { |
951 | 1.69k | hshift++; |
952 | 1.69k | } else { |
953 | 932 | vshift++; |
954 | 932 | } |
955 | 2.62k | size_t dc_boost = (std::min(hshift, vshift) >= 3) ? 3 : 0; |
956 | | // In case we squeeze too much, truncate squeeze script. |
957 | 2.62k | if (std::max(hshift, vshift) > shift_cap + dc_boost) { |
958 | 106 | params.resize(i - 1); |
959 | 106 | t.squeezes = params; |
960 | 106 | break; |
961 | 106 | } |
962 | 2.62k | } |
963 | 322 | do_transform(gi, t, weighted::Header(), pool); |
964 | 322 | max_bitdepth += 2; |
965 | 322 | } |
966 | | |
967 | 2.11k | if (max_bitdepth + 1 > level_max_bitdepth) { |
968 | | // force no group RCTs if we don't have a spare bit |
969 | 0 | cparams_.colorspace = 0; |
970 | 0 | } |
971 | 2.11k | JXL_ENSURE(max_bitdepth <= level_max_bitdepth); |
972 | | |
973 | 2.11k | if (!cparams_.ModularPartIsLossless()) { |
974 | 436 | quants_.resize(gi.channel.size(), 1); |
975 | 436 | float quantizer = 0.25f; |
976 | 436 | if (!cparams_.responsive) { |
977 | 216 | JXL_DEBUG_V(1, |
978 | 216 | "Warning: lossy compression without Squeeze " |
979 | 216 | "transform is just color quantization."); |
980 | 216 | quantizer *= 0.1f; |
981 | 216 | } |
982 | 436 | float bitdepth_correction = 1.f; |
983 | 436 | if (cparams_.color_transform != ColorTransform::kXYB) { |
984 | 0 | bitdepth_correction = maxval / 255.f; |
985 | 0 | } |
986 | 436 | std::vector<float> quantizers; |
987 | 1.74k | for (size_t i = 0; i < 3; i++) { |
988 | 1.30k | float dist = cparams_.butteraugli_distance; |
989 | 1.30k | quantizers.push_back(quantizer * powf(dist, 1.2) * bitdepth_correction); |
990 | 1.30k | } |
991 | 520 | for (size_t i = 0; i < extra_channels.size(); i++) { |
992 | 84 | int ec_bitdepth = |
993 | 84 | metadata.extra_channel_info[i].bit_depth.bits_per_sample; |
994 | 84 | pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0; |
995 | 84 | bitdepth_correction = ec_maxval / 255.f; |
996 | 84 | float dist = 0; |
997 | 84 | if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i]; |
998 | 84 | if (dist < 0) dist = cparams_.butteraugli_distance; |
999 | 84 | quantizers.push_back(quantizer * dist * bitdepth_correction); |
1000 | 84 | } |
1001 | 436 | if (cparams_.options.nb_repeats == 0) { |
1002 | 0 | return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!"); |
1003 | 0 | } |
1004 | 6.95k | for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) { |
1005 | 6.52k | Channel& ch = gi.channel[i]; |
1006 | 6.52k | int shift = ch.hshift + ch.vshift; // number of pixel halvings |
1007 | 6.52k | if (shift > 16) shift = 16; |
1008 | 6.52k | if (shift > 0) shift--; |
1009 | 6.52k | int component = (do_color ? 0 : 3) + ch.component; |
1010 | 6.52k | int q; |
1011 | 6.52k | if (cparams_.color_transform == ColorTransform::kXYB && component < 3) { |
1012 | 6.02k | q = quantizers[component] * squeeze_quality_factor_xyb * |
1013 | 6.02k | squeeze_xyb_qtable[component][shift]; |
1014 | 6.02k | if (component == 0) q *= squeeze_quality_factor_y; |
1015 | 6.02k | } else { |
1016 | 494 | if (cparams_.colorspace != 0 && component > 0 && component < 3) { |
1017 | 0 | q = quantizers[component] * squeeze_quality_factor * |
1018 | 0 | squeeze_chroma_qtable[shift]; |
1019 | 494 | } else { |
1020 | 494 | q = quantizers[component] * squeeze_quality_factor * |
1021 | 494 | squeeze_luma_factor * squeeze_luma_qtable[shift]; |
1022 | 494 | } |
1023 | 494 | } |
1024 | 6.52k | if (q < 1) q = 1; |
1025 | 6.52k | QuantizeChannel(gi.channel[i], q); |
1026 | 6.52k | quants_[i] = q; |
1027 | 6.52k | } |
1028 | 436 | } |
1029 | | |
1030 | | // Fill other groups. |
1031 | | // DC |
1032 | 7.49k | for (size_t group_id = 0; group_id < patch_dim.num_dc_groups; group_id++) { |
1033 | 5.38k | const size_t rgx = group_id % patch_dim.xsize_dc_groups; |
1034 | 5.38k | const size_t rgy = group_id / patch_dim.xsize_dc_groups; |
1035 | 5.38k | const Rect rect(rgx * patch_dim.dc_group_dim, rgy * patch_dim.dc_group_dim, |
1036 | 5.38k | patch_dim.dc_group_dim, patch_dim.dc_group_dim); |
1037 | 5.38k | size_t gx = rgx + frame_area_rect.x0() / 2048; |
1038 | 5.38k | size_t gy = rgy + frame_area_rect.y0() / 2048; |
1039 | 5.38k | size_t real_group_id = gy * frame_dim_.xsize_dc_groups + gx; |
1040 | | // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim |
1041 | | // maxShift==1000 is infinity |
1042 | 5.38k | stream_params_.push_back( |
1043 | 5.38k | GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(real_group_id)}); |
1044 | 5.38k | } |
1045 | | // AC global -> nothing. |
1046 | | // AC |
1047 | 40.4k | for (size_t group_id = 0; group_id < patch_dim.num_groups; group_id++) { |
1048 | 38.3k | const size_t rgx = group_id % patch_dim.xsize_groups; |
1049 | 38.3k | const size_t rgy = group_id / patch_dim.xsize_groups; |
1050 | 38.3k | const Rect mrect(rgx * patch_dim.group_dim, rgy * patch_dim.group_dim, |
1051 | 38.3k | patch_dim.group_dim, patch_dim.group_dim); |
1052 | 38.3k | size_t gx = rgx + frame_area_rect.x0() / (frame_dim_.group_dim); |
1053 | 38.3k | size_t gy = rgy + frame_area_rect.y0() / (frame_dim_.group_dim); |
1054 | 38.3k | size_t real_group_id = gy * frame_dim_.xsize_groups + gx; |
1055 | 92.6k | for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses(); |
1056 | 54.3k | i++) { |
1057 | 54.3k | int maxShift; |
1058 | 54.3k | int minShift; |
1059 | 54.3k | frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift); |
1060 | 54.3k | stream_params_.push_back( |
1061 | 54.3k | GroupParams{mrect, minShift, maxShift, |
1062 | 54.3k | ModularStreamId::ModularAC(real_group_id, i)}); |
1063 | 54.3k | } |
1064 | 38.3k | } |
1065 | | // if there's only one group, everything ends up in GlobalModular |
1066 | | // in that case, also try RCTs/WP params for the one group |
1067 | 2.11k | if (stream_params_.size() == 2) { |
1068 | 362 | stream_params_.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000, |
1069 | 362 | ModularStreamId::Global()}); |
1070 | 362 | } |
1071 | 2.11k | gi_channel_.resize(stream_images_.size()); |
1072 | | |
1073 | 2.11k | const auto process_row = [&](const uint32_t i, |
1074 | 59.7k | size_t /* thread */) -> Status { |
1075 | 59.7k | size_t stream = stream_params_[i].id.ID(frame_dim_); |
1076 | 59.7k | if (stream != 0) { |
1077 | 59.3k | stream_options_[stream] = stream_options_[0]; |
1078 | 59.3k | } |
1079 | 59.7k | JXL_RETURN_IF_ERROR(PrepareStreamParams( |
1080 | 59.7k | stream_params_[i].rect, cparams_, stream_params_[i].minShift, |
1081 | 59.7k | stream_params_[i].maxShift, stream_params_[i].id, do_color, groupwise)); |
1082 | 59.7k | return true; |
1083 | 59.7k | }; |
1084 | 2.11k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, stream_params_.size(), |
1085 | 2.11k | ThreadPool::NoInit, process_row, |
1086 | 2.11k | "ChooseParams")); |
1087 | 2.11k | { |
1088 | | // Clear out channels that have been copied to groups. |
1089 | 2.11k | Image& full_image = stream_images_[0]; |
1090 | 2.11k | size_t ch = full_image.nb_meta_channels; |
1091 | 8.31k | for (; ch < full_image.channel.size(); ch++) { |
1092 | 7.81k | Channel& fc = full_image.channel[ch]; |
1093 | 7.81k | if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; |
1094 | 7.81k | } |
1095 | 7.44k | for (; ch < full_image.channel.size(); ch++) { |
1096 | | // TODO(eustas): shrink / assign channel to keep size consistency |
1097 | 5.33k | full_image.channel[ch].plane = ImageI(); |
1098 | 5.33k | } |
1099 | 2.11k | } |
1100 | | |
1101 | 2.11k | JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0])); |
1102 | 2.11k | return true; |
1103 | 2.11k | } |
1104 | | |
1105 | 1.19k | Status ModularFrameEncoder::ComputeTree(ThreadPool* pool) { |
1106 | 1.19k | std::vector<ModularMultiplierInfo> multiplier_info; |
1107 | 1.19k | if (!quants_.empty()) { |
1108 | 18.4k | for (uint32_t stream_id = 0; stream_id < stream_images_.size(); |
1109 | 17.9k | stream_id++) { |
1110 | | // skip non-modular stream_ids |
1111 | 17.9k | if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; |
1112 | 5.29k | const Image& image = stream_images_[stream_id]; |
1113 | 5.29k | const ModularOptions& options = stream_options_[stream_id]; |
1114 | 67.8k | for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) { |
1115 | 62.5k | if (image.channel[i].w > options.max_chan_size || |
1116 | 61.6k | image.channel[i].h > options.max_chan_size) { |
1117 | 1.26k | continue; |
1118 | 1.26k | } |
1119 | 61.2k | if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; |
1120 | 61.2k | size_t ch_id = stream_id == 0 |
1121 | 61.2k | ? i |
1122 | 61.2k | : gi_channel_[stream_id][i - image.nb_meta_channels]; |
1123 | 61.2k | uint32_t q = quants_[ch_id]; |
1124 | | // Inform the tree splitting heuristics that each channel in each group |
1125 | | // used this quantization factor. This will produce a tree with the |
1126 | | // given multipliers. |
1127 | 61.2k | if (multiplier_info.empty() || |
1128 | 60.8k | multiplier_info.back().range[1][0] != stream_id || |
1129 | 56.0k | multiplier_info.back().multiplier != q) { |
1130 | 39.3k | StaticPropRange range; |
1131 | 39.3k | range[0] = {{i, i + 1}}; |
1132 | 39.3k | range[1] = {{stream_id, stream_id + 1}}; |
1133 | 39.3k | multiplier_info.push_back({range, q}); |
1134 | 39.3k | } else { |
1135 | | // Previous channel in the same group had the same quantization |
1136 | | // factor. Don't provide two different ranges, as that creates |
1137 | | // unnecessary nodes. |
1138 | 21.9k | multiplier_info.back().range[0][1] = i + 1; |
1139 | 21.9k | } |
1140 | 61.2k | } |
1141 | 5.29k | } |
1142 | | // Merge group+channel settings that have the same channels and quantization |
1143 | | // factors, to avoid unnecessary nodes. |
1144 | 436 | std::sort(multiplier_info.begin(), multiplier_info.end(), |
1145 | 442k | [](ModularMultiplierInfo a, ModularMultiplierInfo b) { |
1146 | 442k | return std::make_tuple(a.range, a.multiplier) < |
1147 | 442k | std::make_tuple(b.range, b.multiplier); |
1148 | 442k | }); |
1149 | 436 | size_t new_num = 1; |
1150 | 39.3k | for (size_t i = 1; i < multiplier_info.size(); i++) { |
1151 | 38.9k | ModularMultiplierInfo& prev = multiplier_info[new_num - 1]; |
1152 | 38.9k | ModularMultiplierInfo& cur = multiplier_info[i]; |
1153 | 38.9k | if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier && |
1154 | 35.6k | prev.range[1][1] == cur.range[1][0]) { |
1155 | 35.6k | prev.range[1][1] = cur.range[1][1]; |
1156 | 35.6k | } else { |
1157 | 3.31k | multiplier_info[new_num++] = multiplier_info[i]; |
1158 | 3.31k | } |
1159 | 38.9k | } |
1160 | 436 | multiplier_info.resize(new_num); |
1161 | 436 | } |
1162 | | |
1163 | 1.19k | if (!cparams_.custom_fixed_tree.empty()) { |
1164 | 0 | tree_ = cparams_.custom_fixed_tree; |
1165 | 1.19k | } else if (cparams_.speed_tier < SpeedTier::kFalcon || |
1166 | 1.06k | !cparams_.modular_mode) { |
1167 | | // Avoid creating a tree with leaves that don't correspond to any pixels. |
1168 | 1.06k | std::vector<size_t> useful_splits; |
1169 | 1.06k | useful_splits.reserve(tree_splits_.size()); |
1170 | 5.16k | for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) { |
1171 | 4.09k | bool has_pixels = false; |
1172 | 4.09k | size_t start = tree_splits_[chunk]; |
1173 | 4.09k | size_t stop = tree_splits_[chunk + 1]; |
1174 | 54.3k | for (size_t i = start; i < stop; i++) { |
1175 | 50.2k | if (!stream_images_[i].empty()) has_pixels = true; |
1176 | 50.2k | } |
1177 | 4.09k | if (has_pixels) { |
1178 | 1.70k | useful_splits.push_back(tree_splits_[chunk]); |
1179 | 1.70k | } |
1180 | 4.09k | } |
1181 | | // Don't do anything if modular mode does not have any pixels in this image |
1182 | 1.06k | if (useful_splits.empty()) return true; |
1183 | 1.06k | useful_splits.push_back(tree_splits_.back()); |
1184 | | |
1185 | 1.06k | std::vector<Tree> trees(useful_splits.size() - 1); |
1186 | 1.06k | const auto process_chunk = [&](const uint32_t chunk, |
1187 | 1.70k | size_t /* thread */) -> Status { |
1188 | | // TODO(veluca): parallelize more. |
1189 | 1.70k | uint32_t start = useful_splits[chunk]; |
1190 | 1.70k | uint32_t stop = useful_splits[chunk + 1]; |
1191 | 3.40k | while (start < stop && stream_images_[start].empty()) ++start; |
1192 | 27.8k | while (start < stop && stream_images_[stop - 1].empty()) --stop; |
1193 | | |
1194 | 1.70k | if (stream_options_[start].tree_kind == |
1195 | 1.70k | ModularOptions::TreeKind::kLearn) { |
1196 | 711 | JXL_ASSIGN_OR_RETURN( |
1197 | 711 | trees[chunk], |
1198 | 711 | LearnTree(stream_images_.data(), stream_options_.data(), start, |
1199 | 711 | stop, multiplier_info)); |
1200 | 990 | } else { |
1201 | 990 | size_t total_pixels = 0; |
1202 | 3.71k | for (size_t i = start; i < stop; i++) { |
1203 | 8.66k | for (const Channel& ch : stream_images_[i].channel) { |
1204 | 8.66k | total_pixels += ch.w * ch.h; |
1205 | 8.66k | } |
1206 | 2.72k | } |
1207 | 990 | total_pixels = std::max<size_t>(total_pixels, 1); |
1208 | | |
1209 | 990 | trees[chunk] = PredefinedTree(stream_options_[start].tree_kind, |
1210 | 990 | total_pixels, 8, 0); |
1211 | 990 | } |
1212 | 1.70k | return true; |
1213 | 1.70k | }; |
1214 | 1.06k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, useful_splits.size() - 1, |
1215 | 1.06k | ThreadPool::NoInit, process_chunk, |
1216 | 1.06k | "LearnTrees")); |
1217 | 1.06k | tree_.clear(); |
1218 | 1.06k | JXL_RETURN_IF_ERROR( |
1219 | 1.06k | MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_)); |
1220 | 1.06k | } else { |
1221 | | // Fixed tree. |
1222 | 129 | size_t total_pixels = 0; |
1223 | 129 | int max_bitdepth = 0; |
1224 | 8.68k | for (const Image& img : stream_images_) { |
1225 | 8.68k | max_bitdepth = std::max(max_bitdepth, img.bitdepth); |
1226 | 19.5k | for (const Channel& ch : img.channel) { |
1227 | 19.5k | total_pixels += ch.w * ch.h; |
1228 | 19.5k | } |
1229 | 8.68k | } |
1230 | 129 | if (cparams_.speed_tier <= SpeedTier::kFalcon) { |
1231 | 42 | tree_ = PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels, |
1232 | 42 | max_bitdepth, stream_options_[0].max_properties); |
1233 | 87 | } else if (cparams_.speed_tier <= SpeedTier::kThunder) { |
1234 | 87 | tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC, |
1235 | 87 | total_pixels, max_bitdepth, |
1236 | 87 | stream_options_[0].max_properties); |
1237 | 87 | } else { |
1238 | 0 | tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)}; |
1239 | 0 | } |
1240 | 129 | } |
1241 | 1.19k | tree_tokens_.resize(1); |
1242 | 1.19k | tree_tokens_[0].clear(); |
1243 | 1.19k | Tree decoded_tree; |
1244 | 1.19k | JXL_RETURN_IF_ERROR(TokenizeTree(tree_, tree_tokens_.data(), &decoded_tree)); |
1245 | 1.19k | JXL_ENSURE(tree_.size() == decoded_tree.size()); |
1246 | 1.19k | tree_ = std::move(decoded_tree); |
1247 | | |
1248 | | /* TODO(szabadka) Add text output callback to cparams |
1249 | | if (kPrintTree && WantDebugOutput(aux_out)) { |
1250 | | if (frame_header.dc_level > 0) { |
1251 | | PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" + |
1252 | | std::to_string(frame_header.dc_level) + "_tree"); |
1253 | | } else { |
1254 | | PrintTree(tree_, aux_out->debug_prefix + "/global_tree"); |
1255 | | } |
1256 | | } */ |
1257 | 1.19k | return true; |
1258 | 1.19k | } |
1259 | | |
1260 | 1.19k | Status ModularFrameEncoder::ComputeTokens(ThreadPool* pool) { |
1261 | 1.19k | size_t num_streams = stream_images_.size(); |
1262 | 1.19k | stream_headers_.resize(num_streams); |
1263 | 1.19k | tokens_.resize(num_streams); |
1264 | 1.19k | image_widths_.resize(num_streams); |
1265 | 1.19k | const auto process_stream = [&](const uint32_t stream_id, |
1266 | 58.5k | size_t /* thread */) -> Status { |
1267 | 58.5k | tokens_[stream_id].clear(); |
1268 | 58.5k | JXL_RETURN_IF_ERROR( |
1269 | 58.5k | ModularCompress(stream_images_[stream_id], stream_options_[stream_id], |
1270 | 58.5k | stream_id, tree_, stream_headers_[stream_id], |
1271 | 58.5k | tokens_[stream_id], &image_widths_[stream_id])); |
1272 | 58.5k | return true; |
1273 | 58.5k | }; |
1274 | 1.19k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_streams, ThreadPool::NoInit, |
1275 | 1.19k | process_stream, "ComputeTokens")); |
1276 | 1.19k | return true; |
1277 | 1.19k | } |
1278 | | |
1279 | | Status ModularFrameEncoder::EncodeGlobalInfo(bool streaming_mode, |
1280 | | BitWriter* writer, |
1281 | 1.87k | AuxOut* aux_out) { |
1282 | 1.87k | JxlMemoryManager* memory_manager = writer->memory_manager(); |
1283 | 1.87k | bool skip_rest = false; |
1284 | 1.87k | JXL_RETURN_IF_ERROR( |
1285 | 1.87k | writer->WithMaxBits(1, LayerType::ModularTree, aux_out, [&] { |
1286 | | // If we are using brotli, or not using modular mode. |
1287 | 1.87k | if (tree_tokens_.empty() || tree_tokens_[0].empty()) { |
1288 | 1.87k | writer->Write(1, 0); |
1289 | 1.87k | skip_rest = true; |
1290 | 1.87k | } else { |
1291 | 1.87k | writer->Write(1, 1); |
1292 | 1.87k | } |
1293 | 1.87k | return true; |
1294 | 1.87k | })); |
1295 | 1.87k | if (skip_rest) return true; |
1296 | | |
1297 | | // Write tree |
1298 | 1.19k | HistogramParams params = |
1299 | 1.19k | HistogramParams::ForModular(cparams_, extra_dc_precision, streaming_mode); |
1300 | 1.19k | { |
1301 | 1.19k | EntropyEncodingData tree_code; |
1302 | 1.19k | JXL_ASSIGN_OR_RETURN( |
1303 | 1.19k | size_t cost, BuildAndEncodeHistograms( |
1304 | 1.19k | memory_manager, params, kNumTreeContexts, tree_tokens_, |
1305 | 1.19k | &tree_code, writer, LayerType::ModularTree, aux_out)); |
1306 | 1.19k | (void)cost; |
1307 | 1.19k | JXL_RETURN_IF_ERROR(WriteTokens(tree_tokens_[0], tree_code, 0, writer, |
1308 | 1.19k | LayerType::ModularTree, aux_out)); |
1309 | 1.19k | } |
1310 | 1.19k | params.streaming_mode = streaming_mode; |
1311 | 1.19k | params.add_missing_symbols = streaming_mode; |
1312 | 1.19k | params.image_widths = image_widths_; |
1313 | | // Write histograms. |
1314 | 1.19k | JXL_ASSIGN_OR_RETURN( |
1315 | 1.19k | size_t cost, BuildAndEncodeHistograms( |
1316 | 1.19k | memory_manager, params, (tree_.size() + 1) / 2, tokens_, |
1317 | 1.19k | &code_, writer, LayerType::ModularGlobal, aux_out)); |
1318 | 1.19k | (void)cost; |
1319 | 1.19k | return true; |
1320 | 1.19k | } |
1321 | | |
1322 | | Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out, |
1323 | | LayerType layer, |
1324 | 91.1k | const ModularStreamId& stream) { |
1325 | 91.1k | size_t stream_id = stream.ID(frame_dim_); |
1326 | 91.1k | if (stream_images_[stream_id].channel.empty()) { |
1327 | 44.9k | JXL_DEBUG_V(10, "Modular stream %" PRIuS " is empty.", stream_id); |
1328 | 44.9k | return true; // Image with no channels, header never gets decoded. |
1329 | 44.9k | } |
1330 | 46.2k | if (tokens_.empty()) { |
1331 | 30.3k | JXL_RETURN_IF_ERROR(ModularGenericCompress( |
1332 | 30.3k | stream_images_[stream_id], stream_options_[stream_id], *writer, aux_out, |
1333 | 30.3k | layer, stream_id)); |
1334 | 30.3k | } else { |
1335 | 15.8k | JXL_RETURN_IF_ERROR( |
1336 | 15.8k | Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out)); |
1337 | 15.8k | JXL_RETURN_IF_ERROR( |
1338 | 15.8k | WriteTokens(tokens_[stream_id], code_, 0, writer, layer, aux_out)); |
1339 | 15.8k | } |
1340 | 46.2k | return true; |
1341 | 46.2k | } |
1342 | | |
1343 | 14.0k | void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) { |
1344 | 14.0k | size_t stream_id = stream.ID(frame_dim_); |
1345 | 14.0k | Image empty_image(stream_images_[stream_id].memory_manager()); |
1346 | 14.0k | std::swap(stream_images_[stream_id], empty_image); |
1347 | 14.0k | } |
1348 | | |
1349 | 1.60k | void ModularFrameEncoder::ClearModularStreamData() { |
1350 | 10.7k | for (const auto& group : stream_params_) { |
1351 | 10.7k | ClearStreamData(group.id); |
1352 | 10.7k | } |
1353 | 1.60k | stream_params_.clear(); |
1354 | 1.60k | } |
1355 | | |
1356 | | size_t ModularFrameEncoder::ComputeStreamingAbsoluteAcGroupId( |
1357 | | size_t dc_group_id, size_t ac_group_id, |
1358 | 13.0k | const FrameDimensions& patch_dim) const { |
1359 | 13.0k | size_t dc_group_x = dc_group_id % frame_dim_.xsize_dc_groups; |
1360 | 13.0k | size_t dc_group_y = dc_group_id / frame_dim_.xsize_dc_groups; |
1361 | 13.0k | size_t ac_group_x = ac_group_id % patch_dim.xsize_groups; |
1362 | 13.0k | size_t ac_group_y = ac_group_id / patch_dim.xsize_groups; |
1363 | 13.0k | return (dc_group_x * 8 + ac_group_x) + |
1364 | 13.0k | (dc_group_y * 8 + ac_group_y) * frame_dim_.xsize_groups; |
1365 | 13.0k | } |
1366 | | |
// Prepares the image and coding options of one modular stream.
//
// Behaviour visible in this function:
//  * Stream 0 of a frame with more than one group is left untouched.
//  * For stream_id > 0 the stream image `gi` is re-created with the size of
//    `rect` and filled with copies of the matching region of every channel of
//    the full image (stream 0) whose min(hshift, vshift) lies within
//    [minShift, maxShift] and whose shifted region is non-empty. The index of
//    each copied channel is appended to gi_channel_[stream_id]. If nothing was
//    copied the function returns early. Otherwise, when
//    butteraugli_distance == 0, lossy_palette is off and
//    speed_tier < kCheetah, try_palettes() is run on `gi`.
//  * If the RCT-search condition below holds, several reversible color
//    transforms are tried and the cheapest one (by EstimateCost) is applied.
//  * If more than one weighted-predictor mode is to be tried and the stream's
//    predictor uses the weighted predictor, the wp_mode with the lowest
//    EstimateWPCost is stored in stream_options_[stream_id].
//
// `groupwise`: when false, the leading non-meta channels that fit within
// frame_dim_.group_dim in both dimensions are skipped before copying.
// `do_color`: must be true for the RCT search to run.
// Returns true on success; errors from the called helpers are propagated.
1367 | | Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect, |
1368 | | const CompressParams& cparams, |
1369 | | int minShift, int maxShift, |
1370 | | const ModularStreamId& stream, |
1371 | 58.8k | bool do_color, bool groupwise) { |
1372 | 58.8k | size_t stream_id = stream.ID(frame_dim_); |
1373 | 58.8k | if (stream_id == 0 && frame_dim_.num_groups != 1) { |
1374 | | // If we have multiple groups, then the stream with ID 0 holds the full |
1375 | | // image and we do not want to apply transforms or in general change the |
1376 | | // pixel values. |
1377 | 14 | return true; |
1378 | 14 | } |
1379 | 58.8k | Image& full_image = stream_images_[0]; |
1380 | 58.8k | JxlMemoryManager* memory_manager = full_image.memory_manager(); |
1381 | 58.8k | const size_t xsize = rect.xsize(); |
1382 | 58.8k | const size_t ysize = rect.ysize(); |
1383 | 58.8k | Image& gi = stream_images_[stream_id]; |
1384 | 58.9k | if (stream_id > 0) { |
1385 | 58.9k | JXL_ASSIGN_OR_RETURN(gi, Image::Create(memory_manager, xsize, ysize, |
1386 | 58.9k | full_image.bitdepth, 0)); |
1387 | | // start at the first bigger-than-frame_dim.group_dim non-metachannel |
1388 | 58.9k | size_t c = full_image.nb_meta_channels; |
1389 | 58.9k | if (!groupwise) { |
1390 | 90.9k | for (; c < full_image.channel.size(); c++) { |
1391 | 89.7k | Channel& fc = full_image.channel[c]; |
1392 | 89.7k | if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; |
1393 | 89.7k | } |
1394 | 48.4k | } |
// Copy the region covered by `rect` (scaled down by each channel's own
// shifts) from every remaining channel whose shift is in range.
1395 | 341k | for (; c < full_image.channel.size(); c++) { |
1396 | 282k | Channel& fc = full_image.channel[c]; |
1397 | 282k | int shift = std::min(fc.hshift, fc.vshift); |
1398 | 282k | if (shift > maxShift) continue; |
1399 | 252k | if (shift < minShift) continue; |
1400 | 145k | Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, |
1401 | 145k | rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); |
1402 | 145k | if (r.xsize() == 0 || r.ysize() == 0) continue; |
1403 | 142k | gi_channel_[stream_id].push_back(c); |
1404 | 142k | JXL_ASSIGN_OR_RETURN( |
1405 | 142k | Channel gc, Channel::Create(memory_manager, r.xsize(), r.ysize())); |
1406 | 142k | gc.hshift = fc.hshift; |
1407 | 142k | gc.vshift = fc.vshift; |
1408 | 2.51M | for (size_t y = 0; y < r.ysize(); ++y) { |
1409 | 2.37M | memcpy(gc.Row(y), r.ConstRow(fc.plane, y), |
1410 | 2.37M | r.xsize() * sizeof(pixel_type)); |
1411 | 2.37M | } |
1412 | 142k | gi.channel.emplace_back(std::move(gc)); |
1413 | 142k | } |
1414 | | |
1415 | 58.9k | if (gi.channel.empty()) return true; |
1416 | | // Do some per-group transforms |
1417 | | |
1418 | | // Local palette transforms |
1419 | | // TODO(veluca): make this work with quantize-after-prediction in lossy |
1420 | | // mode. |
1421 | 39.0k | if (cparams.butteraugli_distance == 0.f && !cparams.lossy_palette && |
1422 | 29.3k | cparams.speed_tier < SpeedTier::kCheetah) { |
1423 | 10.1k | int max_bitdepth = 0, maxval = 0; // don't care about that here |
1424 | 10.1k | float channel_color_percent = 0; |
1425 | 10.1k | if (!(cparams.responsive && |
1426 | 8.85k | (cparams.decoding_speed_tier >= 1 || cparams.IsLossless()))) { |
1427 | 8.85k | channel_color_percent = cparams.channel_colors_percent; |
1428 | 8.85k | } |
1429 | 10.1k | JXL_RETURN_IF_ERROR(try_palettes(gi, max_bitdepth, maxval, cparams, |
1430 | 10.1k | channel_color_percent)); |
1431 | 10.1k | } |
1432 | 39.0k | } |
1433 | | |
1434 | | // lossless and no specific color transform specified: try Nothing, YCoCg, |
1435 | | // and 17 RCTs |
1436 | 38.9k | if (cparams.color_transform == ColorTransform::kNone && |
1437 | 30.7k | cparams.IsLossless() && cparams.colorspace < 0 && |
1438 | 8.69k | gi.channel.size() - gi.nb_meta_channels >= 3 && |
1439 | 753 | cparams.responsive == JXL_FALSE && do_color && |
1440 | 754 | cparams.speed_tier <= SpeedTier::kHare) { |
// Number of candidates (taken from the front of the list below) to evaluate,
// chosen by speed tier.
1441 | 6 | size_t nb_rcts_to_try = 0; |
1442 | 6 | switch (cparams.speed_tier) { |
1443 | 0 | case SpeedTier::kLightning: |
1444 | 0 | case SpeedTier::kThunder: |
1445 | 0 | case SpeedTier::kFalcon: |
1446 | 0 | case SpeedTier::kCheetah: |
1447 | 0 | nb_rcts_to_try = 0; // Just do global YCoCg |
1448 | 0 | break; |
1449 | 2 | case SpeedTier::kHare: |
1450 | 2 | nb_rcts_to_try = 4; |
1451 | 2 | break; |
1452 | 0 | case SpeedTier::kWombat: |
1453 | 0 | nb_rcts_to_try = 5; |
1454 | 0 | break; |
1455 | 0 | case SpeedTier::kSquirrel: |
1456 | 0 | nb_rcts_to_try = 7; |
1457 | 0 | break; |
1458 | 4 | case SpeedTier::kKitten: |
1459 | 4 | nb_rcts_to_try = 9; |
1460 | 4 | break; |
1461 | 0 | case SpeedTier::kTectonicPlate: |
1462 | 0 | case SpeedTier::kGlacier: |
1463 | 0 | case SpeedTier::kTortoise: |
1464 | 0 | nb_rcts_to_try = 19; |
1465 | 0 | break; |
1466 | 6 | } |
// best_rct == 0 means "no transform"; it is only replaced by a candidate that
// is strictly cheaper than the baseline. `orig` receives the untouched planes
// of the three color channels while candidates are written into `gi`.
1467 | 6 | float best_cost = std::numeric_limits<float>::max(); |
1468 | 6 | size_t best_rct = 0; |
1469 | 6 | bool need_to_restore = (nb_rcts_to_try > 1); |
1470 | 6 | std::vector<Channel> orig; |
1471 | 6 | orig.reserve(3); |
1472 | | // These should be 19 actually different transforms; the remaining ones |
1473 | | // are equivalent to one of these (note that the first two are do-nothing |
1474 | | // and YCoCg) modulo channel reordering (which only matters in the case of |
1475 | | // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR) |
1476 | 6 | for (int rct_type : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5, |
1477 | 6 | 5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4, |
1478 | 6 | 1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4, |
1479 | 50 | 4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) { |
1480 | 50 | if (nb_rcts_to_try == 0) break; |
1481 | 44 | nb_rcts_to_try--; |
1482 | | // no-op rct_type; use as baseline cost |
1483 | 44 | if (rct_type == 0) { |
1484 | 6 | JXL_ASSIGN_OR_RETURN(best_cost, EstimateCost(gi)); |
// Move the original planes into `orig`; gi keeps freshly created planes of
// the same size that later candidates overwrite.
1485 | 24 | for (size_t c = 0; c < 3; ++c) { |
1486 | 18 | Channel& genuine = gi.channel[gi.nb_meta_channels + c]; |
1487 | 18 | JXL_ASSIGN_OR_RETURN( |
1488 | 18 | Channel ch, |
1489 | 18 | Channel::Create(genuine.memory_manager(), genuine.w, genuine.h, |
1490 | 18 | genuine.hshift, genuine.vshift)); |
1491 | 18 | orig.emplace_back(std::move(ch)); |
1492 | 18 | genuine.plane.Swap(orig[c].plane); |
1493 | 18 | } |
1494 | 38 | } else { |
// Transform from the saved originals into gi and cost the result.
1495 | 38 | std::array<const Channel*, 3> in = {&orig[0], &orig[1], &orig[2]}; |
1496 | 38 | std::array<Channel*, 3> out = {&gi.channel[gi.nb_meta_channels + 0], |
1497 | 38 | &gi.channel[gi.nb_meta_channels + 1], |
1498 | 38 | &gi.channel[gi.nb_meta_channels + 2]}; |
1499 | 38 | JXL_RETURN_IF_ERROR(FwdRct(in, out, rct_type, /* pool */ nullptr)); |
1500 | 76 | JXL_ASSIGN_OR_RETURN(float cost, EstimateCost(gi)); |
1501 | 76 | if (cost < best_cost) { |
1502 | 0 | best_rct = rct_type; |
1503 | 0 | best_cost = cost; |
1504 | 0 | } |
1505 | 76 | } |
1506 | 44 | } |
// Put the original (untransformed) planes back into gi before applying the
// winning transform through the regular transform path.
1507 | 6 | if (need_to_restore) { |
1508 | 24 | for (size_t c = 0; c < 3; ++c) { |
1509 | 18 | gi.channel[gi.nb_meta_channels + c].plane.Swap(orig[c].plane); |
1510 | 18 | } |
1511 | 6 | } |
1512 | | // Apply the best RCT to the image for future encoding. |
1513 | 6 | if (best_rct != 0) { |
1514 | 0 | Transform sg(TransformId::kRCT); |
1515 | 0 | sg.begin_c = gi.nb_meta_channels; |
1516 | 0 | sg.rct_type = best_rct; |
1517 | 0 | do_transform(gi, sg, weighted::Header()); |
1518 | 0 | } |
1519 | 38.9k | } else { |
1520 | | // No need to try anything, just use the default options. |
1521 | 38.9k | } |
// Weighted-predictor mode search: 5 modes when speed_tier <= kTortoise,
// 2 when speed_tier <= kKitten, otherwise only the default (no search).
1522 | 38.9k | size_t nb_wp_modes = 1; |
1523 | 38.9k | if (cparams.speed_tier <= SpeedTier::kTortoise) { |
1524 | 1.19k | nb_wp_modes = 5; |
1525 | 37.7k | } else if (cparams.speed_tier <= SpeedTier::kKitten) { |
1526 | 2.96k | nb_wp_modes = 2; |
1527 | 2.96k | } |
1528 | 38.9k | if (nb_wp_modes > 1 && |
1529 | 4.16k | PredictorHasWeighted(stream_options_[stream_id].predictor)) { |
1530 | 80 | float best_cost = std::numeric_limits<float>::max(); |
1531 | 80 | stream_options_[stream_id].wp_mode = 0; |
1532 | 432 | for (size_t i = 0; i < nb_wp_modes; i++) { |
1533 | 352 | float cost = EstimateWPCost(gi, i); |
1534 | 352 | if (cost < best_cost) { |
1535 | 106 | best_cost = cost; |
1536 | 106 | stream_options_[stream_id].wp_mode = i; |
1537 | 106 | } |
1538 | 352 | } |
1539 | 80 | } |
1540 | 38.9k | return true; |
1541 | 38.9k | } |
1542 | | |
1543 | | constexpr float q_deadzone = 0.62f; |
1544 | | int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y, |
1545 | | size_t w, weighted::State* wp_state, float value, |
1546 | 1.43M | float inv_factor, bool* has_outliers) { |
1547 | 1.43M | float svalue = value * inv_factor; |
1548 | 1.43M | PredictionResult pred = |
1549 | 1.43M | PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state); |
1550 | 1.43M | svalue -= pred.guess; |
1551 | 1.43M | if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; |
1552 | 1.43M | int residual = 0; |
1553 | 1.43M | if (svalue > static_cast<float>(std::numeric_limits<int>::max()) || |
1554 | 1.38M | svalue < static_cast<float>(std::numeric_limits<int>::min())) { |
1555 | 0 | *has_outliers = true; |
1556 | 1.43M | } else { |
1557 | 1.43M | residual = std::round(svalue); |
1558 | 1.43M | } |
1559 | 1.43M | if (residual > 2 || residual < -2) residual = std::round(svalue * 0.5f) * 2; |
1560 | 1.43M | return residual + pred.guess; |
1561 | 1.43M | } |
1562 | | |
1563 | | int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x, |
1564 | 1.12M | size_t y, size_t w, float value, float inv_factor) { |
1565 | 1.12M | float svalue = value * inv_factor; |
1566 | 1.12M | PredictionResult pred = |
1567 | 1.12M | PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient); |
1568 | 1.12M | svalue -= pred.guess; |
1569 | 1.12M | if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; |
1570 | 1.12M | int residual = std::round(svalue); |
1571 | 1.12M | if (residual > 2 || residual < -2) residual = std::round(svalue * 0.5f) * 2; |
1572 | 1.12M | return residual + pred.guess; |
1573 | 1.12M | } |
1574 | | |
// Quantizes the DC image `dc` (region `r`) of one group into the modular
// stream ModularStreamId::VarDCTDC(group_index) and then calls DequantDC with
// the quantized result.
//
// Steps visible in this function:
//  * extra_dc_precision[group_index] is set to 1 when `nl_dc` is true, else 0;
//    `mul` is 2 to that power and scales the inverse DC step.
//  * The stream's ModularOptions (predictor, tree mode/kind, histogram params)
//    are chosen from cparams_.speed_tier, cparams_.decoding_speed_tier and
//    `nl_dc`.
//  * A 3-channel stream image of r.xsize() x r.ysize() is created and filled
//    by one of four paths:
//      1. nl_dc with tree_kind kGradientFixedDC: QuantizeGradient per sample.
//      2. nl_dc otherwise: QuantizeWP per sample, with a fresh weighted::State
//         per channel that is updated after each sample.
//      3. !nl_dc and 4:4:4: plain rounding of the scaled value.
//      4. !nl_dc and subsampled chroma: each channel is shrunk to its own
//         shifted rectangle and rounded, without the correlation subtraction.
//    In paths 1-3 the input channels are visited in the order 1, 0, 2 and
//    input channel c is stored in stream channel (c < 2 ? c ^ 1 : c), so
//    input channel 1 lands in stream channel 0 and is quantized first; the
//    other two channels subtract quantized_channel0 * (y_factor * cfl_factor)
//    before quantization.
//  * Fails with "Unsupported range of DC values" if QuantizeWP reported an
//    outlier.
//
// `jpeg_transcode` is not referenced in this function body.
1575 | | Status ModularFrameEncoder::AddVarDCTDC(const FrameHeader& frame_header, |
1576 | | const Image3F& dc, const Rect& r, |
1577 | | size_t group_index, bool nl_dc, |
1578 | | PassesEncoderState* enc_state, |
1579 | 3.67k | bool jpeg_transcode) { |
1580 | 3.67k | JxlMemoryManager* memory_manager = dc.memory_manager(); |
1581 | 3.67k | extra_dc_precision[group_index] = nl_dc ? 1 : 0; |
1582 | 3.67k | float mul = 1 << extra_dc_precision[group_index]; |
1583 | 3.67k | bool has_outliers = false; |
1584 | | |
1585 | 3.67k | size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_); |
// Defaults; later checks override them (the decoding_speed_tier check is
// applied last and therefore wins for tree_kind).
1586 | 3.67k | stream_options_[stream_id].max_chan_size = 0xFFFFFF; |
1587 | 3.67k | stream_options_[stream_id].predictor = Predictor::Weighted; |
1588 | 3.67k | stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly; |
1589 | 3.67k | if (cparams_.speed_tier >= SpeedTier::kSquirrel) { |
1590 | 2.24k | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC; |
1591 | 2.24k | } |
1592 | 3.67k | if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) { |
1593 | 0 | stream_options_[stream_id].predictor = |
1594 | 0 | (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable |
1595 | 0 | : Predictor::Best); |
1596 | 0 | stream_options_[stream_id].wp_tree_mode = |
1597 | 0 | ModularOptions::TreeMode::kDefault; |
1598 | 0 | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; |
1599 | 0 | } |
1600 | 3.67k | if (cparams_.decoding_speed_tier >= 1) { |
1601 | 1.49k | stream_options_[stream_id].tree_kind = |
1602 | 1.49k | ModularOptions::TreeKind::kGradientFixedDC; |
1603 | 1.49k | } |
1604 | 3.67k | stream_options_[stream_id].histogram_params = |
1605 | 3.67k | stream_options_[0].histogram_params; |
1606 | | |
1607 | 3.67k | JXL_ASSIGN_OR_RETURN( |
1608 | 3.67k | stream_images_[stream_id], |
1609 | 3.67k | Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 3)); |
1610 | 3.67k | const ColorCorrelation& color_correlation = enc_state->shared.cmap.base(); |
// Path 1: nl_dc with the fixed gradient tree -> quantize against the
// Gradient predictor.
1611 | 3.67k | if (nl_dc && stream_options_[stream_id].tree_kind == |
1612 | 2.56k | ModularOptions::TreeKind::kGradientFixedDC) { |
1613 | 1.04k | JXL_ENSURE(frame_header.chroma_subsampling.Is444()); |
1614 | 3.13k | for (size_t c : {1, 0, 2}) { |
1615 | 3.13k | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1616 | 3.13k | float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; |
1617 | 3.13k | float cfl_factor = color_correlation.DCFactors()[c]; |
1618 | 104k | for (size_t y = 0; y < r.ysize(); y++) { |
1619 | 101k | int32_t* quant_row = |
1620 | 101k | stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); |
1621 | 101k | size_t stride = stream_images_[stream_id] |
1622 | 101k | .channel[c < 2 ? c ^ 1 : c] |
1623 | 101k | .plane.PixelsPerRow(); |
1624 | 101k | const float* row = r.ConstPlaneRow(dc, c, y); |
1625 | 101k | if (c == 1) { |
1626 | 424k | for (size_t x = 0; x < r.xsize(); x++) { |
1627 | 391k | quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y, |
1628 | 391k | r.xsize(), row[x], inv_factor); |
1629 | 391k | } |
1630 | 68.1k | } else { |
1631 | 68.1k | int32_t* quant_row_y = |
1632 | 68.1k | stream_images_[stream_id].channel[0].plane.Row(y); |
1633 | 837k | for (size_t x = 0; x < r.xsize(); x++) { |
1634 | 769k | quant_row[x] = QuantizeGradient( |
1635 | 769k | quant_row, stride, c, x, y, r.xsize(), |
1636 | 769k | row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); |
1637 | 769k | } |
1638 | 68.1k | } |
1639 | 101k | } |
1640 | 3.13k | } |
// Path 2: nl_dc with any other tree kind -> quantize against the weighted
// predictor, keeping its state in sync with the quantized values.
1641 | 2.62k | } else if (nl_dc) { |
1642 | 1.52k | JXL_ENSURE(frame_header.chroma_subsampling.Is444()); |
1643 | 4.55k | for (size_t c : {1, 0, 2}) { |
1644 | 4.55k | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1645 | 4.55k | float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; |
1646 | 4.55k | float cfl_factor = color_correlation.DCFactors()[c]; |
1647 | 4.55k | weighted::Header header; |
1648 | 4.55k | weighted::State wp_state(header, r.xsize(), r.ysize()); |
1649 | 121k | for (size_t y = 0; y < r.ysize(); y++) { |
1650 | 116k | int32_t* quant_row = |
1651 | 116k | stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); |
1652 | 116k | size_t stride = stream_images_[stream_id] |
1653 | 116k | .channel[c < 2 ? c ^ 1 : c] |
1654 | 116k | .plane.PixelsPerRow(); |
1655 | 116k | const float* row = r.ConstPlaneRow(dc, c, y); |
1656 | 116k | if (c == 1) { |
1657 | 526k | for (size_t x = 0; x < r.xsize(); x++) { |
1658 | 487k | quant_row[x] = |
1659 | 487k | QuantizeWP(quant_row, stride, c, x, y, r.xsize(), &wp_state, |
1660 | 487k | row[x], inv_factor, &has_outliers); |
1661 | 487k | wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); |
1662 | 487k | } |
1663 | 77.8k | } else { |
1664 | 77.8k | int32_t* quant_row_y = |
1665 | 77.8k | stream_images_[stream_id].channel[0].plane.Row(y); |
1666 | 1.04M | for (size_t x = 0; x < r.xsize(); x++) { |
1667 | 971k | quant_row[x] = |
1668 | 971k | QuantizeWP(quant_row, stride, c, x, y, r.xsize(), &wp_state, |
1669 | 971k | row[x] - quant_row_y[x] * (y_factor * cfl_factor), |
1670 | 971k | inv_factor, &has_outliers); |
1671 | 971k | wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); |
1672 | 971k | } |
1673 | 77.8k | } |
1674 | 116k | } |
1675 | 4.55k | } |
// Path 3: no nl_dc, 4:4:4 -> plain rounding of the scaled value.
1676 | 1.52k | } else if (frame_header.chroma_subsampling.Is444()) { |
1677 | 3.31k | for (size_t c : {1, 0, 2}) { |
1678 | 3.31k | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1679 | 3.31k | float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; |
1680 | 3.31k | float cfl_factor = color_correlation.DCFactors()[c]; |
1681 | 188k | for (size_t y = 0; y < r.ysize(); y++) { |
1682 | 185k | int32_t* quant_row = |
1683 | 185k | stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); |
1684 | 185k | const float* row = r.ConstPlaneRow(dc, c, y); |
1685 | 185k | if (c == 1) { |
1686 | 2.15M | for (size_t x = 0; x < r.xsize(); x++) { |
1687 | 2.09M | quant_row[x] = std::round(row[x] * inv_factor); |
1688 | 2.09M | } |
1689 | 125k | } else { |
1690 | 125k | int32_t* quant_row_y = |
1691 | 125k | stream_images_[stream_id].channel[0].plane.Row(y); |
1692 | 4.28M | for (size_t x = 0; x < r.xsize(); x++) { |
1693 | 4.15M | quant_row[x] = |
1694 | 4.15M | std::round((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) * |
1695 | 4.15M | inv_factor); |
1696 | 4.15M | } |
1697 | 125k | } |
1698 | 185k | } |
1699 | 3.31k | } |
// Path 4: no nl_dc, subsampled chroma -> each channel uses its own shifted
// rectangle and is shrunk to that size; no cross-channel subtraction.
1700 | 1.10k | } else { |
1701 | 0 | for (size_t c : {1, 0, 2}) { |
1702 | 0 | Rect rect(r.x0() >> frame_header.chroma_subsampling.HShift(c), |
1703 | 0 | r.y0() >> frame_header.chroma_subsampling.VShift(c), |
1704 | 0 | r.xsize() >> frame_header.chroma_subsampling.HShift(c), |
1705 | 0 | r.ysize() >> frame_header.chroma_subsampling.VShift(c)); |
1706 | 0 | float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; |
1707 | 0 | size_t ys = rect.ysize(); |
1708 | 0 | size_t xs = rect.xsize(); |
1709 | 0 | Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c]; |
1710 | 0 | ch.w = xs; |
1711 | 0 | ch.h = ys; |
1712 | 0 | JXL_RETURN_IF_ERROR(ch.shrink()); |
1713 | 0 | for (size_t y = 0; y < ys; y++) { |
1714 | 0 | int32_t* quant_row = ch.plane.Row(y); |
1715 | 0 | const float* row = rect.ConstPlaneRow(dc, c, y); |
1716 | 0 | for (size_t x = 0; x < xs; x++) { |
1717 | 0 | quant_row[x] = std::round(row[x] * inv_factor); |
1718 | 0 | } |
1719 | 0 | } |
1720 | 0 | } |
1721 | 0 | } |
1722 | | |
// Only QuantizeWP (path 2) can set has_outliers.
1723 | 3.67k | if (has_outliers) { |
1724 | 0 | return JXL_FAILURE("Unsupported range of DC values"); |
1725 | 0 | } |
1726 | | |
// Dequantize the stream image into the shared DC storage / quant_dc, using
// 1/mul as the precision scale.
1727 | 3.67k | DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc, |
1728 | 3.67k | stream_images_[stream_id], enc_state->shared.quantizer.MulDC(), |
1729 | 3.67k | 1.0 / mul, color_correlation.DCFactors(), |
1730 | 3.67k | frame_header.chroma_subsampling, enc_state->shared.block_ctx_map); |
1731 | 3.67k | return true; |
1732 | 3.67k | } |
1733 | | |
1734 | | Status ModularFrameEncoder::AddACMetadata(const Rect& r, size_t group_index, |
1735 | | bool jpeg_transcode, |
1736 | 2.95k | PassesEncoderState* enc_state) { |
1737 | 2.95k | JxlMemoryManager* memory_manager = enc_state->memory_manager(); |
1738 | 2.95k | size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_); |
1739 | 2.95k | stream_options_[stream_id].max_chan_size = 0xFFFFFF; |
1740 | 2.95k | if (stream_options_[stream_id].predictor != Predictor::Weighted) { |
1741 | 2.93k | stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP; |
1742 | 2.93k | } |
1743 | 2.95k | if (jpeg_transcode) { |
1744 | 0 | stream_options_[stream_id].tree_kind = |
1745 | 0 | ModularOptions::TreeKind::kJpegTranscodeACMeta; |
1746 | 2.95k | } else if (cparams_.speed_tier >= SpeedTier::kFalcon) { |
1747 | 1.28k | stream_options_[stream_id].tree_kind = |
1748 | 1.28k | ModularOptions::TreeKind::kFalconACMeta; |
1749 | 1.67k | } else if (cparams_.speed_tier > SpeedTier::kKitten) { |
1750 | 1.30k | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta; |
1751 | 1.30k | } |
1752 | | // If we are using a non-constant CfL field, and are in a slow enough mode, |
1753 | | // re-enable tree computation for it. |
1754 | 2.95k | if (cparams_.speed_tier < SpeedTier::kSquirrel && |
1755 | 360 | cparams_.force_cfl_jpeg_recompression) { |
1756 | 360 | stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; |
1757 | 360 | } |
1758 | 2.95k | stream_options_[stream_id].histogram_params = |
1759 | 2.95k | stream_options_[0].histogram_params; |
1760 | | // YToX, YToB, ACS + QF, EPF |
1761 | 2.95k | Image& image = stream_images_[stream_id]; |
1762 | 2.95k | JXL_ASSIGN_OR_RETURN( |
1763 | 2.95k | image, Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 4)); |
1764 | 2.95k | static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); |
1765 | 2.95k | Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); |
1766 | 2.95k | JXL_ASSIGN_OR_RETURN( |
1767 | 2.95k | image.channel[0], |
1768 | 2.95k | Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3)); |
1769 | 2.95k | JXL_ASSIGN_OR_RETURN( |
1770 | 2.95k | image.channel[1], |
1771 | 2.95k | Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3)); |
1772 | 2.95k | JXL_ASSIGN_OR_RETURN( |
1773 | 2.95k | image.channel[2], |
1774 | 2.95k | Channel::Create(memory_manager, r.xsize() * r.ysize(), 2, 0, 0)); |
1775 | 2.95k | JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map, |
1776 | 2.95k | Rect(image.channel[0].plane), |
1777 | 2.95k | &image.channel[0].plane)); |
1778 | 2.95k | JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map, |
1779 | 2.95k | Rect(image.channel[1].plane), |
1780 | 2.95k | &image.channel[1].plane)); |
1781 | 2.95k | size_t num = 0; |
1782 | 128k | for (size_t y = 0; y < r.ysize(); y++) { |
1783 | 125k | AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y); |
1784 | 125k | const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y); |
1785 | 125k | const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y); |
1786 | 125k | int32_t* out_acs = image.channel[2].plane.Row(0); |
1787 | 125k | int32_t* out_qf = image.channel[2].plane.Row(1); |
1788 | 125k | int32_t* row_out_epf = image.channel[3].plane.Row(y); |
1789 | 2.87M | for (size_t x = 0; x < r.xsize(); x++) { |
1790 | 2.74M | row_out_epf[x] = row_epf[x]; |
1791 | 2.74M | if (!row_acs[x].IsFirstBlock()) continue; |
1792 | 2.58M | out_acs[num] = row_acs[x].RawStrategy(); |
1793 | 2.58M | out_qf[num] = row_qf[x] - 1; |
1794 | 2.58M | num++; |
1795 | 2.58M | } |
1796 | 125k | } |
1797 | 2.95k | image.channel[2].w = num; |
1798 | 2.95k | ac_metadata_size[group_index] = num; |
1799 | 2.95k | return true; |
1800 | 2.95k | } |
1801 | | |
1802 | | Status ModularFrameEncoder::EncodeQuantTable( |
1803 | | JxlMemoryManager* memory_manager, size_t size_x, size_t size_y, |
1804 | | BitWriter* writer, const QuantEncoding& encoding, size_t idx, |
1805 | 0 | ModularFrameEncoder* modular_frame_encoder) { |
1806 | 0 | JXL_ENSURE(encoding.qraw.qtable); |
1807 | 0 | JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size()); |
1808 | 0 | JXL_ENSURE(idx < kNumQuantTables); |
1809 | 0 | int* qtable = encoding.qraw.qtable->data(); |
1810 | 0 | JXL_RETURN_IF_ERROR(F16Coder::Write(encoding.qraw.qtable_den, writer)); |
1811 | 0 | if (modular_frame_encoder) { |
1812 | 0 | JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx)); |
1813 | 0 | JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream( |
1814 | 0 | writer, nullptr, LayerType::Header, qt)); |
1815 | 0 | return true; |
1816 | 0 | } |
1817 | 0 | JXL_ASSIGN_OR_RETURN(Image image, |
1818 | 0 | Image::Create(memory_manager, size_x, size_y, 8, 3)); |
1819 | 0 | for (size_t c = 0; c < 3; c++) { |
1820 | 0 | for (size_t y = 0; y < size_y; y++) { |
1821 | 0 | int32_t* JXL_RESTRICT row = image.channel[c].Row(y); |
1822 | 0 | for (size_t x = 0; x < size_x; x++) { |
1823 | 0 | row[x] = qtable[c * size_x * size_y + y * size_x + x]; |
1824 | 0 | } |
1825 | 0 | } |
1826 | 0 | } |
1827 | 0 | ModularOptions cfopts; |
1828 | 0 | JXL_RETURN_IF_ERROR(ModularGenericCompress(image, cfopts, *writer)); |
1829 | 0 | return true; |
1830 | 0 | } |
1831 | | |
1832 | | Status ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y, |
1833 | | const QuantEncoding& encoding, |
1834 | 0 | size_t idx) { |
1835 | 0 | JXL_ENSURE(idx < kNumQuantTables); |
1836 | 0 | JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx)); |
1837 | 0 | size_t stream_id = qt.ID(frame_dim_); |
1838 | 0 | JXL_ENSURE(encoding.qraw.qtable); |
1839 | 0 | JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size()); |
1840 | 0 | int* qtable = encoding.qraw.qtable->data(); |
1841 | 0 | Image& image = stream_images_[stream_id]; |
1842 | 0 | JxlMemoryManager* memory_manager = image.memory_manager(); |
1843 | 0 | JXL_ASSIGN_OR_RETURN(image, |
1844 | 0 | Image::Create(memory_manager, size_x, size_y, 8, 3)); |
1845 | 0 | for (size_t c = 0; c < 3; c++) { |
1846 | 0 | for (size_t y = 0; y < size_y; y++) { |
1847 | 0 | int32_t* JXL_RESTRICT row = image.channel[c].Row(y); |
1848 | 0 | for (size_t x = 0; x < size_x; x++) { |
1849 | 0 | row[x] = qtable[c * size_x * size_y + y * size_x + x]; |
1850 | 0 | } |
1851 | 0 | } |
1852 | 0 | } |
1853 | 0 | return true; |
1854 | 0 | } |
1855 | | } // namespace jxl |