/src/libjxl/lib/jxl/modular/encoding/encoding.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/modular/encoding/encoding.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <array> |
12 | | #include <cstddef> |
13 | | #include <cstdint> |
14 | | #include <cstdlib> |
15 | | #include <queue> |
16 | | #include <utility> |
17 | | #include <vector> |
18 | | |
19 | | #include "lib/jxl/base/common.h" |
20 | | #include "lib/jxl/base/compiler_specific.h" |
21 | | #include "lib/jxl/base/printf_macros.h" |
22 | | #include "lib/jxl/base/scope_guard.h" |
23 | | #include "lib/jxl/base/status.h" |
24 | | #include "lib/jxl/dec_ans.h" |
25 | | #include "lib/jxl/dec_bit_reader.h" |
26 | | #include "lib/jxl/fields.h" |
27 | | #include "lib/jxl/frame_dimensions.h" |
28 | | #include "lib/jxl/image_ops.h" |
29 | | #include "lib/jxl/modular/encoding/context_predict.h" |
30 | | #include "lib/jxl/modular/encoding/dec_ma.h" |
31 | | #include "lib/jxl/modular/modular_image.h" |
32 | | #include "lib/jxl/modular/options.h" |
33 | | #include "lib/jxl/modular/transform/transform.h" |
34 | | #include "lib/jxl/pack_signed.h" |
35 | | |
36 | | namespace jxl { |
37 | | |
38 | | // Removes all nodes that use a static property (i.e. channel or group ID) from |
39 | | // the tree and collapses each node on even levels with its two children to |
40 | | // produce a flatter tree. Also computes whether the resulting tree requires |
41 | | // using the weighted predictor. |
42 | | FlatTree FilterTree(const Tree &global_tree, |
43 | | std::array<pixel_type, kNumStaticProperties> &static_props, |
44 | | size_t *num_props, bool *use_wp, bool *wp_only, |
45 | 888k | bool *gradient_only) { |
46 | 888k | *num_props = 0; |
47 | 888k | bool has_wp = false; |
48 | 888k | bool has_non_wp = false; |
49 | 888k | *gradient_only = true; |
50 | 906k | const auto mark_property = [&](int32_t p) { |
51 | 906k | if (p == kWPProp) { |
52 | 142k | has_wp = true; |
53 | 764k | } else if (p >= kNumStaticProperties) { |
54 | 414k | has_non_wp = true; |
55 | 414k | } |
56 | 906k | if (p >= kNumStaticProperties && p != kGradientProp) { |
57 | 458k | *gradient_only = false; |
58 | 458k | } |
59 | 906k | }; |
60 | 888k | FlatTree output; |
61 | 888k | std::queue<size_t> nodes; |
62 | 888k | nodes.push(0); |
63 | | // Produces a trimmed and flattened tree by doing a BFS visit of the original |
64 | | // tree, ignoring branches that are known to be false and proceeding two |
65 | | // levels at a time to collapse nodes in a flatter tree; if an inner parent |
66 | | // node has a leaf as a child, the leaf is duplicated and an implicit fake |
67 | | // node is added. This allows to reduce the number of branches when traversing |
68 | | // the resulting flat tree. |
69 | 2.98M | while (!nodes.empty()) { |
70 | 2.09M | size_t cur = nodes.front(); |
71 | 2.09M | nodes.pop(); |
72 | | // Skip nodes that we can decide now, by jumping directly to their children. |
73 | 2.24M | while (global_tree[cur].property < kNumStaticProperties && |
74 | 1.94M | global_tree[cur].property != -1) { |
75 | 148k | if (static_props[global_tree[cur].property] > global_tree[cur].splitval) { |
76 | 79.6k | cur = global_tree[cur].lchild; |
77 | 79.6k | } else { |
78 | 69.0k | cur = global_tree[cur].rchild; |
79 | 69.0k | } |
80 | 148k | } |
81 | 2.09M | FlatDecisionNode flat; |
82 | 2.09M | if (global_tree[cur].property == -1) { |
83 | 1.79M | flat.property0 = -1; |
84 | 1.79M | flat.childID = global_tree[cur].lchild; |
85 | 1.79M | flat.predictor = global_tree[cur].predictor; |
86 | 1.79M | flat.predictor_offset = global_tree[cur].predictor_offset; |
87 | 1.79M | flat.multiplier = global_tree[cur].multiplier; |
88 | 1.79M | *gradient_only &= flat.predictor == Predictor::Gradient; |
89 | 1.79M | has_wp |= flat.predictor == Predictor::Weighted; |
90 | 1.79M | has_non_wp |= flat.predictor != Predictor::Weighted; |
91 | 1.79M | output.push_back(flat); |
92 | 1.79M | continue; |
93 | 1.79M | } |
94 | 302k | flat.childID = output.size() + nodes.size() + 1; |
95 | | |
96 | 302k | flat.property0 = global_tree[cur].property; |
97 | 302k | *num_props = std::max<size_t>(flat.property0 + 1, *num_props); |
98 | 302k | flat.splitval0 = global_tree[cur].splitval; |
99 | | |
100 | 906k | for (size_t i = 0; i < 2; i++) { |
101 | 604k | size_t cur_child = |
102 | 604k | i == 0 ? global_tree[cur].lchild : global_tree[cur].rchild; |
103 | | // Skip nodes that we can decide now. |
104 | 651k | while (global_tree[cur_child].property < kNumStaticProperties && |
105 | 396k | global_tree[cur_child].property != -1) { |
106 | 47.0k | if (static_props[global_tree[cur_child].property] > |
107 | 47.0k | global_tree[cur_child].splitval) { |
108 | 24.5k | cur_child = global_tree[cur_child].lchild; |
109 | 24.5k | } else { |
110 | 22.4k | cur_child = global_tree[cur_child].rchild; |
111 | 22.4k | } |
112 | 47.0k | } |
113 | | // We ended up in a leaf, add a placeholder decision and two copies of the |
114 | | // leaf. |
115 | 604k | if (global_tree[cur_child].property == -1) { |
116 | 349k | flat.properties[i] = 0; |
117 | 349k | flat.splitvals[i] = 0; |
118 | 349k | nodes.push(cur_child); |
119 | 349k | nodes.push(cur_child); |
120 | 349k | } else { |
121 | 254k | flat.properties[i] = global_tree[cur_child].property; |
122 | 254k | flat.splitvals[i] = global_tree[cur_child].splitval; |
123 | 254k | nodes.push(global_tree[cur_child].lchild); |
124 | 254k | nodes.push(global_tree[cur_child].rchild); |
125 | 254k | *num_props = std::max<size_t>(flat.properties[i] + 1, *num_props); |
126 | 254k | } |
127 | 604k | } |
128 | | |
129 | 604k | for (int16_t property : flat.properties) mark_property(property); |
130 | 302k | mark_property(flat.property0); |
131 | 302k | output.push_back(flat); |
132 | 302k | } |
133 | 888k | if (*num_props > kNumNonrefProperties) { |
134 | 3.89k | *num_props = |
135 | 3.89k | DivCeil(*num_props - kNumNonrefProperties, kExtraPropsPerChannel) * |
136 | 3.89k | kExtraPropsPerChannel + |
137 | 3.89k | kNumNonrefProperties; |
138 | 884k | } else { |
139 | 884k | *num_props = kNumNonrefProperties; |
140 | 884k | } |
141 | 888k | *use_wp = has_wp; |
142 | 888k | *wp_only = has_wp && !has_non_wp; |
143 | | |
144 | 888k | return output; |
145 | 888k | } |
146 | | |
147 | | namespace detail { |
148 | | template <bool uses_lz77> |
149 | | Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader, |
150 | | const std::vector<uint8_t> &context_map, |
151 | | const Tree &global_tree, |
152 | | const weighted::Header &wp_header, |
153 | | pixel_type chan, size_t group_id, |
154 | | TreeLut<uint8_t, false, false> &tree_lut, |
155 | | Image *image, uint32_t &fl_run, |
156 | 850k | uint32_t &fl_v) { |
157 | 850k | JxlMemoryManager *memory_manager = image->memory_manager(); |
158 | 850k | Channel &channel = image->channel[chan]; |
159 | | |
160 | 850k | std::array<pixel_type, kNumStaticProperties> static_props = { |
161 | 850k | {chan, static_cast<int>(group_id)}}; |
162 | | // TODO(veluca): filter the tree according to static_props. |
163 | | |
164 | | // zero pixel channel? could happen |
165 | 850k | if (channel.w == 0 || channel.h == 0) return true; |
166 | | |
167 | 850k | bool tree_has_wp_prop_or_pred = false; |
168 | 850k | bool is_wp_only = false; |
169 | 850k | bool is_gradient_only = false; |
170 | 850k | size_t num_props; |
171 | 850k | FlatTree tree = |
172 | 850k | FilterTree(global_tree, static_props, &num_props, |
173 | 850k | &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only); |
174 | | |
175 | | // From here on, tree lookup returns a *clustered* context ID. |
176 | | // This avoids an extra memory lookup after tree traversal. |
177 | 1.62M | for (auto &node : tree) { |
178 | 1.62M | if (node.property0 == -1) { |
179 | 1.43M | node.childID = context_map[node.childID]; |
180 | 1.43M | } |
181 | 1.62M | } |
182 | | |
183 | 850k | JXL_DEBUG_V(3, "Decoded MA tree with %" PRIuS " nodes", tree.size()); |
184 | | |
185 | | // MAANS decode |
186 | 850k | const auto make_pixel = [](uint64_t v, pixel_type multiplier, |
187 | 631M | pixel_type_w offset) -> pixel_type { |
188 | 631M | JXL_DASSERT((v & 0xFFFFFFFF) == v); |
189 | 631M | pixel_type_w val = static_cast<pixel_type_w>(UnpackSigned(v)); |
190 | | // if it overflows, it overflows, and we have a problem anyway |
191 | 631M | return val * multiplier + offset; |
192 | 631M | }; jxl::detail::DecodeModularChannelMAANS<true>(jxl::BitReader*, jxl::ANSSymbolReader*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&, std::__1::vector<jxl::PropertyDecisionNode, std::__1::allocator<jxl::PropertyDecisionNode> > const&, jxl::weighted::Header const&, int, unsigned long, jxl::TreeLut<unsigned char, false, false>&, jxl::Image*, unsigned int&, unsigned int&)::{lambda(unsigned long, int, long)#1}::operator()(unsigned long, int, long) constLine | Count | Source | 187 | 289M | pixel_type_w offset) -> pixel_type { | 188 | 289M | JXL_DASSERT((v & 0xFFFFFFFF) == v); | 189 | 289M | pixel_type_w val = static_cast<pixel_type_w>(UnpackSigned(v)); | 190 | | // if it overflows, it overflows, and we have a problem anyway | 191 | 289M | return val * multiplier + offset; | 192 | 289M | }; |
jxl::detail::DecodeModularChannelMAANS<false>(jxl::BitReader*, jxl::ANSSymbolReader*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&, std::__1::vector<jxl::PropertyDecisionNode, std::__1::allocator<jxl::PropertyDecisionNode> > const&, jxl::weighted::Header const&, int, unsigned long, jxl::TreeLut<unsigned char, false, false>&, jxl::Image*, unsigned int&, unsigned int&)::{lambda(unsigned long, int, long)#1}::operator()(unsigned long, int, long) constLine | Count | Source | 187 | 342M | pixel_type_w offset) -> pixel_type { | 188 | 342M | JXL_DASSERT((v & 0xFFFFFFFF) == v); | 189 | 342M | pixel_type_w val = static_cast<pixel_type_w>(UnpackSigned(v)); | 190 | | // if it overflows, it overflows, and we have a problem anyway | 191 | 342M | return val * multiplier + offset; | 192 | 342M | }; |
|
193 | | |
194 | | // True iff every decision node in global_tree splits on a static property |
195 | | // (channel or group_id) and every leaf has Gradient predictor with identity |
196 | | // transform. When this holds, all channels collapse to a single-leaf |
197 | | // Gradient+noop tree regardless of channel index, so the shared fl_run/fl_v |
198 | | // RLE state remains consistent across channel calls. |
199 | 850k | const bool global_tree_is_all_gradient_noop = [&] { |
200 | 902k | for (const auto& n : global_tree) { |
201 | 902k | if (n.property == -1) { |
202 | 808k | if (n.predictor != Predictor::Gradient || n.predictor_offset != 0 || |
203 | 8.08k | n.multiplier != 1) |
204 | 801k | return false; |
205 | 808k | } else if (n.property >= kNumStaticProperties) { |
206 | 45.6k | return false; |
207 | 45.6k | } |
208 | 902k | } |
209 | 3.62k | return true; |
210 | 850k | }(); jxl::detail::DecodeModularChannelMAANS<true>(jxl::BitReader*, jxl::ANSSymbolReader*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&, std::__1::vector<jxl::PropertyDecisionNode, std::__1::allocator<jxl::PropertyDecisionNode> > const&, jxl::weighted::Header const&, int, unsigned long, jxl::TreeLut<unsigned char, false, false>&, jxl::Image*, unsigned int&, unsigned int&)::{lambda()#1}::operator()() constLine | Count | Source | 199 | 131k | const bool global_tree_is_all_gradient_noop = [&] { | 200 | 133k | for (const auto& n : global_tree) { | 201 | 133k | if (n.property == -1) { | 202 | 126k | if (n.predictor != Predictor::Gradient || n.predictor_offset != 0 || | 203 | 2.30k | n.multiplier != 1) | 204 | 124k | return false; | 205 | 126k | } else if (n.property >= kNumStaticProperties) { | 206 | 4.59k | return false; | 207 | 4.59k | } | 208 | 133k | } | 209 | 1.99k | return true; | 210 | 131k | }(); |
jxl::detail::DecodeModularChannelMAANS<false>(jxl::BitReader*, jxl::ANSSymbolReader*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&, std::__1::vector<jxl::PropertyDecisionNode, std::__1::allocator<jxl::PropertyDecisionNode> > const&, jxl::weighted::Header const&, int, unsigned long, jxl::TreeLut<unsigned char, false, false>&, jxl::Image*, unsigned int&, unsigned int&)::{lambda()#1}::operator()() constLine | Count | Source | 199 | 719k | const bool global_tree_is_all_gradient_noop = [&] { | 200 | 768k | for (const auto& n : global_tree) { | 201 | 768k | if (n.property == -1) { | 202 | 682k | if (n.predictor != Predictor::Gradient || n.predictor_offset != 0 || | 203 | 5.77k | n.multiplier != 1) | 204 | 676k | return false; | 205 | 682k | } else if (n.property >= kNumStaticProperties) { | 206 | 41.0k | return false; | 207 | 41.0k | } | 208 | 768k | } | 209 | 1.63k | return true; | 210 | 719k | }(); |
|
211 | | |
212 | 850k | if (tree.size() == 1) { |
213 | | // special optimized case: no meta-adaptation, so no need |
214 | | // to compute properties. |
215 | 809k | Predictor predictor = tree[0].predictor; |
216 | 809k | int64_t offset = tree[0].predictor_offset; |
217 | 809k | int32_t multiplier = tree[0].multiplier; |
218 | 809k | size_t ctx_id = tree[0].childID; |
219 | 809k | if (predictor == Predictor::Zero) { |
220 | 660k | uint32_t value; |
221 | 660k | if (reader->IsSingleValueAndAdvance(ctx_id, &value, |
222 | 660k | channel.w * channel.h)) { |
223 | | // Special-case: histogram has a single symbol, with no extra bits, and |
224 | | // we use ANS mode. |
225 | 102k | JXL_DEBUG_V(8, "Fastest track."); |
226 | 102k | pixel_type v = make_pixel(value, multiplier, offset); |
227 | 2.05M | for (size_t y = 0; y < channel.h; y++) { |
228 | 1.95M | pixel_type *JXL_RESTRICT r = channel.Row(y); |
229 | 1.95M | std::fill(r, r + channel.w, v); |
230 | 1.95M | } |
231 | 558k | } else { |
232 | 558k | JXL_DEBUG_V(8, "Fast track."); |
233 | 558k | if (multiplier == 1 && offset == 0) { |
234 | 7.15M | for (size_t y = 0; y < channel.h; y++) { |
235 | 6.63M | pixel_type *JXL_RESTRICT r = channel.Row(y); |
236 | 318M | for (size_t x = 0; x < channel.w; x++) { |
237 | 311M | uint32_t v = |
238 | 311M | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); |
239 | 311M | r[x] = UnpackSigned(v); |
240 | 311M | } |
241 | 6.63M | } |
242 | 525k | } else { |
243 | 535k | for (size_t y = 0; y < channel.h; y++) { |
244 | 502k | pixel_type *JXL_RESTRICT r = channel.Row(y); |
245 | 25.6M | for (size_t x = 0; x < channel.w; x++) { |
246 | 25.1M | uint32_t v = |
247 | 25.1M | reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, |
248 | 25.1M | br); |
249 | 25.1M | r[x] = make_pixel(v, multiplier, offset); |
250 | 25.1M | } |
251 | 502k | } |
252 | 32.5k | } |
253 | 558k | } |
254 | 660k | return true; |
255 | 660k | } else if (uses_lz77 && reader->IsHuffRleOnly() && |
256 | 432 | global_tree_is_all_gradient_noop) { |
257 | 432 | JXL_DEBUG_V(8, "Gradient RLE (fjxl) very fast track."); |
258 | 432 | pixel_type_w sv = UnpackSigned(fl_v); |
259 | 6.04k | for (size_t y = 0; y < channel.h; y++) { |
260 | 5.60k | pixel_type *JXL_RESTRICT r = channel.Row(y); |
261 | 5.60k | const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); |
262 | 5.60k | const pixel_type *JXL_RESTRICT rtopleft = |
263 | 5.60k | (y ? channel.Row(y - 1) - 1 : r - 1); |
264 | 5.60k | pixel_type_w guess_0 = (y ? rtop[0] : 0); |
265 | 5.60k | if (fl_run == 0) { |
266 | 5.60k | reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &fl_v, |
267 | 5.60k | &fl_run); |
268 | 5.60k | sv = UnpackSigned(fl_v); |
269 | 5.60k | } else { |
270 | 0 | fl_run--; |
271 | 0 | } |
272 | 5.60k | r[0] = sv + guess_0; |
273 | 215k | for (size_t x = 1; x < channel.w; x++) { |
274 | 210k | pixel_type left = r[x - 1]; |
275 | 210k | pixel_type top = rtop[x]; |
276 | 210k | pixel_type topleft = rtopleft[x]; |
277 | 210k | pixel_type_w guess = ClampedGradient(top, left, topleft); |
278 | 210k | if (!fl_run) { |
279 | 210k | reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &fl_v, |
280 | 210k | &fl_run); |
281 | 210k | sv = UnpackSigned(fl_v); |
282 | 210k | } else { |
283 | 0 | fl_run--; |
284 | 0 | } |
285 | 210k | r[x] = sv + guess; |
286 | 210k | } |
287 | 5.60k | } |
288 | 432 | return true; |
289 | 148k | } else if (predictor == Predictor::Gradient && offset == 0 && |
290 | 8.92k | multiplier == 1) { |
291 | 8.14k | JXL_DEBUG_V(8, "Gradient very fast track."); |
292 | 8.14k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); |
293 | 189k | for (size_t y = 0; y < channel.h; y++) { |
294 | 181k | pixel_type *JXL_RESTRICT r = channel.Row(y); |
295 | 4.53M | for (size_t x = 0; x < channel.w; x++) { |
296 | 4.35M | pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); |
297 | 4.35M | pixel_type top = (y ? *(r + x - onerow) : left); |
298 | 4.35M | pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left); |
299 | 4.35M | pixel_type guess = ClampedGradient(top, left, topleft); |
300 | 4.35M | uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>( |
301 | 4.35M | ctx_id, br); |
302 | 4.35M | r[x] = make_pixel(v, 1, guess); |
303 | 4.35M | } |
304 | 181k | } |
305 | 8.14k | return true; |
306 | 8.14k | } |
307 | 809k | } |
308 | | |
309 | | // Check if this tree is a WP-only tree with a small enough property value |
310 | | // range. |
311 | 181k | if (is_wp_only) { |
312 | 16.0k | is_wp_only = TreeToLookupTable(tree, tree_lut); |
313 | 16.0k | } |
314 | 181k | if (is_gradient_only) { |
315 | 6.65k | is_gradient_only = TreeToLookupTable(tree, tree_lut); |
316 | 6.65k | } |
317 | | |
318 | 181k | if (is_gradient_only) { |
319 | 4.20k | JXL_DEBUG_V(8, "Gradient fast track."); |
320 | 4.20k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); |
321 | 212k | for (size_t y = 0; y < channel.h; y++) { |
322 | 207k | pixel_type *JXL_RESTRICT r = channel.Row(y); |
323 | 5.06M | for (size_t x = 0; x < channel.w; x++) { |
324 | 4.85M | pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); |
325 | 4.85M | pixel_type_w top = (y ? *(r + x - onerow) : left); |
326 | 4.85M | pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); |
327 | 4.85M | int32_t guess = ClampedGradient(top, left, topleft); |
328 | 4.85M | uint32_t pos = |
329 | 4.85M | kPropRangeFast + |
330 | 4.85M | std::min<pixel_type_w>( |
331 | 4.85M | std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft), |
332 | 4.85M | kPropRangeFast - 1); |
333 | 4.85M | uint32_t ctx_id = tree_lut.context_lookup[pos]; |
334 | 4.85M | uint64_t v = |
335 | 4.85M | reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, br); |
336 | 4.85M | r[x] = make_pixel(v, 1, guess); |
337 | 4.85M | } |
338 | 207k | } |
339 | 177k | } else if (!uses_lz77 && is_wp_only && channel.w > 8) { |
340 | 7.43k | JXL_DEBUG_V(8, "WP fast track."); |
341 | 7.43k | weighted::State wp_state(wp_header, channel.w, channel.h); |
342 | 7.43k | Properties properties(1); |
343 | 271k | for (size_t y = 0; y < channel.h; y++) { |
344 | 264k | pixel_type *JXL_RESTRICT r = channel.Row(y); |
345 | 264k | const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); |
346 | 264k | const pixel_type *JXL_RESTRICT rtoptop = |
347 | 264k | (y > 1 ? channel.Row(y - 2) : rtop); |
348 | 264k | const pixel_type *JXL_RESTRICT rtopleft = |
349 | 264k | (y ? channel.Row(y - 1) - 1 : r - 1); |
350 | 264k | const pixel_type *JXL_RESTRICT rtopright = |
351 | 264k | (y ? channel.Row(y - 1) + 1 : r - 1); |
352 | 264k | size_t x = 0; |
353 | 264k | { |
354 | 264k | size_t offset = 0; |
355 | 264k | pixel_type_w left = y ? rtop[x] : 0; |
356 | 264k | pixel_type_w toptop = y ? rtoptop[x] : 0; |
357 | 264k | pixel_type_w topright = (x + 1 < channel.w && y ? rtop[x + 1] : left); |
358 | 264k | int32_t guess = wp_state.Predict</*compute_properties=*/true>( |
359 | 264k | x, y, channel.w, left, left, topright, left, toptop, &properties, |
360 | 264k | offset); |
361 | 264k | uint32_t pos = |
362 | 264k | kPropRangeFast + |
363 | 264k | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); |
364 | 264k | uint32_t ctx_id = tree_lut.context_lookup[pos]; |
365 | 264k | uint64_t v = |
366 | 264k | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); |
367 | 264k | r[x] = make_pixel(v, 1, guess); |
368 | 264k | wp_state.UpdateErrors(r[x], x, y, channel.w); |
369 | 264k | } |
370 | 10.9M | for (x = 1; x + 1 < channel.w; x++) { |
371 | 10.7M | size_t offset = 0; |
372 | 10.7M | int32_t guess = wp_state.Predict</*compute_properties=*/true>( |
373 | 10.7M | x, y, channel.w, rtop[x], r[x - 1], rtopright[x], rtopleft[x], |
374 | 10.7M | rtoptop[x], &properties, offset); |
375 | 10.7M | uint32_t pos = |
376 | 10.7M | kPropRangeFast + |
377 | 10.7M | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); |
378 | 10.7M | uint32_t ctx_id = tree_lut.context_lookup[pos]; |
379 | 10.7M | uint64_t v = |
380 | 10.7M | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); |
381 | 10.7M | r[x] = make_pixel(v, 1, guess); |
382 | 10.7M | wp_state.UpdateErrors(r[x], x, y, channel.w); |
383 | 10.7M | } |
384 | 264k | { |
385 | 264k | size_t offset = 0; |
386 | 264k | int32_t guess = wp_state.Predict</*compute_properties=*/true>( |
387 | 264k | x, y, channel.w, rtop[x], r[x - 1], rtop[x], rtopleft[x], |
388 | 264k | rtoptop[x], &properties, offset); |
389 | 264k | uint32_t pos = |
390 | 264k | kPropRangeFast + |
391 | 264k | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); |
392 | 264k | uint32_t ctx_id = tree_lut.context_lookup[pos]; |
393 | 264k | uint64_t v = |
394 | 264k | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); |
395 | 264k | r[x] = make_pixel(v, 1, guess); |
396 | 264k | wp_state.UpdateErrors(r[x], x, y, channel.w); |
397 | 264k | } |
398 | 264k | } |
399 | 170k | } else if (!tree_has_wp_prop_or_pred) { |
400 | | // special optimized case: the weighted predictor and its properties are not |
401 | | // used, so no need to compute weights and properties. |
402 | 144k | JXL_DEBUG_V(8, "Slow track."); |
403 | 144k | MATreeLookup tree_lookup(tree); |
404 | 144k | Properties properties = Properties(num_props); |
405 | 144k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); |
406 | 144k | JXL_ASSIGN_OR_RETURN( |
407 | 144k | Channel references, |
408 | 144k | Channel::Create(memory_manager, |
409 | 144k | properties.size() - kNumNonrefProperties, channel.w)); |
410 | 8.11M | for (size_t y = 0; y < channel.h; y++) { |
411 | 7.96M | pixel_type *JXL_RESTRICT p = channel.Row(y); |
412 | 7.96M | PrecomputeReferences(channel, y, *image, chan, &references); |
413 | 7.96M | InitPropsRow(&properties, static_props, y); |
414 | 7.96M | if (y > 1 && channel.w > 8 && references.w == 0) { |
415 | 21.4M | for (size_t x = 0; x < 2; x++) { |
416 | 14.2M | PredictionResult res = |
417 | 14.2M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, |
418 | 14.2M | tree_lookup, references); |
419 | 14.2M | uint64_t v = |
420 | 14.2M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); |
421 | 14.2M | p[x] = make_pixel(v, res.multiplier, res.guess); |
422 | 14.2M | } |
423 | 432M | for (size_t x = 2; x < channel.w - 2; x++) { |
424 | 425M | PredictionResult res = |
425 | 425M | PredictTreeNoWPNEC(&properties, channel.w, p + x, onerow, x, y, |
426 | 425M | tree_lookup, references); |
427 | 425M | uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>( |
428 | 425M | res.context, br); |
429 | 425M | p[x] = make_pixel(v, res.multiplier, res.guess); |
430 | 425M | } |
431 | 21.4M | for (size_t x = channel.w - 2; x < channel.w; x++) { |
432 | 14.2M | PredictionResult res = |
433 | 14.2M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, |
434 | 14.2M | tree_lookup, references); |
435 | 14.2M | uint64_t v = |
436 | 14.2M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); |
437 | 14.2M | p[x] = make_pixel(v, res.multiplier, res.guess); |
438 | 14.2M | } |
439 | 7.14M | } else { |
440 | 19.5M | for (size_t x = 0; x < channel.w; x++) { |
441 | 18.7M | PredictionResult res = |
442 | 18.7M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, |
443 | 18.7M | tree_lookup, references); |
444 | 18.7M | uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>( |
445 | 18.7M | res.context, br); |
446 | 18.7M | p[x] = make_pixel(v, res.multiplier, res.guess); |
447 | 18.7M | } |
448 | 820k | } |
449 | 7.96M | } |
450 | 144k | } else { |
451 | 25.5k | JXL_DEBUG_V(8, "Slowest track."); |
452 | 25.5k | MATreeLookup tree_lookup(tree); |
453 | 25.5k | Properties properties = Properties(num_props); |
454 | 25.5k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); |
455 | 25.5k | JXL_ASSIGN_OR_RETURN( |
456 | 25.5k | Channel references, |
457 | 25.5k | Channel::Create(memory_manager, |
458 | 25.5k | properties.size() - kNumNonrefProperties, channel.w)); |
459 | 25.5k | weighted::State wp_state(wp_header, channel.w, channel.h); |
460 | 1.60M | for (size_t y = 0; y < channel.h; y++) { |
461 | 1.58M | pixel_type *JXL_RESTRICT p = channel.Row(y); |
462 | 1.58M | InitPropsRow(&properties, static_props, y); |
463 | 1.58M | PrecomputeReferences(channel, y, *image, chan, &references); |
464 | 1.58M | if (!uses_lz77 && y > 1 && channel.w > 8 && references.w == 0) { |
465 | 1.52M | for (size_t x = 0; x < 2; x++) { |
466 | 1.01M | PredictionResult res = |
467 | 1.01M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, |
468 | 1.01M | tree_lookup, references, &wp_state); |
469 | 1.01M | uint64_t v = |
470 | 1.01M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); |
471 | 1.01M | p[x] = make_pixel(v, res.multiplier, res.guess); |
472 | 1.01M | wp_state.UpdateErrors(p[x], x, y, channel.w); |
473 | 1.01M | } |
474 | 101M | for (size_t x = 2; x < channel.w - 2; x++) { |
475 | 100M | PredictionResult res = |
476 | 100M | PredictTreeWPNEC(&properties, channel.w, p + x, onerow, x, y, |
477 | 100M | tree_lookup, references, &wp_state); |
478 | 100M | uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>( |
479 | 100M | res.context, br); |
480 | 100M | p[x] = make_pixel(v, res.multiplier, res.guess); |
481 | 100M | wp_state.UpdateErrors(p[x], x, y, channel.w); |
482 | 100M | } |
483 | 1.52M | for (size_t x = channel.w - 2; x < channel.w; x++) { |
484 | 1.01M | PredictionResult res = |
485 | 1.01M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, |
486 | 1.01M | tree_lookup, references, &wp_state); |
487 | 1.01M | uint64_t v = |
488 | 1.01M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); |
489 | 1.01M | p[x] = make_pixel(v, res.multiplier, res.guess); |
490 | 1.01M | wp_state.UpdateErrors(p[x], x, y, channel.w); |
491 | 1.01M | } |
492 | 1.07M | } else { |
493 | 10.7M | for (size_t x = 0; x < channel.w; x++) { |
494 | 9.64M | PredictionResult res = |
495 | 9.64M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, |
496 | 9.64M | tree_lookup, references, &wp_state); |
497 | 9.64M | uint64_t v = |
498 | 9.64M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); |
499 | 9.64M | p[x] = make_pixel(v, res.multiplier, res.guess); |
500 | 9.64M | wp_state.UpdateErrors(p[x], x, y, channel.w); |
501 | 9.64M | } |
502 | 1.07M | } |
503 | 1.58M | } |
504 | 25.5k | } |
505 | 181k | return true; |
506 | 181k | } jxl::Status jxl::detail::DecodeModularChannelMAANS<true>(jxl::BitReader*, jxl::ANSSymbolReader*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&, std::__1::vector<jxl::PropertyDecisionNode, std::__1::allocator<jxl::PropertyDecisionNode> > const&, jxl::weighted::Header const&, int, unsigned long, jxl::TreeLut<unsigned char, false, false>&, jxl::Image*, unsigned int&, unsigned int&) Line | Count | Source | 156 | 131k | uint32_t &fl_v) { | 157 | 131k | JxlMemoryManager *memory_manager = image->memory_manager(); | 158 | 131k | Channel &channel = image->channel[chan]; | 159 | | | 160 | 131k | std::array<pixel_type, kNumStaticProperties> static_props = { | 161 | 131k | {chan, static_cast<int>(group_id)}}; | 162 | | // TODO(veluca): filter the tree according to static_props. | 163 | | | 164 | | // zero pixel channel? could happen | 165 | 131k | if (channel.w == 0 || channel.h == 0) return true; | 166 | | | 167 | 131k | bool tree_has_wp_prop_or_pred = false; | 168 | 131k | bool is_wp_only = false; | 169 | 131k | bool is_gradient_only = false; | 170 | 131k | size_t num_props; | 171 | 131k | FlatTree tree = | 172 | 131k | FilterTree(global_tree, static_props, &num_props, | 173 | 131k | &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only); | 174 | | | 175 | | // From here on, tree lookup returns a *clustered* context ID. | 176 | | // This avoids an extra memory lookup after tree traversal. | 177 | 151k | for (auto &node : tree) { | 178 | 151k | if (node.property0 == -1) { | 179 | 146k | node.childID = context_map[node.childID]; | 180 | 146k | } | 181 | 151k | } | 182 | | | 183 | 131k | JXL_DEBUG_V(3, "Decoded MA tree with %" PRIuS " nodes", tree.size()); | 184 | | | 185 | | // MAANS decode | 186 | 131k | const auto make_pixel = [](uint64_t v, pixel_type multiplier, | 187 | 131k | pixel_type_w offset) -> pixel_type { | 188 | 131k | JXL_DASSERT((v & 0xFFFFFFFF) == v); | 189 | 131k | pixel_type_w val = static_cast<pixel_type_w>(UnpackSigned(v)); | 190 | | // if it overflows, it overflows, and we have a problem anyway | 191 | 131k | return val * multiplier + offset; | 192 | 131k | }; | 193 | | | 194 | | // True iff every decision node in global_tree splits on a static property | 195 | | // (channel or group_id) and every leaf has Gradient predictor with identity | 196 | | // transform. When this holds, all channels collapse to a single-leaf | 197 | | // Gradient+noop tree regardless of channel index, so the shared fl_run/fl_v | 198 | | // RLE state remains consistent across channel calls. | 199 | 131k | const bool global_tree_is_all_gradient_noop = [&] { | 200 | 131k | for (const auto& n : global_tree) { | 201 | 131k | if (n.property == -1) { | 202 | 131k | if (n.predictor != Predictor::Gradient || n.predictor_offset != 0 || | 203 | 131k | n.multiplier != 1) | 204 | 131k | return false; | 205 | 131k | } else if (n.property >= kNumStaticProperties) { | 206 | 131k | return false; | 207 | 131k | } | 208 | 131k | } | 209 | 131k | return true; | 210 | 131k | }(); | 211 | | | 212 | 131k | if (tree.size() == 1) { | 213 | | // special optimized case: no meta-adaptation, so no need | 214 | | // to compute properties. | 215 | 126k | Predictor predictor = tree[0].predictor; | 216 | 126k | int64_t offset = tree[0].predictor_offset; | 217 | 126k | int32_t multiplier = tree[0].multiplier; | 218 | 126k | size_t ctx_id = tree[0].childID; | 219 | 126k | if (predictor == Predictor::Zero) { | 220 | 97.8k | uint32_t value; | 221 | 97.8k | if (reader->IsSingleValueAndAdvance(ctx_id, &value, | 222 | 97.8k | channel.w * channel.h)) { | 223 | | // Special-case: histogram has a single symbol, with no extra bits, and | 224 | | // we use ANS mode. | 225 | 18.5k | JXL_DEBUG_V(8, "Fastest track."); | 226 | 18.5k | pixel_type v = make_pixel(value, multiplier, offset); | 227 | 602k | for (size_t y = 0; y < channel.h; y++) { | 228 | 583k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 229 | 583k | std::fill(r, r + channel.w, v); | 230 | 583k | } | 231 | 79.2k | } else { | 232 | 79.2k | JXL_DEBUG_V(8, "Fast track."); | 233 | 79.2k | if (multiplier == 1 && offset == 0) { | 234 | 1.11M | for (size_t y = 0; y < channel.h; y++) { | 235 | 1.04M | pixel_type *JXL_RESTRICT r = channel.Row(y); | 236 | 35.1M | for (size_t x = 0; x < channel.w; x++) { | 237 | 34.1M | uint32_t v = | 238 | 34.1M | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 239 | 34.1M | r[x] = UnpackSigned(v); | 240 | 34.1M | } | 241 | 1.04M | } | 242 | 68.3k | } else { | 243 | 197k | for (size_t y = 0; y < channel.h; y++) { | 244 | 186k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 245 | 8.66M | for (size_t x = 0; x < channel.w; x++) { | 246 | 8.48M | uint32_t v = | 247 | 8.48M | reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, | 248 | 8.48M | br); | 249 | 8.48M | r[x] = make_pixel(v, multiplier, offset); | 250 | 8.48M | } | 251 | 186k | } | 252 | 10.8k | } | 253 | 79.2k | } | 254 | 97.8k | return true; | 255 | 97.8k | } else if (uses_lz77 && reader->IsHuffRleOnly() && | 256 | 432 | global_tree_is_all_gradient_noop) { | 257 | 432 | JXL_DEBUG_V(8, "Gradient RLE (fjxl) very fast track."); | 258 | 432 | pixel_type_w sv = UnpackSigned(fl_v); | 259 | 6.04k | for (size_t y = 0; y < channel.h; y++) { | 260 | 5.60k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 261 | 5.60k | const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); | 262 | 5.60k | const pixel_type *JXL_RESTRICT rtopleft = | 263 | 5.60k | (y ? channel.Row(y - 1) - 1 : r - 1); | 264 | 5.60k | pixel_type_w guess_0 = (y ? rtop[0] : 0); | 265 | 5.60k | if (fl_run == 0) { | 266 | 5.60k | reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &fl_v, | 267 | 5.60k | &fl_run); | 268 | 5.60k | sv = UnpackSigned(fl_v); | 269 | 5.60k | } else { | 270 | 0 | fl_run--; | 271 | 0 | } | 272 | 5.60k | r[0] = sv + guess_0; | 273 | 215k | for (size_t x = 1; x < channel.w; x++) { | 274 | 210k | pixel_type left = r[x - 1]; | 275 | 210k | pixel_type top = rtop[x]; | 276 | 210k | pixel_type topleft = rtopleft[x]; | 277 | 210k | pixel_type_w guess = ClampedGradient(top, left, topleft); | 278 | 210k | if (!fl_run) { | 279 | 210k | reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &fl_v, | 280 | 210k | &fl_run); | 281 | 210k | sv = UnpackSigned(fl_v); | 282 | 210k | } else { | 283 | 0 | fl_run--; | 284 | 0 | } | 285 | 210k | r[x] = sv + guess; | 286 | 210k | } | 287 | 5.60k | } | 288 | 432 | return true; | 289 | 28.3k | } else if (predictor == Predictor::Gradient && offset == 0 && | 290 | 1.92k | multiplier == 1) { | 291 | 1.63k | JXL_DEBUG_V(8, "Gradient very fast track."); | 292 | 1.63k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 293 | 29.6k | for (size_t y = 0; y < channel.h; y++) { | 294 | 27.9k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 295 | 1.81M | for (size_t x = 0; x < channel.w; x++) { | 296 | 1.79M | pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); | 297 | 1.79M | pixel_type top = (y ? *(r + x - onerow) : left); | 298 | 1.79M | pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left); | 299 | 1.79M | pixel_type guess = ClampedGradient(top, left, topleft); | 300 | 1.79M | uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>( | 301 | 1.79M | ctx_id, br); | 302 | 1.79M | r[x] = make_pixel(v, 1, guess); | 303 | 1.79M | } | 304 | 27.9k | } | 305 | 1.63k | return true; | 306 | 1.63k | } | 307 | 126k | } | 308 | | | 309 | | // Check if this tree is a WP-only tree with a small enough property value | 310 | | // range. | 311 | 31.1k | if (is_wp_only) { | 312 | 1.53k | is_wp_only = TreeToLookupTable(tree, tree_lut); | 313 | 1.53k | } | 314 | 31.1k | if (is_gradient_only) { | 315 | 1.29k | is_gradient_only = TreeToLookupTable(tree, tree_lut); | 316 | 1.29k | } | 317 | | | 318 | 31.1k | if (is_gradient_only) { | 319 | 157 | JXL_DEBUG_V(8, "Gradient fast track."); | 320 | 157 | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 321 | 12.0k | for (size_t y = 0; y < channel.h; y++) { | 322 | 11.8k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 323 | 1.87M | for (size_t x = 0; x < channel.w; x++) { | 324 | 1.86M | pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); | 325 | 1.86M | pixel_type_w top = (y ? *(r + x - onerow) : left); | 326 | 1.86M | pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); | 327 | 1.86M | int32_t guess = ClampedGradient(top, left, topleft); | 328 | 1.86M | uint32_t pos = | 329 | 1.86M | kPropRangeFast + | 330 | 1.86M | std::min<pixel_type_w>( | 331 | 1.86M | std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft), | 332 | 1.86M | kPropRangeFast - 1); | 333 | 1.86M | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 334 | 1.86M | uint64_t v = | 335 | 1.86M | reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, br); | 336 | 1.86M | r[x] = make_pixel(v, 1, guess); | 337 | 1.86M | } | 338 | 11.8k | } | 339 | 31.0k | } else if (!uses_lz77 && is_wp_only && channel.w > 8) { | 340 | 0 | JXL_DEBUG_V(8, "WP fast track."); | 341 | 0 | weighted::State wp_state(wp_header, channel.w, channel.h); | 342 | 0 | Properties properties(1); | 343 | 0 | for (size_t y = 0; y < channel.h; y++) { | 344 | 0 | pixel_type *JXL_RESTRICT r = channel.Row(y); | 345 | 0 | const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); | 346 | 0 | const pixel_type *JXL_RESTRICT rtoptop = | 347 | 0 | (y > 1 ? channel.Row(y - 2) : rtop); | 348 | 0 | const pixel_type *JXL_RESTRICT rtopleft = | 349 | 0 | (y ? channel.Row(y - 1) - 1 : r - 1); | 350 | 0 | const pixel_type *JXL_RESTRICT rtopright = | 351 | 0 | (y ? channel.Row(y - 1) + 1 : r - 1); | 352 | 0 | size_t x = 0; | 353 | 0 | { | 354 | 0 | size_t offset = 0; | 355 | 0 | pixel_type_w left = y ? rtop[x] : 0; | 356 | 0 | pixel_type_w toptop = y ? rtoptop[x] : 0; | 357 | 0 | pixel_type_w topright = (x + 1 < channel.w && y ? rtop[x + 1] : left); | 358 | 0 | int32_t guess = wp_state.Predict</*compute_properties=*/true>( | 359 | 0 | x, y, channel.w, left, left, topright, left, toptop, &properties, | 360 | 0 | offset); | 361 | 0 | uint32_t pos = | 362 | 0 | kPropRangeFast + | 363 | 0 | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); | 364 | 0 | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 365 | 0 | uint64_t v = | 366 | 0 | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 367 | 0 | r[x] = make_pixel(v, 1, guess); | 368 | 0 | wp_state.UpdateErrors(r[x], x, y, channel.w); | 369 | 0 | } | 370 | 0 | for (x = 1; x + 1 < channel.w; x++) { | 371 | 0 | size_t offset = 0; | 372 | 0 | int32_t guess = wp_state.Predict</*compute_properties=*/true>( | 373 | 0 | x, y, channel.w, rtop[x], r[x - 1], rtopright[x], rtopleft[x], | 374 | 0 | rtoptop[x], &properties, offset); | 375 | 0 | uint32_t pos = | 376 | 0 | kPropRangeFast + | 377 | 0 | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); | 378 | 0 | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 379 | 0 | uint64_t v = | 380 | 0 | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 381 | 0 | r[x] = make_pixel(v, 1, guess); | 382 | 0 | wp_state.UpdateErrors(r[x], x, y, channel.w); | 383 | 0 | } | 384 | 0 | { | 385 | 0 | size_t offset = 0; | 386 | 0 | int32_t guess = wp_state.Predict</*compute_properties=*/true>( | 387 | 0 | x, y, channel.w, rtop[x], r[x - 1], rtop[x], rtopleft[x], | 388 | 0 | rtoptop[x], &properties, offset); | 389 | 0 | uint32_t pos = | 390 | 0 | kPropRangeFast + | 391 | 0 | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); | 392 | 0 | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 393 | 0 | uint64_t v = | 394 | 0 | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 395 | 0 | r[x] = make_pixel(v, 1, guess); | 396 | 0 | wp_state.UpdateErrors(r[x], x, y, channel.w); | 397 | 0 | } | 398 | 0 | } | 399 | 31.0k | } else if (!tree_has_wp_prop_or_pred) { | 400 | | // special optimized case: the weighted predictor and its properties are not | 401 | | // used, so no need to compute weights and properties. | 402 | 27.0k | JXL_DEBUG_V(8, "Slow track."); | 403 | 27.0k | MATreeLookup tree_lookup(tree); | 404 | 27.0k | Properties properties = Properties(num_props); | 405 | 27.0k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 406 | 27.0k | JXL_ASSIGN_OR_RETURN( | 407 | 27.0k | Channel references, | 408 | 27.0k | Channel::Create(memory_manager, | 409 | 27.0k | properties.size() - kNumNonrefProperties, channel.w)); | 410 | 3.99M | for (size_t y = 0; y < channel.h; y++) { | 411 | 3.96M | pixel_type *JXL_RESTRICT p = channel.Row(y); | 412 | 3.96M | PrecomputeReferences(channel, y, *image, chan, &references); | 413 | 3.96M | InitPropsRow(&properties, static_props, y); | 414 | 3.96M | if (y > 1 && channel.w > 8 && references.w == 0) { | 415 | 11.3M | for (size_t x = 0; x < 2; x++) { | 416 | 7.53M | PredictionResult res = | 417 | 7.53M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, | 418 | 7.53M | tree_lookup, references); | 419 | 7.53M | uint64_t v = | 420 | 7.53M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 421 | 7.53M | p[x] = make_pixel(v, res.multiplier, res.guess); | 422 | 7.53M | } | 423 | 257M | for (size_t x = 2; x < channel.w - 2; x++) { | 424 | 253M | PredictionResult res = | 425 | 253M | PredictTreeNoWPNEC(&properties, channel.w, p + x, onerow, x, y, | 426 | 253M | tree_lookup, references); | 427 | 253M | uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>( | 428 | 253M | res.context, br); | 429 | 253M | p[x] = make_pixel(v, res.multiplier, res.guess); | 430 | 253M | } | 431 | 11.3M | for (size_t x = channel.w - 2; x < channel.w; x++) { | 432 | 7.53M | PredictionResult res = | 433 | 7.53M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, | 434 | 7.53M | tree_lookup, references); | 435 | 7.53M | uint64_t v = | 436 | 7.53M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 437 | 7.53M | p[x] = make_pixel(v, res.multiplier, res.guess); | 438 | 7.53M | } | 439 | 3.76M | } else { | 440 | 6.34M | for (size_t x = 0; x < channel.w; x++) { | 441 | 6.15M | PredictionResult res = | 442 | 6.15M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, | 443 | 6.15M | tree_lookup, references); | 444 | 6.15M | uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>( | 445 | 6.15M | res.context, br); | 446 | 6.15M | p[x] = make_pixel(v, res.multiplier, res.guess); | 447 | 6.15M | } | 448 | 195k | } | 449 | 3.96M | } | 450 | 27.0k | } else { | 451 | 3.96k | JXL_DEBUG_V(8, "Slowest track."); | 452 | 3.96k | MATreeLookup tree_lookup(tree); | 453 | 3.96k | Properties properties = Properties(num_props); | 454 | 3.96k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 455 | 3.96k | JXL_ASSIGN_OR_RETURN( | 456 | 3.96k | Channel references, | 457 | 3.96k | Channel::Create(memory_manager, | 458 | 3.96k | properties.size() - kNumNonrefProperties, channel.w)); | 459 | 3.96k | weighted::State wp_state(wp_header, channel.w, channel.h); | 460 | 52.7k | for (size_t y = 0; y < channel.h; y++) { | 461 | 48.7k | pixel_type *JXL_RESTRICT p = channel.Row(y); | 462 | 48.7k | InitPropsRow(&properties, static_props, y); | 463 | 48.7k | PrecomputeReferences(channel, y, *image, chan, &references); | 464 | 48.7k | if (!uses_lz77 && y > 1 && channel.w > 8 && references.w == 0) { | 465 | 0 | for (size_t x = 0; x < 2; x++) { | 466 | 0 | PredictionResult res = | 467 | 0 | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, | 468 | 0 | tree_lookup, references, &wp_state); | 469 | 0 | uint64_t v = | 470 | 0 | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 471 | 0 | p[x] = make_pixel(v, res.multiplier, res.guess); | 472 | 0 | wp_state.UpdateErrors(p[x], x, y, channel.w); | 473 | 0 | } | 474 | 0 | for (size_t x = 2; x < channel.w - 2; x++) { | 475 | 0 | PredictionResult res = | 476 | 0 | PredictTreeWPNEC(&properties, channel.w, p + x, onerow, x, y, | 477 | 0 | tree_lookup, references, &wp_state); | 478 | 0 | uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>( | 479 | 0 | res.context, br); | 480 | 0 | p[x] = make_pixel(v, res.multiplier, res.guess); | 481 | 0 | wp_state.UpdateErrors(p[x], x, y, channel.w); | 482 | 0 | } | 483 | 0 | for (size_t x = channel.w - 2; x < channel.w; x++) { | 484 | 0 | PredictionResult res = | 485 | 0 | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, | 486 | 0 | tree_lookup, references, &wp_state); | 487 | 0 | uint64_t v = | 488 | 0 | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 489 | 0 | p[x] = make_pixel(v, res.multiplier, res.guess); | 490 | 0 | wp_state.UpdateErrors(p[x], x, y, channel.w); | 491 | 0 | } | 492 | 48.7k | } else { | 493 | 1.91M | for (size_t x = 0; x < channel.w; x++) { | 494 | 1.86M | PredictionResult res = | 495 | 1.86M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, | 496 | 1.86M | tree_lookup, references, &wp_state); | 497 | 1.86M | uint64_t v = | 498 | 1.86M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 499 | 1.86M | p[x] = make_pixel(v, res.multiplier, res.guess); | 500 | 1.86M | wp_state.UpdateErrors(p[x], x, y, channel.w); | 501 | 1.86M | } | 502 | 48.7k | } | 503 | 48.7k | } | 504 | 3.96k | } | 505 | 31.1k | return true; | 506 | 31.1k | } |
jxl::Status jxl::detail::DecodeModularChannelMAANS<false>(jxl::BitReader*, jxl::ANSSymbolReader*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&, std::__1::vector<jxl::PropertyDecisionNode, std::__1::allocator<jxl::PropertyDecisionNode> > const&, jxl::weighted::Header const&, int, unsigned long, jxl::TreeLut<unsigned char, false, false>&, jxl::Image*, unsigned int&, unsigned int&) Line | Count | Source | 156 | 719k | uint32_t &fl_v) { | 157 | 719k | JxlMemoryManager *memory_manager = image->memory_manager(); | 158 | 719k | Channel &channel = image->channel[chan]; | 159 | | | 160 | 719k | std::array<pixel_type, kNumStaticProperties> static_props = { | 161 | 719k | {chan, static_cast<int>(group_id)}}; | 162 | | // TODO(veluca): filter the tree according to static_props. | 163 | | | 164 | | // zero pixel channel? could happen | 165 | 719k | if (channel.w == 0 || channel.h == 0) return true; | 166 | | | 167 | 719k | bool tree_has_wp_prop_or_pred = false; | 168 | 719k | bool is_wp_only = false; | 169 | 719k | bool is_gradient_only = false; | 170 | 719k | size_t num_props; | 171 | 719k | FlatTree tree = | 172 | 719k | FilterTree(global_tree, static_props, &num_props, | 173 | 719k | &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only); | 174 | | | 175 | | // From here on, tree lookup returns a *clustered* context ID. | 176 | | // This avoids an extra memory lookup after tree traversal. | 177 | 1.47M | for (auto &node : tree) { | 178 | 1.47M | if (node.property0 == -1) { | 179 | 1.28M | node.childID = context_map[node.childID]; | 180 | 1.28M | } | 181 | 1.47M | } | 182 | | | 183 | 719k | JXL_DEBUG_V(3, "Decoded MA tree with %" PRIuS " nodes", tree.size()); | 184 | | | 185 | | // MAANS decode | 186 | 719k | const auto make_pixel = [](uint64_t v, pixel_type multiplier, | 187 | 719k | pixel_type_w offset) -> pixel_type { | 188 | 719k | JXL_DASSERT((v & 0xFFFFFFFF) == v); | 189 | 719k | pixel_type_w val = static_cast<pixel_type_w>(UnpackSigned(v)); | 190 | | // if it overflows, it overflows, and we have a problem anyway | 191 | 719k | return val * multiplier + offset; | 192 | 719k | }; | 193 | | | 194 | | // True iff every decision node in global_tree splits on a static property | 195 | | // (channel or group_id) and every leaf has Gradient predictor with identity | 196 | | // transform. When this holds, all channels collapse to a single-leaf | 197 | | // Gradient+noop tree regardless of channel index, so the shared fl_run/fl_v | 198 | | // RLE state remains consistent across channel calls. | 199 | 719k | const bool global_tree_is_all_gradient_noop = [&] { | 200 | 719k | for (const auto& n : global_tree) { | 201 | 719k | if (n.property == -1) { | 202 | 719k | if (n.predictor != Predictor::Gradient || n.predictor_offset != 0 || | 203 | 719k | n.multiplier != 1) | 204 | 719k | return false; | 205 | 719k | } else if (n.property >= kNumStaticProperties) { | 206 | 719k | return false; | 207 | 719k | } | 208 | 719k | } | 209 | 719k | return true; | 210 | 719k | }(); | 211 | | | 212 | 719k | if (tree.size() == 1) { | 213 | | // special optimized case: no meta-adaptation, so no need | 214 | | // to compute properties. | 215 | 682k | Predictor predictor = tree[0].predictor; | 216 | 682k | int64_t offset = tree[0].predictor_offset; | 217 | 682k | int32_t multiplier = tree[0].multiplier; | 218 | 682k | size_t ctx_id = tree[0].childID; | 219 | 682k | if (predictor == Predictor::Zero) { | 220 | 562k | uint32_t value; | 221 | 562k | if (reader->IsSingleValueAndAdvance(ctx_id, &value, | 222 | 562k | channel.w * channel.h)) { | 223 | | // Special-case: histogram has a single symbol, with no extra bits, and | 224 | | // we use ANS mode. | 225 | 83.5k | JXL_DEBUG_V(8, "Fastest track."); | 226 | 83.5k | pixel_type v = make_pixel(value, multiplier, offset); | 227 | 1.45M | for (size_t y = 0; y < channel.h; y++) { | 228 | 1.37M | pixel_type *JXL_RESTRICT r = channel.Row(y); | 229 | 1.37M | std::fill(r, r + channel.w, v); | 230 | 1.37M | } | 231 | 478k | } else { | 232 | 478k | JXL_DEBUG_V(8, "Fast track."); | 233 | 478k | if (multiplier == 1 && offset == 0) { | 234 | 6.03M | for (size_t y = 0; y < channel.h; y++) { | 235 | 5.58M | pixel_type *JXL_RESTRICT r = channel.Row(y); | 236 | 283M | for (size_t x = 0; x < channel.w; x++) { | 237 | 277M | uint32_t v = | 238 | 277M | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 239 | 277M | r[x] = UnpackSigned(v); | 240 | 277M | } | 241 | 5.58M | } | 242 | 457k | } else { | 243 | 338k | for (size_t y = 0; y < channel.h; y++) { | 244 | 316k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 245 | 16.9M | for (size_t x = 0; x < channel.w; x++) { | 246 | 16.6M | uint32_t v = | 247 | 16.6M | reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, | 248 | 16.6M | br); | 249 | 16.6M | r[x] = make_pixel(v, multiplier, offset); | 250 | 16.6M | } | 251 | 316k | } | 252 | 21.7k | } | 253 | 478k | } | 254 | 562k | return true; | 255 | 562k | } else if (uses_lz77 && reader->IsHuffRleOnly() && | 256 | 0 | global_tree_is_all_gradient_noop) { | 257 | 0 | JXL_DEBUG_V(8, "Gradient RLE (fjxl) very fast track."); | 258 | 0 | pixel_type_w sv = UnpackSigned(fl_v); | 259 | 0 | for (size_t y = 0; y < channel.h; y++) { | 260 | 0 | pixel_type *JXL_RESTRICT r = channel.Row(y); | 261 | 0 | const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); | 262 | 0 | const pixel_type *JXL_RESTRICT rtopleft = | 263 | 0 | (y ? channel.Row(y - 1) - 1 : r - 1); | 264 | 0 | pixel_type_w guess_0 = (y ? rtop[0] : 0); | 265 | 0 | if (fl_run == 0) { | 266 | 0 | reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &fl_v, | 267 | 0 | &fl_run); | 268 | 0 | sv = UnpackSigned(fl_v); | 269 | 0 | } else { | 270 | 0 | fl_run--; | 271 | 0 | } | 272 | 0 | r[0] = sv + guess_0; | 273 | 0 | for (size_t x = 1; x < channel.w; x++) { | 274 | 0 | pixel_type left = r[x - 1]; | 275 | 0 | pixel_type top = rtop[x]; | 276 | 0 | pixel_type topleft = rtopleft[x]; | 277 | 0 | pixel_type_w guess = ClampedGradient(top, left, topleft); | 278 | 0 | if (!fl_run) { | 279 | 0 | reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &fl_v, | 280 | 0 | &fl_run); | 281 | 0 | sv = UnpackSigned(fl_v); | 282 | 0 | } else { | 283 | 0 | fl_run--; | 284 | 0 | } | 285 | 0 | r[x] = sv + guess; | 286 | 0 | } | 287 | 0 | } | 288 | 0 | return true; | 289 | 120k | } else if (predictor == Predictor::Gradient && offset == 0 && | 290 | 6.99k | multiplier == 1) { | 291 | 6.50k | JXL_DEBUG_V(8, "Gradient very fast track."); | 292 | 6.50k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 293 | 160k | for (size_t y = 0; y < channel.h; y++) { | 294 | 153k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 295 | 2.71M | for (size_t x = 0; x < channel.w; x++) { | 296 | 2.56M | pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); | 297 | 2.56M | pixel_type top = (y ? *(r + x - onerow) : left); | 298 | 2.56M | pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left); | 299 | 2.56M | pixel_type guess = ClampedGradient(top, left, topleft); | 300 | 2.56M | uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>( | 301 | 2.56M | ctx_id, br); | 302 | 2.56M | r[x] = make_pixel(v, 1, guess); | 303 | 2.56M | } | 304 | 153k | } | 305 | 6.50k | return true; | 306 | 6.50k | } | 307 | 682k | } | 308 | | | 309 | | // Check if this tree is a WP-only tree with a small enough property value | 310 | | // range. | 311 | 150k | if (is_wp_only) { | 312 | 14.5k | is_wp_only = TreeToLookupTable(tree, tree_lut); | 313 | 14.5k | } | 314 | 150k | if (is_gradient_only) { | 315 | 5.36k | is_gradient_only = TreeToLookupTable(tree, tree_lut); | 316 | 5.36k | } | 317 | | | 318 | 150k | if (is_gradient_only) { | 319 | 4.04k | JXL_DEBUG_V(8, "Gradient fast track."); | 320 | 4.04k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 321 | 200k | for (size_t y = 0; y < channel.h; y++) { | 322 | 195k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 323 | 3.19M | for (size_t x = 0; x < channel.w; x++) { | 324 | 2.99M | pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); | 325 | 2.99M | pixel_type_w top = (y ? *(r + x - onerow) : left); | 326 | 2.99M | pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); | 327 | 2.99M | int32_t guess = ClampedGradient(top, left, topleft); | 328 | 2.99M | uint32_t pos = | 329 | 2.99M | kPropRangeFast + | 330 | 2.99M | std::min<pixel_type_w>( | 331 | 2.99M | std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft), | 332 | 2.99M | kPropRangeFast - 1); | 333 | 2.99M | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 334 | 2.99M | uint64_t v = | 335 | 2.99M | reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>(ctx_id, br); | 336 | 2.99M | r[x] = make_pixel(v, 1, guess); | 337 | 2.99M | } | 338 | 195k | } | 339 | 146k | } else if (!uses_lz77 && is_wp_only && channel.w > 8) { | 340 | 7.43k | JXL_DEBUG_V(8, "WP fast track."); | 341 | 7.43k | weighted::State wp_state(wp_header, channel.w, channel.h); | 342 | 7.43k | Properties properties(1); | 343 | 271k | for (size_t y = 0; y < channel.h; y++) { | 344 | 264k | pixel_type *JXL_RESTRICT r = channel.Row(y); | 345 | 264k | const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); | 346 | 264k | const pixel_type *JXL_RESTRICT rtoptop = | 347 | 264k | (y > 1 ? channel.Row(y - 2) : rtop); | 348 | 264k | const pixel_type *JXL_RESTRICT rtopleft = | 349 | 264k | (y ? channel.Row(y - 1) - 1 : r - 1); | 350 | 264k | const pixel_type *JXL_RESTRICT rtopright = | 351 | 264k | (y ? channel.Row(y - 1) + 1 : r - 1); | 352 | 264k | size_t x = 0; | 353 | 264k | { | 354 | 264k | size_t offset = 0; | 355 | 264k | pixel_type_w left = y ? rtop[x] : 0; | 356 | 264k | pixel_type_w toptop = y ? rtoptop[x] : 0; | 357 | 264k | pixel_type_w topright = (x + 1 < channel.w && y ? rtop[x + 1] : left); | 358 | 264k | int32_t guess = wp_state.Predict</*compute_properties=*/true>( | 359 | 264k | x, y, channel.w, left, left, topright, left, toptop, &properties, | 360 | 264k | offset); | 361 | 264k | uint32_t pos = | 362 | 264k | kPropRangeFast + | 363 | 264k | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); | 364 | 264k | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 365 | 264k | uint64_t v = | 366 | 264k | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 367 | 264k | r[x] = make_pixel(v, 1, guess); | 368 | 264k | wp_state.UpdateErrors(r[x], x, y, channel.w); | 369 | 264k | } | 370 | 10.9M | for (x = 1; x + 1 < channel.w; x++) { | 371 | 10.7M | size_t offset = 0; | 372 | 10.7M | int32_t guess = wp_state.Predict</*compute_properties=*/true>( | 373 | 10.7M | x, y, channel.w, rtop[x], r[x - 1], rtopright[x], rtopleft[x], | 374 | 10.7M | rtoptop[x], &properties, offset); | 375 | 10.7M | uint32_t pos = | 376 | 10.7M | kPropRangeFast + | 377 | 10.7M | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); | 378 | 10.7M | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 379 | 10.7M | uint64_t v = | 380 | 10.7M | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 381 | 10.7M | r[x] = make_pixel(v, 1, guess); | 382 | 10.7M | wp_state.UpdateErrors(r[x], x, y, channel.w); | 383 | 10.7M | } | 384 | 264k | { | 385 | 264k | size_t offset = 0; | 386 | 264k | int32_t guess = wp_state.Predict</*compute_properties=*/true>( | 387 | 264k | x, y, channel.w, rtop[x], r[x - 1], rtop[x], rtopleft[x], | 388 | 264k | rtoptop[x], &properties, offset); | 389 | 264k | uint32_t pos = | 390 | 264k | kPropRangeFast + | 391 | 264k | jxl::Clamp1(properties[0], -kPropRangeFast, kPropRangeFast - 1); | 392 | 264k | uint32_t ctx_id = tree_lut.context_lookup[pos]; | 393 | 264k | uint64_t v = | 394 | 264k | reader->ReadHybridUintClusteredInlined<uses_lz77>(ctx_id, br); | 395 | 264k | r[x] = make_pixel(v, 1, guess); | 396 | 264k | wp_state.UpdateErrors(r[x], x, y, channel.w); | 397 | 264k | } | 398 | 264k | } | 399 | 139k | } else if (!tree_has_wp_prop_or_pred) { | 400 | | // special optimized case: the weighted predictor and its properties are not | 401 | | // used, so no need to compute weights and properties. | 402 | 117k | JXL_DEBUG_V(8, "Slow track."); | 403 | 117k | MATreeLookup tree_lookup(tree); | 404 | 117k | Properties properties = Properties(num_props); | 405 | 117k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 406 | 117k | JXL_ASSIGN_OR_RETURN( | 407 | 117k | Channel references, | 408 | 117k | Channel::Create(memory_manager, | 409 | 117k | properties.size() - kNumNonrefProperties, channel.w)); | 410 | 4.12M | for (size_t y = 0; y < channel.h; y++) { | 411 | 4.00M | pixel_type *JXL_RESTRICT p = channel.Row(y); | 412 | 4.00M | PrecomputeReferences(channel, y, *image, chan, &references); | 413 | 4.00M | InitPropsRow(&properties, static_props, y); | 414 | 4.00M | if (y > 1 && channel.w > 8 && references.w == 0) { | 415 | 10.1M | for (size_t x = 0; x < 2; x++) { | 416 | 6.76M | PredictionResult res = | 417 | 6.76M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, | 418 | 6.76M | tree_lookup, references); | 419 | 6.76M | uint64_t v = | 420 | 6.76M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 421 | 6.76M | p[x] = make_pixel(v, res.multiplier, res.guess); | 422 | 6.76M | } | 423 | 175M | for (size_t x = 2; x < channel.w - 2; x++) { | 424 | 171M | PredictionResult res = | 425 | 171M | PredictTreeNoWPNEC(&properties, channel.w, p + x, onerow, x, y, | 426 | 171M | tree_lookup, references); | 427 | 171M | uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>( | 428 | 171M | res.context, br); | 429 | 171M | p[x] = make_pixel(v, res.multiplier, res.guess); | 430 | 171M | } | 431 | 10.1M | for (size_t x = channel.w - 2; x < channel.w; x++) { | 432 | 6.76M | PredictionResult res = | 433 | 6.76M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, | 434 | 6.76M | tree_lookup, references); | 435 | 6.76M | uint64_t v = | 436 | 6.76M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 437 | 6.76M | p[x] = make_pixel(v, res.multiplier, res.guess); | 438 | 6.76M | } | 439 | 3.38M | } else { | 440 | 13.2M | for (size_t x = 0; x < channel.w; x++) { | 441 | 12.5M | PredictionResult res = | 442 | 12.5M | PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, | 443 | 12.5M | tree_lookup, references); | 444 | 12.5M | uint64_t v = reader->ReadHybridUintClusteredMaybeInlined<uses_lz77>( | 445 | 12.5M | res.context, br); | 446 | 12.5M | p[x] = make_pixel(v, res.multiplier, res.guess); | 447 | 12.5M | } | 448 | 624k | } | 449 | 4.00M | } | 450 | 117k | } else { | 451 | 21.6k | JXL_DEBUG_V(8, "Slowest track."); | 452 | 21.6k | MATreeLookup tree_lookup(tree); | 453 | 21.6k | Properties properties = Properties(num_props); | 454 | 21.6k | const ptrdiff_t onerow = channel.plane.PixelsPerRow(); | 455 | 21.6k | JXL_ASSIGN_OR_RETURN( | 456 | 21.6k | Channel references, | 457 | 21.6k | Channel::Create(memory_manager, | 458 | 21.6k | properties.size() - kNumNonrefProperties, channel.w)); | 459 | 21.6k | weighted::State wp_state(wp_header, channel.w, channel.h); | 460 | 1.55M | for (size_t y = 0; y < channel.h; y++) { | 461 | 1.53M | pixel_type *JXL_RESTRICT p = channel.Row(y); | 462 | 1.53M | InitPropsRow(&properties, static_props, y); | 463 | 1.53M | PrecomputeReferences(channel, y, *image, chan, &references); | 464 | 1.53M | if (!uses_lz77 && y > 1 && channel.w > 8 && references.w == 0) { | 465 | 1.52M | for (size_t x = 0; x < 2; x++) { | 466 | 1.01M | PredictionResult res = | 467 | 1.01M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, | 468 | 1.01M | tree_lookup, references, &wp_state); | 469 | 1.01M | uint64_t v = | 470 | 1.01M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 471 | 1.01M | p[x] = make_pixel(v, res.multiplier, res.guess); | 472 | 1.01M | wp_state.UpdateErrors(p[x], x, y, channel.w); | 473 | 1.01M | } | 474 | 101M | for (size_t x = 2; x < channel.w - 2; x++) { | 475 | 100M | PredictionResult res = | 476 | 100M | PredictTreeWPNEC(&properties, channel.w, p + x, onerow, x, y, | 477 | 100M | tree_lookup, references, &wp_state); | 478 | 100M | uint64_t v = reader->ReadHybridUintClusteredInlined<uses_lz77>( | 479 | 100M | res.context, br); | 480 | 100M | p[x] = make_pixel(v, res.multiplier, res.guess); | 481 | 100M | wp_state.UpdateErrors(p[x], x, y, channel.w); | 482 | 100M | } | 483 | 1.52M | for (size_t x = channel.w - 2; x < channel.w; x++) { | 484 | 1.01M | PredictionResult res = | 485 | 1.01M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, | 486 | 1.01M | tree_lookup, references, &wp_state); | 487 | 1.01M | uint64_t v = | 488 | 1.01M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 489 | 1.01M | p[x] = make_pixel(v, res.multiplier, res.guess); | 490 | 1.01M | wp_state.UpdateErrors(p[x], x, y, channel.w); | 491 | 1.01M | } | 492 | 1.02M | } else { | 493 | 8.80M | for (size_t x = 0; x < channel.w; x++) { | 494 | 7.77M | PredictionResult res = | 495 | 7.77M | PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, | 496 | 7.77M | tree_lookup, references, &wp_state); | 497 | 7.77M | uint64_t v = | 498 | 7.77M | reader->ReadHybridUintClustered<uses_lz77>(res.context, br); | 499 | 7.77M | p[x] = make_pixel(v, res.multiplier, res.guess); | 500 | 7.77M | wp_state.UpdateErrors(p[x], x, y, channel.w); | 501 | 7.77M | } | 502 | 1.02M | } | 503 | 1.53M | } | 504 | 21.6k | } | 505 | 150k | return true; | 506 | 150k | } |
|
507 | | } // namespace detail |
508 | | |
509 | | Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader, |
510 | | const std::vector<uint8_t> &context_map, |
511 | | const Tree &global_tree, |
512 | | const weighted::Header &wp_header, |
513 | | pixel_type chan, size_t group_id, |
514 | | TreeLut<uint8_t, false, false> &tree_lut, |
515 | | Image *image, uint32_t &fl_run, |
516 | 850k | uint32_t &fl_v) { |
517 | 850k | if (reader->UsesLZ77()) { |
518 | 131k | return detail::DecodeModularChannelMAANS</*uses_lz77=*/true>( |
519 | 131k | br, reader, context_map, global_tree, wp_header, chan, group_id, |
520 | 131k | tree_lut, image, fl_run, fl_v); |
521 | 719k | } else { |
522 | 719k | return detail::DecodeModularChannelMAANS</*uses_lz77=*/false>( |
523 | 719k | br, reader, context_map, global_tree, wp_header, chan, group_id, |
524 | 719k | tree_lut, image, fl_run, fl_v); |
525 | 719k | } |
526 | 850k | } |
527 | | |
528 | 750k | GroupHeader::GroupHeader() { Bundle::Init(this); } |
529 | | |
530 | | Status ValidateChannelDimensions(const Image &image, |
531 | 188k | const ModularOptions &options) { |
532 | 188k | size_t nb_channels = image.channel.size(); |
533 | 377k | for (bool is_dc : {true, false}) { |
534 | 377k | size_t group_dim = options.group_dim * (is_dc ? kBlockDim : 1); |
535 | 377k | size_t c = image.nb_meta_channels; |
536 | 2.35M | for (; c < nb_channels; c++) { |
537 | 1.98M | const Channel &ch = image.channel[c]; |
538 | 1.98M | if (ch.w > options.group_dim || ch.h > options.group_dim) break; |
539 | 1.98M | } |
540 | 433k | for (; c < nb_channels; c++) { |
541 | 56.3k | const Channel &ch = image.channel[c]; |
542 | 56.3k | if (ch.w == 0 || ch.h == 0) continue; // skip empty |
543 | 55.4k | bool is_dc_channel = std::min(ch.hshift, ch.vshift) >= 3; |
544 | 55.4k | if (is_dc_channel != is_dc) continue; |
545 | 27.7k | size_t tile_dim = group_dim >> std::max(ch.hshift, ch.vshift); |
546 | 27.7k | if (tile_dim == 0) { |
547 | 7 | return JXL_FAILURE("Inconsistent transforms"); |
548 | 7 | } |
549 | 27.7k | } |
550 | 377k | } |
551 | 188k | return true; |
552 | 188k | } |
553 | | |
554 | | Status ModularDecode(BitReader *br, Image &image, GroupHeader &header, |
555 | | size_t group_id, ModularOptions *options, |
556 | | const Tree *global_tree, const ANSCode *global_code, |
557 | | const std::vector<uint8_t> *global_ctx_map, |
558 | 305k | const bool allow_truncated_group) { |
559 | 305k | if (image.channel.empty()) return true; |
560 | 214k | JxlMemoryManager *memory_manager = image.memory_manager(); |
561 | | |
562 | | // decode transforms |
563 | 214k | Status status = Bundle::Read(br, &header); |
564 | 214k | if (!allow_truncated_group) JXL_RETURN_IF_ERROR(status); |
565 | 191k | if (status.IsFatalError()) return status; |
566 | 191k | if (!br->AllReadsWithinBounds()) { |
567 | | // Don't do/undo transforms if header is incomplete. |
568 | 0 | header.transforms.clear(); |
569 | 0 | image.transform = header.transforms; |
570 | 0 | for (auto &ch : image.channel) { |
571 | 0 | ZeroFillImage(&ch.plane); |
572 | 0 | } |
573 | 0 | return JXL_NOT_ENOUGH_BYTES("Read overrun before ModularDecode"); |
574 | 0 | } |
575 | | |
576 | 191k | JXL_DEBUG_V(3, "Image data underwent %" PRIuS " transformations: ", |
577 | 191k | header.transforms.size()); |
578 | 191k | image.transform = header.transforms; |
579 | 191k | for (Transform &transform : image.transform) { |
580 | 46.7k | JXL_RETURN_IF_ERROR(transform.MetaApply(image)); |
581 | 46.7k | } |
582 | 186k | if (image.error) { |
583 | 0 | return JXL_FAILURE("Corrupt file. Aborting."); |
584 | 0 | } |
585 | 186k | JXL_RETURN_IF_ERROR(ValidateChannelDimensions(image, *options)); |
586 | | |
587 | 186k | size_t nb_channels = image.channel.size(); |
588 | | |
589 | 186k | size_t num_chans = 0; |
590 | 186k | size_t distance_multiplier = 0; |
591 | 1.17M | for (size_t i = 0; i < nb_channels; i++) { |
592 | 992k | Channel &channel = image.channel[i]; |
593 | 992k | if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size || |
594 | 982k | channel.h > options->max_chan_size)) { |
595 | 4.69k | break; |
596 | 4.69k | } |
597 | 987k | if (!channel.w || !channel.h) { |
598 | 35.4k | continue; // skip empty channels |
599 | 35.4k | } |
600 | 952k | if (channel.w > distance_multiplier) { |
601 | 229k | distance_multiplier = channel.w; |
602 | 229k | } |
603 | 952k | num_chans++; |
604 | 952k | } |
605 | 186k | if (num_chans == 0) return true; |
606 | | |
607 | 183k | size_t next_channel = 0; |
608 | 183k | auto scope_guard = MakeScopeGuard([&]() { |
609 | 144k | for (size_t c = next_channel; c < image.channel.size(); c++) { |
610 | 111k | ZeroFillImage(&image.channel[c].plane); |
611 | 111k | } |
612 | 32.5k | }); |
613 | | // Do not do anything if truncated groups are not allowed. |
614 | 183k | if (allow_truncated_group) scope_guard.Disarm(); |
615 | | |
616 | | // Read tree. |
617 | 183k | Tree tree_storage; |
618 | 183k | std::vector<uint8_t> context_map_storage; |
619 | 183k | ANSCode code_storage; |
620 | 183k | const Tree *tree = &tree_storage; |
621 | 183k | const ANSCode *code = &code_storage; |
622 | 183k | const std::vector<uint8_t> *context_map = &context_map_storage; |
623 | 183k | if (!header.use_global_tree) { |
624 | 136k | uint64_t max_tree_size = 1024; |
625 | 760k | for (size_t i = 0; i < nb_channels; i++) { |
626 | 624k | Channel &channel = image.channel[i]; |
627 | 624k | if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size || |
628 | 621k | channel.h > options->max_chan_size)) { |
629 | 85 | break; |
630 | 85 | } |
631 | 624k | uint64_t pixels = channel.w * channel.h; |
632 | 624k | max_tree_size += pixels; |
633 | 624k | } |
634 | 136k | max_tree_size = std::min(static_cast<uint64_t>(1 << 20), max_tree_size); |
635 | 136k | JXL_RETURN_IF_ERROR( |
636 | 136k | DecodeTree(memory_manager, br, &tree_storage, max_tree_size)); |
637 | 115k | JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager, br, |
638 | 115k | (tree_storage.size() + 1) / 2, |
639 | 115k | &code_storage, &context_map_storage)); |
640 | 115k | } else { |
641 | 47.5k | if (!global_tree || !global_code || !global_ctx_map || |
642 | 47.5k | global_tree->empty()) { |
643 | 949 | return JXL_FAILURE("No global tree available but one was requested"); |
644 | 949 | } |
645 | 46.6k | tree = global_tree; |
646 | 46.6k | code = global_code; |
647 | 46.6k | context_map = global_ctx_map; |
648 | 46.6k | } |
649 | | |
650 | | // Read channels |
651 | 317k | JXL_ASSIGN_OR_RETURN(ANSSymbolReader reader, |
652 | 317k | ANSSymbolReader::Create(code, br, distance_multiplier)); |
653 | 317k | auto tree_lut = jxl::make_unique<TreeLut<uint8_t, false, false>>(); |
654 | 317k | uint32_t fl_run = 0; |
655 | 317k | uint32_t fl_v = 0; |
656 | 1.03M | for (; next_channel < nb_channels; next_channel++) { |
657 | 886k | Channel &channel = image.channel[next_channel]; |
658 | 886k | if (next_channel >= image.nb_meta_channels && |
659 | 880k | (channel.w > options->max_chan_size || |
660 | 879k | channel.h > options->max_chan_size)) { |
661 | 1.91k | break; |
662 | 1.91k | } |
663 | 884k | if (!channel.w || !channel.h) { |
664 | 33.7k | continue; // skip empty channels |
665 | 33.7k | } |
666 | 850k | JXL_RETURN_IF_ERROR(DecodeModularChannelMAANS( |
667 | 850k | br, &reader, *context_map, *tree, header.wp_header, next_channel, |
668 | 850k | group_id, *tree_lut, &image, fl_run, fl_v)); |
669 | | |
670 | | // Truncated group. |
671 | 850k | if (!br->AllReadsWithinBounds()) { |
672 | 7.79k | if (!allow_truncated_group) return JXL_FAILURE("Truncated input"); |
673 | 0 | return JXL_NOT_ENOUGH_BYTES("Read overrun in ModularDecode"); |
674 | 7.79k | } |
675 | 850k | } |
676 | | |
677 | | // Make sure no zero-filling happens even if next_channel < nb_channels. |
678 | 151k | scope_guard.Disarm(); |
679 | | |
680 | 151k | if (!reader.CheckANSFinalState()) { |
681 | 0 | return JXL_FAILURE("ANS decode final state failed"); |
682 | 0 | } |
683 | 151k | return true; |
684 | 151k | } |
685 | | |
686 | | Status ModularGenericDecompress(BitReader *br, Image &image, |
687 | | GroupHeader *header, size_t group_id, |
688 | | ModularOptions *options, bool undo_transforms, |
689 | | const Tree *tree, const ANSCode *code, |
690 | | const std::vector<uint8_t> *ctx_map, |
691 | 305k | bool allow_truncated_group) { |
692 | 305k | std::vector<std::pair<size_t, size_t>> req_sizes; |
693 | 305k | req_sizes.reserve(image.channel.size()); |
694 | 723k | for (const auto &c : image.channel) { |
695 | 723k | req_sizes.emplace_back(c.w, c.h); |
696 | 723k | } |
697 | 305k | GroupHeader local_header; |
698 | 305k | if (header == nullptr) header = &local_header; |
699 | 305k | size_t bit_pos = br->TotalBitsConsumed(); |
700 | 305k | auto dec_status = ModularDecode(br, image, *header, group_id, options, tree, |
701 | 305k | code, ctx_map, allow_truncated_group); |
702 | 305k | if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status); |
703 | 245k | if (dec_status.IsFatalError()) return dec_status; |
704 | 245k | if (undo_transforms) image.undo_transforms(header->wp_header); |
705 | 245k | if (image.error) return JXL_FAILURE("Corrupt file. Aborting."); |
706 | 245k | JXL_DEBUG_V(4, |
707 | 245k | "Modular-decoded a %" PRIuS "x%" PRIuS " nbchans=%" PRIuS |
708 | 245k | " image from %" PRIuS " bytes", |
709 | 245k | image.w, image.h, image.channel.size(), |
710 | 245k | (br->TotalBitsConsumed() - bit_pos) / 8); |
711 | 245k | JXL_DEBUG_V(5, "Modular image: %s", image.DebugString().c_str()); |
712 | 245k | (void)bit_pos; |
713 | | // Check that after applying all transforms we are back to the requested |
714 | | // image sizes, otherwise there's a programming error with the |
715 | | // transformations. |
716 | 245k | if (undo_transforms) { |
717 | 103k | JXL_ENSURE(image.channel.size() == req_sizes.size()); |
718 | 465k | for (size_t c = 0; c < req_sizes.size(); c++) { |
719 | 362k | JXL_ENSURE(req_sizes[c].first == image.channel[c].w); |
720 | 362k | JXL_ENSURE(req_sizes[c].second == image.channel[c].h); |
721 | 362k | } |
722 | 103k | } |
723 | 245k | return dec_status; |
724 | 245k | } |
725 | | |
726 | | } // namespace jxl |