/src/libjxl/lib/jxl/dec_cache.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #ifndef LIB_JXL_DEC_CACHE_H_ |
7 | | #define LIB_JXL_DEC_CACHE_H_ |
8 | | |
9 | | #include <jxl/decode.h> |
10 | | #include <jxl/memory_manager.h> |
11 | | #include <jxl/types.h> |
12 | | |
13 | | #include <atomic> |
14 | | #include <cmath> |
15 | | #include <cstdint> |
16 | | #include <hwy/base.h> // HWY_ALIGN_MAX |
17 | | #include <memory> |
18 | | #include <vector> |
19 | | |
20 | | #include "lib/jxl/base/common.h" // kMaxNumPasses |
21 | | #include "lib/jxl/base/compiler_specific.h" |
22 | | #include "lib/jxl/base/data_parallel.h" |
23 | | #include "lib/jxl/base/status.h" |
24 | | #include "lib/jxl/common.h" |
25 | | #include "lib/jxl/dct_util.h" |
26 | | #include "lib/jxl/dec_ans.h" |
27 | | #include "lib/jxl/dec_xyb.h" |
28 | | #include "lib/jxl/frame_dimensions.h" |
29 | | #include "lib/jxl/frame_header.h" |
30 | | #include "lib/jxl/image.h" |
31 | | #include "lib/jxl/image_bundle.h" |
32 | | #include "lib/jxl/image_metadata.h" |
33 | | #include "lib/jxl/memory_manager_internal.h" |
34 | | #include "lib/jxl/passes_state.h" |
35 | | #include "lib/jxl/render_pipeline/render_pipeline.h" |
36 | | #include "lib/jxl/render_pipeline/render_pipeline_stage.h" |
37 | | #include "lib/jxl/render_pipeline/stage_upsampling.h" |
38 | | |
39 | | namespace jxl { |
40 | | |
// Sizing constants related to the EPF sigma image.
// kSigmaBorder is not referenced anywhere else in this header.
constexpr size_t kSigmaBorder{1};
// Number of extra blocks added on each side (hence the factor of two) of the
// sigma image allocated in PassesDecoderState::Init().
constexpr size_t kSigmaPadding{2};
43 | | |
44 | | struct PixelCallback { |
45 | 36.5k | PixelCallback() = default; |
46 | | PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run, |
47 | | JxlImageOutDestroyCallback destroy, void* init_opaque) |
48 | 4.39k | : init(init), run(run), destroy(destroy), init_opaque(init_opaque) { |
49 | | #if (JXL_IS_DEBUG_BUILD) |
50 | | const bool has_init = (init != nullptr); |
51 | | const bool has_run = (run != nullptr); |
52 | | const bool has_destroy = (destroy != nullptr); |
53 | | const bool healthy = (has_init == has_run) && (has_run == has_destroy); |
54 | | JXL_DASSERT(healthy); |
55 | | #endif |
56 | 4.39k | } |
57 | | |
58 | 36.7k | bool IsPresent() const { return run != nullptr; } |
59 | | |
60 | 0 | void* Init(size_t num_threads, size_t num_pixels) const { |
61 | 0 | return init(init_opaque, num_threads, num_pixels); |
62 | 0 | } |
63 | | |
64 | | JxlImageOutInitCallback init = nullptr; |
65 | | JxlImageOutRunCallback run = nullptr; |
66 | | JxlImageOutDestroyCallback destroy = nullptr; |
67 | | void* init_opaque = nullptr; |
68 | | }; |
69 | | |
70 | | struct ImageOutput { |
71 | | // Pixel format of the output pixels, used for buffer and callback output. |
72 | | JxlPixelFormat format; |
73 | | // Output bit depth for unsigned data types, used for float to int conversion. |
74 | | size_t bits_per_sample; |
75 | | // Callback for line-by-line output. |
76 | | PixelCallback callback; |
77 | | // Pixel buffer for image output. |
78 | | void* buffer; |
79 | | size_t buffer_size; |
80 | | // Length of a row of image_buffer in bytes (based on oriented width). |
81 | | size_t stride; |
82 | | }; |
83 | | |
84 | | // Per-frame decoder state. All the images here should be accessed through a |
85 | | // group rect (either with block units or pixel units). |
86 | | struct PassesDecoderState { |
87 | | explicit PassesDecoderState(JxlMemoryManager* memory_manager) |
88 | 10.0k | : shared_storage(memory_manager), |
89 | 10.0k | frame_storage_for_referencing(memory_manager) {} |
90 | | |
91 | | PassesSharedState shared_storage; |
92 | | // Allows avoiding copies for encoder loop. |
93 | | const PassesSharedState* JXL_RESTRICT shared = &shared_storage; |
94 | | |
95 | | // 8x upsampling stage for DC. |
96 | | std::unique_ptr<RenderPipelineStage> upsampler8x; |
97 | | |
98 | | // For ANS decoding. |
99 | | std::vector<ANSCode> code; |
100 | | std::vector<std::vector<uint8_t>> context_map; |
101 | | |
102 | | // Multiplier to be applied to the quant matrices of the x channel. |
103 | | float x_dm_multiplier; |
104 | | float b_dm_multiplier; |
105 | | |
106 | | // Sigma values for EPF. |
107 | | ImageF sigma; |
108 | | |
109 | | // Image dimensions before applying undo_orientation. |
110 | | size_t width; |
111 | | size_t height; |
112 | | ImageOutput main_output; |
113 | | std::vector<ImageOutput> extra_output; |
114 | | |
115 | | // Whether to use int16 float-XYB-to-uint8-srgb conversion. |
116 | | bool fast_xyb_srgb8_conversion; |
117 | | |
118 | | // If true, the RGBA output will be unpremultiplied before writing to the |
119 | | // output. |
120 | | bool unpremul_alpha; |
121 | | |
122 | | // The render pipeline will apply this orientation to bring the image to the |
123 | | // intended display orientation. |
124 | | Orientation undo_orientation; |
125 | | |
126 | | // Used for seeding noise. |
127 | | size_t visible_frame_index = 0; |
128 | | size_t nonvisible_frame_index = 0; |
129 | | |
130 | | // Keep track of the transform types used. |
131 | | std::atomic<uint32_t> used_acs{0}; |
132 | | |
133 | | // Storage for coefficients if in "accumulate" mode. |
134 | | std::unique_ptr<ACImage> coefficients = make_unique<ACImageT<int32_t>>(); |
135 | | |
136 | | // Rendering pipeline. |
137 | | std::unique_ptr<RenderPipeline> render_pipeline; |
138 | | |
139 | | // Storage for the current frame if it can be referenced by future frames. |
140 | | ImageBundle frame_storage_for_referencing; |
141 | | |
142 | | struct PipelineOptions { |
143 | | bool use_slow_render_pipeline; |
144 | | bool coalescing; |
145 | | bool render_spotcolors; |
146 | | bool render_noise; |
147 | | }; |
148 | | |
149 | 196k | JxlMemoryManager* memory_manager() const { return shared->memory_manager; } |
150 | | |
151 | | Status PreparePipeline(const FrameHeader& frame_header, |
152 | | const ImageMetadata* metadata, ImageBundle* decoded, |
153 | | PipelineOptions options); |
154 | | |
155 | | // Information for colour conversions. |
156 | | OutputEncodingInfo output_encoding_info; |
157 | | |
158 | | // Initializes decoder-specific structures using information from *shared. |
159 | 26.4k | Status Init(const FrameHeader& frame_header) { |
160 | 26.4k | JxlMemoryManager* memory_manager = this->memory_manager(); |
161 | 26.4k | x_dm_multiplier = std::pow(1 / (1.25f), frame_header.x_qm_scale - 2.0f); |
162 | 26.4k | b_dm_multiplier = std::pow(1 / (1.25f), frame_header.b_qm_scale - 2.0f); |
163 | | |
164 | 26.4k | main_output.callback = PixelCallback(); |
165 | 26.4k | main_output.buffer = nullptr; |
166 | 26.4k | extra_output.clear(); |
167 | | |
168 | 26.4k | fast_xyb_srgb8_conversion = false; |
169 | 26.4k | unpremul_alpha = false; |
170 | 26.4k | undo_orientation = Orientation::kIdentity; |
171 | | |
172 | 26.4k | used_acs = 0; |
173 | | |
174 | 26.4k | upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3); |
175 | 26.4k | if (frame_header.loop_filter.epf_iters > 0) { |
176 | 15.2k | JXL_ASSIGN_OR_RETURN( |
177 | 15.2k | sigma, |
178 | 15.2k | ImageF::Create(memory_manager, |
179 | 15.2k | shared->frame_dim.xsize_blocks + 2 * kSigmaPadding, |
180 | 15.2k | shared->frame_dim.ysize_blocks + 2 * kSigmaPadding)); |
181 | 15.2k | } |
182 | 26.4k | return true; |
183 | 26.4k | } |
184 | | |
185 | | // Initialize the decoder state after all of DC is decoded. |
186 | | Status InitForAC(size_t num_passes, ThreadPool* pool); |
187 | | }; |
188 | | |
189 | | // Temp images required for decoding a single group. Reduces memory allocations |
190 | | // for large images because we only initialize min(#threads, #groups) instances. |
191 | | struct HWY_ALIGN_MAX GroupDecCache { |
192 | | Status InitOnce(JxlMemoryManager* memory_manager, size_t num_passes, |
193 | | size_t used_acs); |
194 | | |
195 | 0 | Status InitDCBufferOnce(JxlMemoryManager* memory_manager) { |
196 | 0 | if (dc_buffer.xsize() == 0) { |
197 | 0 | JXL_ASSIGN_OR_RETURN( |
198 | 0 | dc_buffer, |
199 | 0 | ImageF::Create(memory_manager, |
200 | 0 | kGroupDimInBlocks + kRenderPipelineXOffset * 2, |
201 | 0 | kGroupDimInBlocks + 4)); |
202 | 0 | } |
203 | 0 | return true; |
204 | 0 | } |
205 | | |
206 | | // Scratch space used by DecGroupImpl(). |
207 | | float* dec_group_block; |
208 | | int32_t* dec_group_qblock; |
209 | | int16_t* dec_group_qblock16; |
210 | | |
211 | | // For TransformToPixels. |
212 | | float* scratch_space; |
213 | | // Note that scratch_space is never used at the same time as dec_group_qblock. |
214 | | // Moreover, only one of dec_group_qblock16 is ever used. |
215 | | // TODO(veluca): figure out if we can save allocations. |
216 | | |
217 | | // AC decoding |
218 | | Image3I num_nzeroes[kMaxNumPasses]; |
219 | | |
220 | | // Buffer for DC upsampling. |
221 | | ImageF dc_buffer; |
222 | | |
223 | | private: |
224 | | AlignedMemory float_memory_; |
225 | | AlignedMemory int32_memory_; |
226 | | AlignedMemory int16_memory_; |
227 | | size_t max_block_area_ = 0; |
228 | | }; |
229 | | |
230 | | } // namespace jxl |
231 | | |
232 | | #endif // LIB_JXL_DEC_CACHE_H_ |