/src/libjxl/lib/jxl/dec_noise.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/dec_noise.h" |
7 | | |
8 | | #include <cstdint> |
9 | | #include <cstdlib> |
10 | | #include <cstring> |
11 | | #include <utility> |
12 | | |
13 | | #include "lib/jxl/base/status.h" |
14 | | #include "lib/jxl/dec_bit_reader.h" |
15 | | #include "lib/jxl/dec_cache.h" |
16 | | #include "lib/jxl/frame_header.h" |
17 | | #include "lib/jxl/noise.h" |
18 | | #include "lib/jxl/render_pipeline/render_pipeline.h" |
19 | | |
20 | | #undef HWY_TARGET_INCLUDE |
21 | | #define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc" |
22 | | #include <hwy/foreach_target.h> |
23 | | #include <hwy/highway.h> |
24 | | |
25 | | #include "lib/jxl/base/compiler_specific.h" |
26 | | #include "lib/jxl/base/rect.h" |
27 | | #include "lib/jxl/frame_dimensions.h" |
28 | | #include "lib/jxl/image.h" |
29 | | #include "lib/jxl/xorshift128plus-inl.h" |
30 | | |
31 | | HWY_BEFORE_NAMESPACE(); |
32 | | namespace jxl { |
33 | | namespace HWY_NAMESPACE { |
34 | | |
35 | | // These templates are not found via ADL. |
36 | | using hwy::HWY_NAMESPACE::Or; |
37 | | using hwy::HWY_NAMESPACE::Rebind; |
38 | | using hwy::HWY_NAMESPACE::ShiftRight; |
39 | | using hwy::HWY_NAMESPACE::Vec; |
40 | | |
41 | | using D = HWY_CAPPED(float, kBlockDim); |
42 | | using DI = Rebind<int, D>; |
43 | | |
44 | | // Converts one vector's worth of random bits to floats in [1, 2). |
45 | | // NOTE: as the convolution kernel sums to 0, it doesn't matter if inputs are in |
46 | | // [0, 1) or in [1, 2). |
47 | | void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits, |
48 | 110M | float* JXL_RESTRICT floats) { |
49 | 110M | const HWY_FULL(float) df; |
50 | 110M | const HWY_FULL(uint32_t) du; |
51 | | |
52 | 110M | const auto bits = Load(du, random_bits); |
53 | | // 1.0 + 23 random mantissa bits = [1, 2) |
54 | 110M | const auto rand12 = BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000))); |
55 | 110M | Store(rand12, df, floats); |
56 | 110M | } |
57 | | |
58 | | void RandomImage(Xorshift128Plus* rng, const Rect& rect, |
59 | 19.6k | ImageF* JXL_RESTRICT noise) { |
60 | 19.6k | const size_t xsize = rect.xsize(); |
61 | 19.6k | const size_t ysize = rect.ysize(); |
62 | | |
63 | | // May exceed the vector size, hence we have two loops over x below. |
64 | 19.6k | constexpr size_t kFloatsPerBatch = |
65 | 19.6k | Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float); |
66 | 19.6k | HWY_ALIGN uint64_t batch64[Xorshift128Plus::N] = {}; |
67 | 19.6k | HWY_ALIGN uint32_t batch32[2 * Xorshift128Plus::N]; |
68 | | |
69 | 19.6k | const HWY_FULL(float) df; |
70 | 19.6k | const size_t N = Lanes(df); |
71 | | |
72 | 792k | for (size_t y = 0; y < ysize; ++y) { |
73 | 773k | float* JXL_RESTRICT row = rect.Row(noise, y); |
74 | | |
75 | 773k | size_t x = 0; |
76 | | // Only entire batches (avoids exceeding the image padding). |
77 | 7.31M | for (; x + kFloatsPerBatch < xsize; x += kFloatsPerBatch) { |
78 | 6.54M | rng->Fill(batch64); |
79 | | // Workaround for https://github.com/llvm/llvm-project/issues/121229 |
80 | 6.54M | memcpy(batch32, batch64, sizeof(batch32)); |
81 | 110M | for (size_t i = 0; i < kFloatsPerBatch; i += Lanes(df)) { |
82 | 104M | BitsToFloat(batch32 + i, row + x + i); |
83 | 104M | } |
84 | 6.54M | } |
85 | | |
86 | | // Any remaining pixels, rounded up to vectors (safe due to padding). |
87 | 773k | rng->Fill(batch64); |
88 | | // Workaround for https://github.com/llvm/llvm-project/issues/121229 |
89 | 773k | memcpy(batch32, batch64, sizeof(batch32)); |
90 | 773k | size_t batch_pos = 0; // < kFloatsPerBatch |
91 | 9.46M | for (; x < xsize; x += N) { |
92 | 8.69M | BitsToFloat(batch32 + batch_pos, row + x); |
93 | 8.69M | batch_pos += N; |
94 | 8.69M | } |
95 | 773k | } |
96 | 19.6k | } |
97 | | void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index, |
98 | | size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0, |
99 | | const std::pair<ImageF*, Rect>& plane1, |
100 | 6.53k | const std::pair<ImageF*, Rect>& plane2) { |
101 | 6.53k | HWY_ALIGN Xorshift128Plus rng(visible_frame_index, nonvisible_frame_index, x0, |
102 | 6.53k | y0); |
103 | 6.53k | RandomImage(&rng, plane0.second, plane0.first); |
104 | 6.53k | RandomImage(&rng, plane1.second, plane1.first); |
105 | 6.53k | RandomImage(&rng, plane2.second, plane2.first); |
106 | 6.53k | } |
107 | | |
108 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
109 | | } // namespace HWY_NAMESPACE |
110 | | } // namespace jxl |
111 | | HWY_AFTER_NAMESPACE(); |
112 | | |
113 | | #if HWY_ONCE |
114 | | namespace jxl { |
115 | | |
116 | | namespace { |
117 | | HWY_EXPORT(Random3Planes); |
118 | | } // namespace |
119 | | |
120 | | void PrepareNoiseInput(const PassesDecoderState& dec_state, |
121 | | const FrameDimensions& frame_dim, |
122 | | const FrameHeader& frame_header, size_t group_index, |
123 | 3.19k | size_t thread) { |
124 | 3.19k | size_t group_dim = frame_dim.group_dim; |
125 | 3.19k | const size_t gx = group_index % frame_dim.xsize_groups; |
126 | 3.19k | const size_t gy = group_index / frame_dim.xsize_groups; |
127 | 3.19k | RenderPipelineInput input = |
128 | 3.19k | dec_state.render_pipeline->GetInputBuffers(group_index, thread); |
129 | 3.19k | size_t noise_c_start = |
130 | 3.19k | 3 + frame_header.nonserialized_metadata->m.num_extra_channels; |
131 | | // When the color channels are downsampled, we need to generate more noise |
132 | | // input for the current group than just the group dimensions. |
133 | 3.19k | std::pair<ImageF*, Rect> rects[3]; |
134 | 7.05k | for (size_t iy = 0; iy < frame_header.upsampling; iy++) { |
135 | 10.4k | for (size_t ix = 0; ix < frame_header.upsampling; ix++) { |
136 | 26.1k | for (size_t c = 0; c < 3; c++) { |
137 | 19.6k | auto r = input.GetBuffer(noise_c_start + c); |
138 | 19.6k | rects[c].first = r.first; |
139 | 19.6k | size_t x1 = r.second.x0() + r.second.xsize(); |
140 | 19.6k | size_t y1 = r.second.y0() + r.second.ysize(); |
141 | 19.6k | rects[c].second = |
142 | 19.6k | Rect(r.second.x0() + ix * group_dim, r.second.y0() + iy * group_dim, |
143 | 19.6k | group_dim, group_dim, x1, y1); |
144 | 19.6k | } |
145 | 6.53k | HWY_DYNAMIC_DISPATCH(Random3Planes) |
146 | 6.53k | (dec_state.visible_frame_index, dec_state.nonvisible_frame_index, |
147 | 6.53k | (gx * frame_header.upsampling + ix) * group_dim, |
148 | 6.53k | (gy * frame_header.upsampling + iy) * group_dim, rects[0], rects[1], |
149 | 6.53k | rects[2]); |
150 | 6.53k | } |
151 | 3.86k | } |
152 | 3.19k | } |
153 | | |
154 | 16.6k | void DecodeFloatParam(float precision, float* val, BitReader* br) { |
155 | 16.6k | const int absval_quant = br->ReadFixedBits<10>(); |
156 | 16.6k | *val = absval_quant / precision; |
157 | 16.6k | } |
158 | | |
159 | 2.08k | Status DecodeNoise(BitReader* br, NoiseParams* noise_params) { |
160 | 16.6k | for (float& i : noise_params->lut) { |
161 | 16.6k | DecodeFloatParam(kNoisePrecision, &i, br); |
162 | 16.6k | } |
163 | 2.08k | return true; |
164 | 2.08k | } |
165 | | |
166 | | } // namespace jxl |
167 | | #endif // HWY_ONCE |