/src/libjxl/lib/jxl/dec_noise.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/dec_noise.h" |
7 | | |
8 | | #include <cstdint> |
9 | | #include <cstdlib> |
10 | | #include <utility> |
11 | | |
12 | | #undef HWY_TARGET_INCLUDE |
13 | | #define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc" |
14 | | #include <hwy/foreach_target.h> |
15 | | #include <hwy/highway.h> |
16 | | |
17 | | #include "lib/jxl/base/compiler_specific.h" |
18 | | #include "lib/jxl/base/rect.h" |
19 | | #include "lib/jxl/frame_dimensions.h" |
20 | | #include "lib/jxl/image.h" |
21 | | #include "lib/jxl/xorshift128plus-inl.h" |
22 | | |
23 | | HWY_BEFORE_NAMESPACE(); |
24 | | namespace jxl { |
25 | | namespace HWY_NAMESPACE { |
26 | | |
27 | | // These templates are not found via ADL. |
28 | | using hwy::HWY_NAMESPACE::Or; |
29 | | using hwy::HWY_NAMESPACE::ShiftRight; |
30 | | using hwy::HWY_NAMESPACE::Vec; |
31 | | |
32 | | using D = HWY_CAPPED(float, kBlockDim); |
33 | | using DI = hwy::HWY_NAMESPACE::Rebind<int, D>; |
34 | | using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>; |
35 | | |
36 | | // Converts one vector's worth of random bits to floats in [1, 2). |
37 | | // NOTE: as the convolution kernel sums to 0, it doesn't matter if inputs are in |
38 | | // [0, 1) or in [1, 2). |
39 | | void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits, |
40 | 428M | float* JXL_RESTRICT floats) { |
41 | 428M | const HWY_FULL(float) df; |
42 | 428M | const HWY_FULL(uint32_t) du; |
43 | | |
44 | 428M | const auto bits = Load(du, random_bits); |
45 | | // 1.0 + 23 random mantissa bits = [1, 2) |
46 | 428M | const auto rand12 = BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000))); |
47 | 428M | Store(rand12, df, floats); |
48 | 428M | } |
49 | | |
50 | | void RandomImage(Xorshift128Plus* rng, const Rect& rect, |
51 | 62.9k | ImageF* JXL_RESTRICT noise) { |
52 | 62.9k | const size_t xsize = rect.xsize(); |
53 | 62.9k | const size_t ysize = rect.ysize(); |
54 | | |
55 | | // May exceed the vector size, hence we have two loops over x below. |
56 | 62.9k | constexpr size_t kFloatsPerBatch = |
57 | 62.9k | Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float); |
58 | 62.9k | HWY_ALIGN uint64_t batch[Xorshift128Plus::N] = {}; |
59 | | |
60 | 62.9k | const HWY_FULL(float) df; |
61 | 62.9k | const size_t N = Lanes(df); |
62 | | |
63 | 2.82M | for (size_t y = 0; y < ysize; ++y) { |
64 | 2.75M | float* JXL_RESTRICT row = rect.Row(noise, y); |
65 | | |
66 | 2.75M | size_t x = 0; |
67 | | // Only entire batches (avoids exceeding the image padding). |
68 | 29.8M | for (; x + kFloatsPerBatch < xsize; x += kFloatsPerBatch) { |
69 | 27.1M | rng->Fill(batch); |
70 | 433M | for (size_t i = 0; i < kFloatsPerBatch; i += Lanes(df)) { |
71 | 405M | BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + i, row + x + i); |
72 | 405M | } |
73 | 27.1M | } |
74 | | |
75 | | // Any remaining pixels, rounded up to vectors (safe due to padding). |
76 | 2.75M | rng->Fill(batch); |
77 | 2.75M | size_t batch_pos = 0; // < kFloatsPerBatch |
78 | 35.0M | for (; x < xsize; x += N) { |
79 | 32.2M | BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + batch_pos, |
80 | 32.2M | row + x); |
81 | 32.2M | batch_pos += N; |
82 | 32.2M | } |
83 | 2.75M | } |
84 | 62.9k | } |
85 | | void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index, |
86 | | size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0, |
87 | | const std::pair<ImageF*, Rect>& plane1, |
88 | 20.9k | const std::pair<ImageF*, Rect>& plane2) { |
89 | 20.9k | HWY_ALIGN Xorshift128Plus rng(visible_frame_index, nonvisible_frame_index, x0, |
90 | 20.9k | y0); |
91 | 20.9k | RandomImage(&rng, plane0.second, plane0.first); |
92 | 20.9k | RandomImage(&rng, plane1.second, plane1.first); |
93 | 20.9k | RandomImage(&rng, plane2.second, plane2.first); |
94 | 20.9k | } |
95 | | |
96 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
97 | | } // namespace HWY_NAMESPACE |
98 | | } // namespace jxl |
99 | | HWY_AFTER_NAMESPACE(); |
100 | | |
101 | | #if HWY_ONCE |
102 | | namespace jxl { |
103 | | |
namespace {
// Exports the per-target Random3Planes implementations compiled above so that
// HWY_DYNAMIC_DISPATCH(Random3Planes) in PrepareNoiseInput can call the one
// chosen for the current CPU.
HWY_EXPORT(Random3Planes);
}  // namespace
107 | | |
108 | | void PrepareNoiseInput(const PassesDecoderState& dec_state, |
109 | | const FrameDimensions& frame_dim, |
110 | | const FrameHeader& frame_header, size_t group_index, |
111 | 8.24k | size_t thread) { |
112 | 8.24k | size_t group_dim = frame_dim.group_dim; |
113 | 8.24k | const size_t gx = group_index % frame_dim.xsize_groups; |
114 | 8.24k | const size_t gy = group_index / frame_dim.xsize_groups; |
115 | 8.24k | RenderPipelineInput input = |
116 | 8.24k | dec_state.render_pipeline->GetInputBuffers(group_index, thread); |
117 | 8.24k | size_t noise_c_start = |
118 | 8.24k | 3 + frame_header.nonserialized_metadata->m.num_extra_channels; |
119 | | // When the color channels are downsampled, we need to generate more noise |
120 | | // input for the current group than just the group dimensions. |
121 | 8.24k | std::pair<ImageF*, Rect> rects[3]; |
122 | 18.7k | for (size_t iy = 0; iy < frame_header.upsampling; iy++) { |
123 | 31.4k | for (size_t ix = 0; ix < frame_header.upsampling; ix++) { |
124 | 83.9k | for (size_t c = 0; c < 3; c++) { |
125 | 62.9k | auto r = input.GetBuffer(noise_c_start + c); |
126 | 62.9k | rects[c].first = r.first; |
127 | 62.9k | size_t x1 = r.second.x0() + r.second.xsize(); |
128 | 62.9k | size_t y1 = r.second.y0() + r.second.ysize(); |
129 | 62.9k | rects[c].second = |
130 | 62.9k | Rect(r.second.x0() + ix * group_dim, r.second.y0() + iy * group_dim, |
131 | 62.9k | group_dim, group_dim, x1, y1); |
132 | 62.9k | } |
133 | 20.9k | HWY_DYNAMIC_DISPATCH(Random3Planes) |
134 | 20.9k | (dec_state.visible_frame_index, dec_state.nonvisible_frame_index, |
135 | 20.9k | (gx * frame_header.upsampling + ix) * group_dim, |
136 | 20.9k | (gy * frame_header.upsampling + iy) * group_dim, rects[0], rects[1], |
137 | 20.9k | rects[2]); |
138 | 20.9k | } |
139 | 10.4k | } |
140 | 8.24k | } |
141 | | |
142 | 50.3k | void DecodeFloatParam(float precision, float* val, BitReader* br) { |
143 | 50.3k | const int absval_quant = br->ReadFixedBits<10>(); |
144 | 50.3k | *val = absval_quant / precision; |
145 | 50.3k | } |
146 | | |
147 | 6.29k | Status DecodeNoise(BitReader* br, NoiseParams* noise_params) { |
148 | 50.3k | for (float& i : noise_params->lut) { |
149 | 50.3k | DecodeFloatParam(kNoisePrecision, &i, br); |
150 | 50.3k | } |
151 | 6.29k | return true; |
152 | 6.29k | } |
153 | | |
154 | | } // namespace jxl |
155 | | #endif // HWY_ONCE |