/src/libjxl/lib/jxl/image.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/image.h" |
7 | | |
8 | | #include <algorithm> // swap |
9 | | |
10 | | #undef HWY_TARGET_INCLUDE |
11 | | #define HWY_TARGET_INCLUDE "lib/jxl/image.cc" |
12 | | #include <hwy/foreach_target.h> |
13 | | #include <hwy/highway.h> |
14 | | |
15 | | #include "lib/jxl/common.h" |
16 | | #include "lib/jxl/image_ops.h" |
17 | | #include "lib/jxl/sanitizers.h" |
18 | | |
19 | | HWY_BEFORE_NAMESPACE(); |
20 | | namespace jxl { |
21 | | |
22 | | namespace HWY_NAMESPACE { |
23 | 0 | size_t GetVectorSize() { return HWY_LANES(uint8_t); } Unexecuted instantiation: jxl::N_SSE4::GetVectorSize() Unexecuted instantiation: jxl::N_AVX2::GetVectorSize() Unexecuted instantiation: jxl::N_AVX3::GetVectorSize() Unexecuted instantiation: jxl::N_AVX3_ZEN4::GetVectorSize() Unexecuted instantiation: jxl::N_AVX3_SPR::GetVectorSize() Unexecuted instantiation: jxl::N_SSE2::GetVectorSize() |
24 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
25 | | } // namespace HWY_NAMESPACE |
26 | | |
27 | | } // namespace jxl |
28 | | HWY_AFTER_NAMESPACE(); |
29 | | |
30 | | #if HWY_ONCE |
31 | | namespace jxl { |
32 | | namespace { |
33 | | |
34 | | HWY_EXPORT(GetVectorSize); // Local function. |
35 | | |
36 | | // Returns distance [bytes] between the start of two consecutive rows, a |
37 | | // multiple of vector/cache line size but NOT CacheAligned::kAlias - see below. |
38 | 0 | size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) { |
39 | 0 | const size_t vec_size = VectorSize(); |
40 | 0 | size_t valid_bytes = xsize * sizeof_t; |
41 | | |
42 | | // Allow unaligned accesses starting at the last valid value - this may raise |
43 | | // msan errors unless the user calls InitializePaddingForUnalignedAccesses. |
44 | | // Skip for the scalar case because no extra lanes will be loaded. |
45 | 0 | if (vec_size != 0) { |
46 | 0 | valid_bytes += vec_size - sizeof_t; |
47 | 0 | } |
48 | | |
49 | | // Round up to vector and cache line size. |
50 | 0 | const size_t align = std::max(vec_size, CacheAligned::kAlignment); |
51 | 0 | size_t bytes_per_row = RoundUpTo(valid_bytes, align); |
52 | | |
53 | | // During the lengthy window before writes are committed to memory, CPUs |
54 | | // guard against read after write hazards by checking the address, but |
55 | | // only the lower 11 bits. We avoid a false dependency between writes to |
56 | | // consecutive rows by ensuring their sizes are not multiples of 2 KiB. |
57 | | // Avoid2K prevents the same problem for the planes of an Image3. |
58 | 0 | if (bytes_per_row % CacheAligned::kAlias == 0) { |
59 | 0 | bytes_per_row += align; |
60 | 0 | } |
61 | |
|
62 | 0 | JXL_ASSERT(bytes_per_row % align == 0); |
63 | 0 | return bytes_per_row; |
64 | 0 | } |
65 | | |
66 | | } // namespace |
67 | | |
68 | 0 | size_t VectorSize() { |
69 | 0 | static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)(); |
70 | 0 | return bytes; |
71 | 0 | } |
72 | | |
73 | | PlaneBase::PlaneBase(const size_t xsize, const size_t ysize, |
74 | | const size_t sizeof_t) |
75 | | : xsize_(static_cast<uint32_t>(xsize)), |
76 | | ysize_(static_cast<uint32_t>(ysize)), |
77 | | orig_xsize_(static_cast<uint32_t>(xsize)), |
78 | 0 | orig_ysize_(static_cast<uint32_t>(ysize)) { |
79 | 0 | JXL_CHECK(xsize == xsize_); |
80 | 0 | JXL_CHECK(ysize == ysize_); |
81 | | |
82 | 0 | JXL_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8); |
83 | | |
84 | 0 | bytes_per_row_ = 0; |
85 | | // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate |
86 | | // if nonzero, because "zero" bytes still have padding/bookkeeping overhead. |
87 | 0 | if (xsize != 0 && ysize != 0) { |
88 | 0 | bytes_per_row_ = BytesPerRow(xsize, sizeof_t); |
89 | 0 | bytes_ = AllocateArray(bytes_per_row_ * ysize); |
90 | 0 | JXL_CHECK(bytes_.get()); |
91 | 0 | InitializePadding(sizeof_t, Padding::kRoundUp); |
92 | 0 | } |
93 | 0 | } |
94 | | |
95 | 0 | void PlaneBase::InitializePadding(const size_t sizeof_t, Padding padding) { |
96 | | #if defined(MEMORY_SANITIZER) || HWY_IDE |
97 | | if (xsize_ == 0 || ysize_ == 0) return; |
98 | | |
99 | | const size_t vec_size = VectorSize(); |
100 | | if (vec_size == 0) return; // Scalar mode: no padding needed |
101 | | |
102 | | const size_t valid_size = xsize_ * sizeof_t; |
103 | | const size_t initialize_size = padding == Padding::kRoundUp |
104 | | ? RoundUpTo(valid_size, vec_size) |
105 | | : valid_size + vec_size - sizeof_t; |
106 | | if (valid_size == initialize_size) return; |
107 | | |
108 | | for (size_t y = 0; y < ysize_; ++y) { |
109 | | uint8_t* JXL_RESTRICT row = static_cast<uint8_t*>(VoidRow(y)); |
110 | | #if defined(__clang__) && \ |
111 | | ((!defined(__apple_build_version__) && __clang_major__ <= 6) || \ |
112 | | (defined(__apple_build_version__) && \ |
113 | | __apple_build_version__ <= 10001145)) |
114 | | // There's a bug in msan in clang-6 when handling AVX2 operations. This |
115 | | // workaround allows tests to pass on msan, although it is slower and |
116 | | // prevents msan warnings from uninitialized images. |
117 | | std::fill(row, msan::kSanitizerSentinelByte, initialize_size); |
118 | | #else |
119 | | memset(row + valid_size, msan::kSanitizerSentinelByte, |
120 | | initialize_size - valid_size); |
121 | | #endif // clang6 |
122 | | } |
123 | | #endif // MEMORY_SANITIZER |
124 | 0 | } |
125 | | |
126 | 0 | void PlaneBase::Swap(PlaneBase& other) { |
127 | 0 | std::swap(xsize_, other.xsize_); |
128 | 0 | std::swap(ysize_, other.ysize_); |
129 | 0 | std::swap(orig_xsize_, other.orig_xsize_); |
130 | 0 | std::swap(orig_ysize_, other.orig_ysize_); |
131 | 0 | std::swap(bytes_per_row_, other.bytes_per_row_); |
132 | 0 | std::swap(bytes_, other.bytes_); |
133 | 0 | } |
134 | | |
135 | | void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in, |
136 | 0 | size_t block_dim) { |
137 | 0 | const size_t xsize_orig = in->xsize(); |
138 | 0 | const size_t ysize_orig = in->ysize(); |
139 | 0 | const size_t xsize = RoundUpTo(xsize_orig, block_dim); |
140 | 0 | const size_t ysize = RoundUpTo(ysize_orig, block_dim); |
141 | | // Expands image size to the originally-allocated size. |
142 | 0 | in->ShrinkTo(xsize, ysize); |
143 | 0 | for (size_t c = 0; c < 3; c++) { |
144 | 0 | for (size_t y = 0; y < ysize_orig; y++) { |
145 | 0 | float* JXL_RESTRICT row = in->PlaneRow(c, y); |
146 | 0 | for (size_t x = xsize_orig; x < xsize; x++) { |
147 | 0 | row[x] = row[xsize_orig - 1]; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | const float* JXL_RESTRICT row_src = in->ConstPlaneRow(c, ysize_orig - 1); |
151 | 0 | for (size_t y = ysize_orig; y < ysize; y++) { |
152 | 0 | memcpy(in->PlaneRow(c, y), row_src, xsize * sizeof(float)); |
153 | 0 | } |
154 | 0 | } |
155 | 0 | } |
156 | | |
157 | | static void DownsampleImage(const ImageF& input, size_t factor, |
158 | 0 | ImageF* output) { |
159 | 0 | JXL_ASSERT(factor != 1); |
160 | 0 | output->ShrinkTo(DivCeil(input.xsize(), factor), |
161 | 0 | DivCeil(input.ysize(), factor)); |
162 | 0 | size_t in_stride = input.PixelsPerRow(); |
163 | 0 | for (size_t y = 0; y < output->ysize(); y++) { |
164 | 0 | float* row_out = output->Row(y); |
165 | 0 | const float* row_in = input.Row(factor * y); |
166 | 0 | for (size_t x = 0; x < output->xsize(); x++) { |
167 | 0 | size_t cnt = 0; |
168 | 0 | float sum = 0; |
169 | 0 | for (size_t iy = 0; iy < factor && iy + factor * y < input.ysize(); |
170 | 0 | iy++) { |
171 | 0 | for (size_t ix = 0; ix < factor && ix + factor * x < input.xsize(); |
172 | 0 | ix++) { |
173 | 0 | sum += row_in[iy * in_stride + x * factor + ix]; |
174 | 0 | cnt++; |
175 | 0 | } |
176 | 0 | } |
177 | 0 | row_out[x] = sum / cnt; |
178 | 0 | } |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | 0 | void DownsampleImage(ImageF* image, size_t factor) { |
183 | | // Allocate extra space to avoid a reallocation when padding. |
184 | 0 | ImageF downsampled(DivCeil(image->xsize(), factor) + kBlockDim, |
185 | 0 | DivCeil(image->ysize(), factor) + kBlockDim); |
186 | 0 | DownsampleImage(*image, factor, &downsampled); |
187 | 0 | *image = std::move(downsampled); |
188 | 0 | } |
189 | | |
190 | 0 | void DownsampleImage(Image3F* opsin, size_t factor) { |
191 | 0 | JXL_ASSERT(factor != 1); |
192 | | // Allocate extra space to avoid a reallocation when padding. |
193 | 0 | Image3F downsampled(DivCeil(opsin->xsize(), factor) + kBlockDim, |
194 | 0 | DivCeil(opsin->ysize(), factor) + kBlockDim); |
195 | 0 | downsampled.ShrinkTo(downsampled.xsize() - kBlockDim, |
196 | 0 | downsampled.ysize() - kBlockDim); |
197 | 0 | for (size_t c = 0; c < 3; c++) { |
198 | 0 | DownsampleImage(opsin->Plane(c), factor, &downsampled.Plane(c)); |
199 | 0 | } |
200 | 0 | *opsin = std::move(downsampled); |
201 | 0 | } |
202 | | |
203 | | } // namespace jxl |
204 | | #endif // HWY_ONCE |