/src/libjxl/lib/jxl/memory_manager_internal.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/memory_manager_internal.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | #include <jxl/types.h> |
10 | | |
11 | | #include <atomic> |
12 | | #include <cstddef> |
13 | | #include <cstdio> |
14 | | #include <cstdlib> |
15 | | #include <cstring> // memcpy |
16 | | #include <hwy/base.h> // kMaxVectorSize |
17 | | |
18 | | #include "lib/jxl/base/common.h" |
19 | | #include "lib/jxl/base/status.h" |
20 | | #include "lib/jxl/simd_util.h" |
21 | | |
22 | | namespace jxl { |
23 | | |
24 | | namespace { |
25 | | |
// Base alignment of every allocation. Matching the L2 fill size (lines are
// fetched in pairs, i.e. 2 x the cache line size) avoids RFOs.
constexpr size_t kAlignment = 2 * 64;
static_assert(!(kAlignment & (kAlignment - 1)),
              "kAlignment must be a power of 2");

// Number of distinct kAlignment-sized offsets that allocations rotate through.
constexpr size_t kNumAlignmentGroups = 16;
static_assert(!(kNumAlignmentGroups & (kNumAlignmentGroups - 1)),
              "kNumAlignmentGroups must be a power of 2");

// Minimum multiple for which cache set conflicts and/or loads blocked by
// preceding stores can occur.
constexpr size_t kAlias = kNumAlignmentGroups * kAlignment;
37 | | |
// Default `alloc` callback: hands the request straight to the C heap.
// `opaque` is accepted only to match the callback signature and is ignored.
// Returns whatever malloc returns for `size` bytes (nullptr on failure).
void* MemoryManagerDefaultAlloc(void* opaque, size_t size) {
  (void)opaque;  // The default allocator keeps no per-manager state.
  void* const block = std::malloc(size);
  return block;
}
41 | | |
// Default `free` callback: releases `address` back to the C heap.
// `opaque` is accepted only to match the callback signature and is ignored.
void MemoryManagerDefaultFree(void* opaque, void* address) {
  (void)opaque;  // The default allocator keeps no per-manager state.
  std::free(address);
}
43 | | |
44 | | } // namespace |
45 | | |
46 | 0 | void* MemoryManagerAlloc(const JxlMemoryManager* memory_manager, size_t size) { |
47 | 0 | return memory_manager->alloc(memory_manager->opaque, size); |
48 | 0 | } |
49 | | |
50 | 0 | void MemoryManagerFree(const JxlMemoryManager* memory_manager, void* address) { |
51 | 0 | memory_manager->free(memory_manager->opaque, address); |
52 | 0 | } |
53 | | |
54 | | Status MemoryManagerInit(JxlMemoryManager* self, |
55 | 0 | const JxlMemoryManager* memory_manager) { |
56 | 0 | if (memory_manager) { |
57 | 0 | *self = *memory_manager; |
58 | 0 | } else { |
59 | 0 | memset(self, 0, sizeof(*self)); |
60 | 0 | } |
61 | 0 | bool is_default_alloc = (self->alloc == nullptr); |
62 | 0 | bool is_default_free = (self->free == nullptr); |
63 | 0 | if (is_default_alloc != is_default_free) { |
64 | 0 | return false; |
65 | 0 | } |
66 | 0 | if (is_default_alloc) self->alloc = jxl::MemoryManagerDefaultAlloc; |
67 | 0 | if (is_default_free) self->free = jxl::MemoryManagerDefaultFree; |
68 | |
|
69 | 0 | return true; |
70 | 0 | } |
71 | | |
72 | 0 | size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) { |
73 | | // Special case: we don't allow any ops -> don't need extra padding/ |
74 | 0 | if (xsize == 0) { |
75 | 0 | return 0; |
76 | 0 | } |
77 | | |
78 | 0 | const size_t vec_size = MaxVectorSize(); |
79 | 0 | size_t valid_bytes = xsize * sizeof_t; |
80 | | |
81 | | // Allow unaligned accesses starting at the last valid value. |
82 | | // Skip for the scalar case because no extra lanes will be loaded. |
83 | 0 | if (vec_size != 0) { |
84 | 0 | valid_bytes += vec_size - sizeof_t; |
85 | 0 | } |
86 | | |
87 | | // Round up to vector and cache line size. |
88 | 0 | const size_t align = std::max(vec_size, kAlignment); |
89 | 0 | size_t bytes_per_row = RoundUpTo(valid_bytes, align); |
90 | | |
91 | | // During the lengthy window before writes are committed to memory, CPUs |
92 | | // guard against read after write hazards by checking the address, but |
93 | | // only the lower 11 bits. We avoid a false dependency between writes to |
94 | | // consecutive rows by ensuring their sizes are not multiples of 2 KiB. |
95 | | // Avoid2K prevents the same problem for the planes of an Image3. |
96 | 0 | if (bytes_per_row % kAlias == 0) { |
97 | 0 | bytes_per_row += align; |
98 | 0 | } |
99 | |
|
100 | 0 | JXL_ASSERT(bytes_per_row % align == 0); |
101 | 0 | return bytes_per_row; |
102 | 0 | } |
103 | | |
104 | | StatusOr<AlignedMemory> AlignedMemory::Create(JxlMemoryManager* memory_manager, |
105 | 920k | size_t size) { |
106 | 920k | size_t allocation_size = size + kAlias; |
107 | 920k | if (size > allocation_size) { |
108 | 0 | return JXL_FAILURE("Requested allocation is too large"); |
109 | 0 | } |
110 | 920k | JXL_CHECK(memory_manager); |
111 | 920k | void* allocated = |
112 | 920k | memory_manager->alloc(memory_manager->opaque, allocation_size); |
113 | 920k | if (allocated == nullptr) { |
114 | 0 | return JXL_FAILURE("Allocation failed"); |
115 | 0 | } |
116 | 920k | return AlignedMemory{memory_manager, allocated}; |
117 | 920k | } |
118 | | |
119 | | AlignedMemory::AlignedMemory(JxlMemoryManager* memory_manager, void* allocation) |
120 | 920k | : allocation_(allocation), memory_manager_(memory_manager) { |
121 | | // Congruence to `offset` (mod kAlias) reduces cache conflicts and load/store |
122 | | // stalls, especially with large allocations that would otherwise have similar |
123 | | // alignments. |
124 | 920k | static std::atomic<uint32_t> next_group{0}; |
125 | 920k | size_t group = |
126 | 920k | static_cast<size_t>(next_group.fetch_add(1, std::memory_order_relaxed)); |
127 | 920k | group &= (kNumAlignmentGroups - 1); |
128 | 920k | size_t offset = kAlignment * group; |
129 | | |
130 | | // Actual allocation. |
131 | 920k | uintptr_t address = reinterpret_cast<uintptr_t>(allocation); |
132 | | |
133 | | // Aligned address, but might land before allocation (50%/50%). |
134 | 920k | uintptr_t aligned_address = (address & ~(kAlias - 1)) + offset; |
135 | 920k | if (aligned_address < address) aligned_address += kAlias; |
136 | | |
137 | 920k | address_ = reinterpret_cast<void*>(aligned_address); // NOLINT |
138 | 920k | } |
139 | | |
140 | 1.84M | AlignedMemory::AlignedMemory(AlignedMemory&& other) noexcept { |
141 | 1.84M | allocation_ = other.allocation_; |
142 | 1.84M | memory_manager_ = other.memory_manager_; |
143 | 1.84M | address_ = other.address_; |
144 | 1.84M | other.memory_manager_ = nullptr; |
145 | 1.84M | } |
146 | | |
147 | 920k | AlignedMemory& AlignedMemory::operator=(AlignedMemory&& other) noexcept { |
148 | 920k | if (this == &other) return *this; |
149 | 920k | if (memory_manager_ && allocation_) { |
150 | 81.6k | memory_manager_->free(memory_manager_->opaque, allocation_); |
151 | 81.6k | } |
152 | 920k | allocation_ = other.allocation_; |
153 | 920k | memory_manager_ = other.memory_manager_; |
154 | 920k | address_ = other.address_; |
155 | 920k | other.memory_manager_ = nullptr; |
156 | 920k | return *this; |
157 | 920k | } |
158 | | |
159 | 71.5M | AlignedMemory::~AlignedMemory() { |
160 | 71.5M | if (memory_manager_ == nullptr) return; |
161 | 838k | memory_manager_->free(memory_manager_->opaque, allocation_); |
162 | 838k | } |
163 | | |
164 | | } // namespace jxl |