/src/LPM/external.protobuf/include/google/protobuf/serial_arena.h
// Protocol Buffers - Google's data interchange format
// Copyright 2022 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
//
// This file defines the internal class SerialArena

#ifndef GOOGLE_PROTOBUF_SERIAL_ARENA_H__
#define GOOGLE_PROTOBUF_SERIAL_ARENA_H__

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/log/absl_check.h"
#include "absl/numeric/bits.h"
#include "google/protobuf/arena_align.h"
#include "google/protobuf/arena_cleanup.h"
#include "google/protobuf/port.h"
#include "google/protobuf/string_block.h"

// Must be included last.
#include "google/protobuf/port_def.inc"

namespace google {
namespace protobuf {
namespace internal {

// Arena blocks are variable length malloc-ed objects. The following structure
// describes the common header for all blocks.
struct ArenaBlock {
  // For the zero-size sentry block, where ptr_/limit_ both point to `this`.
  constexpr ArenaBlock() : next(nullptr), size(0) {}

  ArenaBlock(ArenaBlock* next, size_t size) : next(next), size(size) {
    ABSL_DCHECK_GT(size, sizeof(ArenaBlock));
  }

  char* Pointer(size_t n) {
    ABSL_DCHECK_LE(n, size);
    return reinterpret_cast<char*>(this) + n;
  }
  char* Limit() { return Pointer(size & static_cast<size_t>(-8)); }

  bool IsSentry() const { return size == 0; }

  ArenaBlock* const next;
  const size_t size;
  // data follows
};
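
// A rough picture of the layout implied above: the ArenaBlock header sits at
// the start of each malloc-ed region and the usable bytes follow it, so a
// block of `size` bytes offers roughly the range
//   [Pointer(aligned header size), Limit())
// where Limit() rounds `size` down to a multiple of 8; e.g. a 4100-byte block
// has Limit() == Pointer(4096). The sentry block has size == 0 and carries no
// usable memory.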

enum class AllocationClient { kDefault, kArray };

class ThreadSafeArena;

// Tag type used to invoke the constructor of the first SerialArena.
struct FirstSerialArena {
  explicit FirstSerialArena() = default;
};

// A simple arena allocator. Calls to allocate functions must be properly
// serialized by the caller, hence this class cannot be used as a general
// purpose allocator in a multi-threaded program. It serves as a building block
// for ThreadSafeArena, which provides a thread-safe arena allocator.
//
// This class manages
// 1) Arena bump allocation + owning memory blocks.
// 2) Maintaining a cleanup list.
// It delegates the actual memory allocation back to ThreadSafeArena, which
// holds the block growth policy and the backing memory allocation used.
class PROTOBUF_EXPORT SerialArena {
 public:
  static constexpr size_t kBlockHeaderSize =
      ArenaAlignDefault::Ceil(sizeof(ArenaBlock));

  void CleanupList() { cleanup_list_.Cleanup(*this); }
  uint64_t SpaceAllocated() const {
    return space_allocated_.load(std::memory_order_relaxed);
  }
  uint64_t SpaceUsed() const;

  // See comments on `cached_blocks_` member for details.
  PROTOBUF_ALWAYS_INLINE void* TryAllocateFromCachedBlock(size_t size) {
    if (PROTOBUF_PREDICT_FALSE(size < 16)) return nullptr;
    // We round up to the next larger block in case the memory doesn't match
    // the pattern we are looking for.
    const size_t index = absl::bit_width(size - 1) - 4;

    if (PROTOBUF_PREDICT_FALSE(index >= cached_block_length_)) return nullptr;
    auto& cached_head = cached_blocks_[index];
    if (cached_head == nullptr) return nullptr;

    void* ret = cached_head;
    PROTOBUF_UNPOISON_MEMORY_REGION(ret, size);
    cached_head = cached_head->next;
    return ret;
  }
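
  // Worked example of the index computation above: `absl::bit_width(size - 1)
  // - 4` rounds the request up to the next power of two and maps it to a
  // freelist slot:
  //   size = 16      -> bit_width(15) = 4       -> index 0
  //   size = 17..32  -> bit_width(size - 1) = 5 -> index 1
  //   size = 33..64  -> bit_width(size - 1) = 6 -> index 2
  // Requests smaller than 16 bytes bail out before this computation.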

  // In kArray mode we look through cached blocks.
  // We do not do this by default because most non-array allocations will not
  // have the right size and will fail to find an appropriate cached block.
  //
  // TODO: Evaluate if we should use cached blocks for message types of
  // the right size. We can statically know if the allocation size can benefit
  // from it.
  template <AllocationClient alloc_client = AllocationClient::kDefault>
  void* AllocateAligned(size_t n) {
    ABSL_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
    ABSL_DCHECK_GE(limit_, ptr());

    if (alloc_client == AllocationClient::kArray) {
      if (void* res = TryAllocateFromCachedBlock(n)) {
        return res;
      }
    }

    void* ptr;
    if (PROTOBUF_PREDICT_TRUE(MaybeAllocateAligned(n, &ptr))) {
      return ptr;
    }
    return AllocateAlignedFallback(n);
  }
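
  // Caller-side sketch (illustrative only; `arena`, `MyT`, and the literal
  // sizes are hypothetical, and `n` must already be default-aligned):
  //
  //   // Plain arena memory, default client:
  //   void* p = arena.AllocateAligned(ArenaAlignDefault::Ceil(sizeof(MyT)));
  //   // Array-backed memory, eligible for reuse via the block cache:
  //   void* buf = arena.AllocateAligned<AllocationClient::kArray>(64);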

 private:
  static inline PROTOBUF_ALWAYS_INLINE constexpr size_t AlignUpTo(size_t n,
                                                                  size_t a) {
    // We waste space by over-allocating `align - 8` bytes compared to a
    // dedicated function that takes the current alignment into consideration.
    // Such a scheme would only waste `(align - 8) / 2` bytes on average, but
    // it would require a dedicated entry point in the out-of-line arena
    // allocation functions. Possibly re-evaluate tradeoffs later.
    return a <= 8 ? ArenaAlignDefault::Ceil(n) : ArenaAlignAs(a).Padded(n);
  }
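
  // Illustrative arithmetic for AlignUpTo, assuming ArenaAlignAs(a).Padded(n)
  // adds `a - 8` bytes of slack (see arena_align.h):
  //   AlignUpTo(24, 8)  -> Ceil(24)   = 24   (no extra padding needed)
  //   AlignUpTo(24, 16) -> Padded(24) = 24 + (16 - 8) = 32
  //   AlignUpTo(24, 32) -> Padded(24) = 24 + (32 - 8) = 48
  // which is the `align - 8` worst-case waste mentioned above.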

  static inline PROTOBUF_ALWAYS_INLINE void* AlignTo(void* p, size_t a) {
    return (a <= ArenaAlignDefault::align)
               ? ArenaAlignDefault::CeilDefaultAligned(p)
               : ArenaAlignAs(a).CeilDefaultAligned(p);
  }

  // See comments on `cached_blocks_` member for details.
  void ReturnArrayMemory(void* p, size_t size) {
    // We only need to check the size on 32-bit platforms; on 64-bit platforms
    // the minimum allocation size from Repeated*Field is guaranteed to be 16.
    if (sizeof(void*) < 8) {
      if (PROTOBUF_PREDICT_FALSE(size < 16)) return;
    } else {
      PROTOBUF_ASSUME(size >= 16);
    }

    // We round down to the next smaller block in case the memory doesn't match
    // the pattern we are looking for, e.g. someone might have called Reserve()
    // on the repeated field.
    const size_t index = absl::bit_width(size) - 5;

    if (PROTOBUF_PREDICT_FALSE(index >= cached_block_length_)) {
      // We can't put this object on the freelist, so make the object itself
      // the new freelist array. It is guaranteed to be larger than the one we
      // have, and large enough to hold another allocation of `size`.
      CachedBlock** new_list = static_cast<CachedBlock**>(p);
      size_t new_size = size / sizeof(CachedBlock*);

      std::copy(cached_blocks_, cached_blocks_ + cached_block_length_,
                new_list);

      // We need to unpoison this memory before filling it in case it has been
      // poisoned by another sanitizer client.
      PROTOBUF_UNPOISON_MEMORY_REGION(
          new_list + cached_block_length_,
          (new_size - cached_block_length_) * sizeof(CachedBlock*));

      std::fill(new_list + cached_block_length_, new_list + new_size, nullptr);

      cached_blocks_ = new_list;
      // Cap the length so it fits in uint8_t; indices are powers-of-two
      // exponents, so we never need more than 64 entries.
      cached_block_length_ =
          static_cast<uint8_t>(std::min(size_t{64}, new_size));

      return;
    }

    auto& cached_head = cached_blocks_[index];
    auto* new_node = static_cast<CachedBlock*>(p);
    new_node->next = cached_head;
    cached_head = new_node;
    PROTOBUF_POISON_MEMORY_REGION(p, size);
  }
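
  // Worked example for the return path above: `absl::bit_width(size) - 5`
  // rounds the returned block *down* to a power of two:
  //   size = 16..31   -> index 0
  //   size = 32..63   -> index 1
  //   size = 64..127  -> index 2
  // so a naturally grown Repeated*Field block of 8 + 2^5 = 40 bytes lands in
  // list 1. When the index is beyond `cached_block_length_`, the block itself
  // becomes the new, larger freelist array instead of being cached.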

 public:
  // Allocate space if the current region provides enough space.
  bool MaybeAllocateAligned(size_t n, void** out) {
    ABSL_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
    ABSL_DCHECK_GE(limit_, ptr());
    char* ret = ptr();
    // ret + n may point out of the block bounds, or ret may be nullptr.
    // Both computations have undefined behavior when done on pointers,
    // so do them on uintptr_t instead.
    if (PROTOBUF_PREDICT_FALSE(reinterpret_cast<uintptr_t>(ret) + n >
                               reinterpret_cast<uintptr_t>(limit_))) {
      return false;
    }
    PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
    *out = ret;
    char* next = ret + n;
    set_ptr(next);
    MaybePrefetchData(next);
    return true;
  }
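
  // Usage sketch (illustrative only): callers treat this as the bump-pointer
  // fast path and fall back to a block-allocating slow path on failure,
  // mirroring AllocateAligned() above. `HypotheticalSlowPath` is not part of
  // this header:
  //
  //   void* out;
  //   if (arena.MaybeAllocateAligned(ArenaAlignDefault::Ceil(n), &out)) {
  //     return out;                    // stayed within the current block
  //   }
  //   return HypotheticalSlowPath(n);  // e.g. allocate a new block and retry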

  // If there is enough space in the current block, allocate space for one
  // std::string object and register for destruction. The object has not been
  // constructed and the memory returned is uninitialized.
  PROTOBUF_ALWAYS_INLINE void* MaybeAllocateStringWithCleanup() {
    void* p;
    return MaybeAllocateString(p) ? p : nullptr;
  }

  PROTOBUF_ALWAYS_INLINE
  void* AllocateAlignedWithCleanup(size_t n, size_t align,
                                   void (*destructor)(void*)) {
    n = ArenaAlignDefault::Ceil(n);
    char* ret = ArenaAlignAs(align).CeilDefaultAligned(ptr());
    // See the comment in MaybeAllocateAligned re uintptr_t.
    if (PROTOBUF_PREDICT_FALSE(reinterpret_cast<uintptr_t>(ret) + n >
                               reinterpret_cast<uintptr_t>(limit_))) {
      return AllocateAlignedWithCleanupFallback(n, align, destructor);
    }
    PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
    char* next = ret + n;
    set_ptr(next);
    AddCleanup(ret, destructor);
    ABSL_DCHECK_GE(limit_, ptr());
    MaybePrefetchData(next);
    return ret;
  }
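
  // Sketch of registering a non-trivially-destructible object (illustrative;
  // `MyType` and `DestroyMyType` are hypothetical, not part of this header):
  //
  //   static void DestroyMyType(void* p) {
  //     static_cast<MyType*>(p)->~MyType();
  //   }
  //   ...
  //   void* mem = arena.AllocateAlignedWithCleanup(sizeof(MyType),
  //                                                alignof(MyType),
  //                                                &DestroyMyType);
  //   MyType* obj = new (mem) MyType();
  //
  // The destructor runs later, when CleanupList() walks `cleanup_list_`.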

  PROTOBUF_ALWAYS_INLINE
  void AddCleanup(void* elem, void (*destructor)(void*)) {
    cleanup_list_.Add(elem, destructor, *this);
    MaybePrefetchCleanup();
  }

  ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlock();

  std::vector<void*> PeekCleanupListForTesting();

 private:
  friend class ThreadSafeArena;
  friend class cleanup::ChunkList;

  // See comments for cached_blocks_.
  struct CachedBlock {
    // Simple linked list.
    CachedBlock* next;
  };

  static constexpr ptrdiff_t kPrefetchDataDegree = ABSL_CACHELINE_SIZE * 16;
  static constexpr ptrdiff_t kPrefetchCleanupDegree = ABSL_CACHELINE_SIZE * 6;

  // Constructor is private as only New() should be used.
  inline SerialArena(ArenaBlock* b, ThreadSafeArena& parent);

  // Constructors to handle the first SerialArena.
  inline explicit SerialArena(ThreadSafeArena& parent);
  inline SerialArena(FirstSerialArena, ArenaBlock* b, ThreadSafeArena& parent);

  bool MaybeAllocateString(void*& p);
  ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlockFallback();

  // Prefetch the next prefetch_degree bytes after `prefetch_ptr` and
  // up to `limit`, if `next` is within prefetch_degree bytes of `prefetch_ptr`.
  PROTOBUF_ALWAYS_INLINE
  static const char* MaybePrefetchImpl(const ptrdiff_t prefetch_degree,
                                       const char* next, const char* limit,
                                       const char* prefetch_ptr) {
    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr - next > prefetch_degree))
      return prefetch_ptr;
    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr < limit)) {
      prefetch_ptr = std::max(next, prefetch_ptr);
      ABSL_DCHECK(prefetch_ptr != nullptr);
      const char* end = std::min(limit, prefetch_ptr + prefetch_degree);
      for (; prefetch_ptr < end; prefetch_ptr += ABSL_CACHELINE_SIZE) {
        absl::PrefetchToLocalCacheForWrite(prefetch_ptr);
      }
    }
    return prefetch_ptr;
  }
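
  // Rough numbers for the prefetch window (assuming ABSL_CACHELINE_SIZE is 64,
  // as is typical on x86): kPrefetchDataDegree spans 64 * 16 = 1024 bytes and
  // kPrefetchCleanupDegree spans 64 * 6 = 384 bytes. Once the allocation
  // pointer comes within that distance of `prefetch_ptr`, the loop above
  // issues one write prefetch per cache line until the window is refilled or
  // `limit` is reached.
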
  PROTOBUF_ALWAYS_INLINE
  void MaybePrefetchData(const char* next) {
    ABSL_DCHECK(static_cast<const void*>(prefetch_ptr_) == nullptr ||
                static_cast<const void*>(prefetch_ptr_) >= head());
    prefetch_ptr_ =
        MaybePrefetchImpl(kPrefetchDataDegree, next, limit_, prefetch_ptr_);
  }
  PROTOBUF_ALWAYS_INLINE
  void MaybePrefetchCleanup() {
    ABSL_DCHECK(static_cast<const void*>(cleanup_list_.prefetch_ptr_) ==
                    nullptr ||
                static_cast<const void*>(cleanup_list_.prefetch_ptr_) >=
                    cleanup_list_.head_);
    cleanup_list_.prefetch_ptr_ = MaybePrefetchImpl(
        kPrefetchCleanupDegree, reinterpret_cast<char*>(cleanup_list_.next_),
        reinterpret_cast<char*>(cleanup_list_.limit_),
        cleanup_list_.prefetch_ptr_);
  }

  // Creates a new SerialArena inside mem, using the remaining memory for
  // future allocations.
  // The `parent` arena must outlive the serial arena, which is guaranteed
  // because the parent manages the lifetime of the serial arenas.
  static SerialArena* New(SizedPtr mem, ThreadSafeArena& parent);
  // Free SerialArena returning the memory passed in to New.
  template <typename Deallocator>
  SizedPtr Free(Deallocator deallocator);

  size_t FreeStringBlocks() {
    // On the active block, delete all strings, skipping the unused instances.
    size_t unused_bytes = string_block_unused_.load(std::memory_order_relaxed);
    if (StringBlock* sb = string_block_.load(std::memory_order_relaxed)) {
      return FreeStringBlocks(sb, unused_bytes);
    }
    return 0;
  }
  static size_t FreeStringBlocks(StringBlock* string_block, size_t unused);

  // Adds `space_used` to `space_used_` in relaxed atomic order.
  void AddSpaceUsed(size_t space_used) {
    space_used_.store(space_used_.load(std::memory_order_relaxed) + space_used,
                      std::memory_order_relaxed);
  }

  // Adds `space_allocated` to `space_allocated_` in relaxed atomic order.
  void AddSpaceAllocated(size_t space_allocated) {
    space_allocated_.store(
        space_allocated_.load(std::memory_order_relaxed) + space_allocated,
        std::memory_order_relaxed);
  }

  // Helper getters/setters to handle relaxed operations on atomic variables.
  ArenaBlock* head() { return head_.load(std::memory_order_relaxed); }
  const ArenaBlock* head() const {
    return head_.load(std::memory_order_relaxed);
  }

  char* ptr() { return ptr_.load(std::memory_order_relaxed); }
  const char* ptr() const { return ptr_.load(std::memory_order_relaxed); }
  void set_ptr(char* ptr) { return ptr_.store(ptr, std::memory_order_relaxed); }
  PROTOBUF_ALWAYS_INLINE void set_range(char* ptr, char* limit) {
    set_ptr(ptr);
    prefetch_ptr_ = ptr;
    limit_ = limit;
  }

  void* AllocateAlignedFallback(size_t n);
  void* AllocateAlignedWithCleanupFallback(size_t n, size_t align,
                                           void (*destructor)(void*));
  void AddCleanupFallback(void* elem, void (*destructor)(void*));
  inline void AllocateNewBlock(size_t n);
  inline void Init(ArenaBlock* b, size_t offset);

  // Members are declared here to track sizeof(SerialArena) and hotness
  // centrally. They are (roughly) laid out in descending order of hotness.

  // Next pointer to allocate from. Always 8-byte aligned. Points inside
  // head_ (and head_->pos will always be non-canonical). We keep these
  // here to reduce indirection.
  std::atomic<char*> ptr_{nullptr};
  // Limiting address up to which memory can be allocated from the head block.
  char* limit_ = nullptr;
  // Current prefetch positions. Data from `ptr_` up to but not including
  // `prefetch_ptr_` is software prefetched.
  const char* prefetch_ptr_ = nullptr;

  // Chunked linked list for managing cleanup for arena elements.
  cleanup::ChunkList cleanup_list_;

  // The active string block.
  std::atomic<StringBlock*> string_block_{nullptr};

  // The number of unused bytes in string_block_.
  // We allocate from `effective_size()` down to 0 inside `string_block_`.
  // `unused == 0` means that `string_block_` is exhausted (or null).
  std::atomic<size_t> string_block_unused_{0};

  std::atomic<ArenaBlock*> head_{nullptr};  // Head of linked list of blocks.
  std::atomic<size_t> space_used_{0};       // Necessary for metrics.
  std::atomic<size_t> space_allocated_{0};
  ThreadSafeArena& parent_;

  // Repeated*Field and Arena play together to reduce memory consumption by
  // reusing blocks. Currently, natural growth of the repeated field types makes
  // them allocate blocks of size `8 + 2^N, N >= 3`.
  // When a repeated field grows, it returns its previous block and we put it
  // in this free list.
  // `cached_blocks_[i]` points to the free list for blocks of size
  // `8 + 2^(i+4)`.
  // The array of freelists is grown when needed in `ReturnArrayMemory()`.
  uint8_t cached_block_length_ = 0;
  CachedBlock** cached_blocks_ = nullptr;
};
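
// Lifetime sketch (illustrative, based only on the declarations above):
// ThreadSafeArena owns each SerialArena and drives it roughly as
//
//   SerialArena* sa = SerialArena::New(mem, *thread_safe_arena);
//   void* p = sa->AllocateAligned(n);      // calls serialized by the caller
//   ...
//   sa->CleanupList();                     // run registered destructors
//   SizedPtr raw = sa->Free(deallocator);  // return blocks to the allocator
//
// The exact sequence lives in ThreadSafeArena (a friend of this class); the
// snippet only shows how the pieces declared above fit together.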

inline PROTOBUF_ALWAYS_INLINE bool SerialArena::MaybeAllocateString(void*& p) {
  // Check how many unused instances are in the current block.
  size_t unused_bytes = string_block_unused_.load(std::memory_order_relaxed);
  if (PROTOBUF_PREDICT_TRUE(unused_bytes != 0)) {
    unused_bytes -= sizeof(std::string);
    string_block_unused_.store(unused_bytes, std::memory_order_relaxed);
    p = string_block_.load(std::memory_order_relaxed)->AtOffset(unused_bytes);
    return true;
  }
  return false;
}
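
// Illustrative accounting for the fast path above, assuming
// sizeof(std::string) == 32 (typical on 64-bit libstdc++): with
// `string_block_unused_ == 96` the next call hands out the slot at offset 64
// and stores 64 back, and two more strings fit before the fallback in
// AllocateFromStringBlockFallback() has to grab a new StringBlock.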

ABSL_ATTRIBUTE_RETURNS_NONNULL inline PROTOBUF_ALWAYS_INLINE void*
SerialArena::AllocateFromStringBlock() {
  void* p;
  if (ABSL_PREDICT_TRUE(MaybeAllocateString(p))) return p;
  return AllocateFromStringBlockFallback();
}

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"

#endif  // GOOGLE_PROTOBUF_SERIAL_ARENA_H__