/src/botan/src/lib/utils/mem_pool/mem_pool.cpp
/*
* (C) 2018,2019 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/internal/mem_pool.h>
#include <botan/mem_ops.h>
#include <algorithm>

#if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
   #include <botan/internal/os_utils.h>
#endif

namespace Botan {

/*
* Memory pool theory of operation
*
* This allocator is not a general purpose allocator, but works well within
* the context of allocating cryptographic keys. It makes several assumptions
* which don't work for implementing malloc but simplify and speed up the
* implementation:
*
* - There is some set of pages, which cannot be expanded later. These are
*   pages which were allocated, mlocked and passed to the Memory_Pool
*   constructor.
*
* - The allocator is allowed to return null anytime it feels like not
*   servicing a request, in which case the request will be sent to calloc
*   instead (a sketch of this fallback pattern follows this comment). In
*   particular, requests which are too small or too large are rejected.
*
* - Most allocations are powers of 2; the remainder are usually multiples of 8.
*
* - Free requests include the size of the allocation, so there is no need to
*   track this within the pool.
*
* - Alignment is important to the caller. For this allocator, any allocation
*   of size N is aligned evenly at N bytes.
*
* Initially each page is in the free page list. Each page is used for just one
* size of allocation, with requests bucketed into a small number of common
* sizes. If the allocation would be too big or too small it is rejected by
* the pool.
*
* The free list is maintained by a bitmap, one per page/Bucket. Since each
* Bucket only maintains objects of a single size, each bit set or clear
* indicates the status of one object.
*
* An allocation walks the list of buckets and asks each in turn if there is
* space. If a Bucket does not have any space, it sets a boolean flag m_is_full
* so that it does not need to rescan when asked again. The flag is cleared on
* the first free from that bucket. If no bucket has space, but there are some
* free pages left, a free page is claimed as a new Bucket for that size. In
* this case it is pushed to the front of the list so it is first in line to
* service new requests.
*
* A deallocation also walks the list of buckets for the size and asks each
* Bucket in turn if it recognizes the pointer. When a Bucket becomes empty as
* a result of a deallocation, it is recycled back into the free pool. When
* this happens, the Bucket's page goes to the end of the free list. All pages
* on the free list are marked in the MMU as noaccess, so anything touching
* them will immediately crash. They are only marked R/W once placed into a
* new bucket. Making the free list FIFO maximizes the time between the last
* free of a bucket and that page being writable again, maximizing the chances
* of crashing after a use-after-free.
*
* Future work
* -----------
*
* The allocator is protected by a global lock. It would be good to break this
* up, since almost all of the work can actually be done in parallel,
* especially when allocating objects of different sizes (which can't possibly
* share a bucket).
*
* It may be worthwhile to optimize deallocation by storing the Buckets in
* order (by pointer value), which would allow binary search to find the
* owning bucket.
*
* A useful addition would be to randomize the allocations. Memory_Pool would
* be changed to also receive a RandomNumberGenerator& object (presumably the
* system RNG, or maybe a ChaCha_RNG seeded with the system RNG). Then the
* bucket to use and the offset within the bucket would be chosen randomly,
* instead of using first fit.
*
* Right now we don't make any provision for threading, so if two threads both
* allocate 32 byte values one after the other, the two allocations will
* likely share a cache line. Ensuring that distinct threads will (tend to)
* use distinct buckets would reduce this.
*
* Supporting a realloc-style API may be useful.
*/
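
/*
* A minimal sketch of the calloc fallback pattern described above, assuming a
* caller that owns both the pool and a heap-backed fallback path. The names
* secure_alloc/secure_free are hypothetical and not part of Botan; only
* Memory_Pool::allocate and Memory_Pool::deallocate are real, and a real
* caller (such as the mlock allocator) would also scrub memory before free:
*
*    void* secure_alloc(Memory_Pool& pool, size_t n)
*       {
*       if(void* p = pool.allocate(n))
*          return p; // serviced from one of the pool's mlocked pages
*       return std::calloc(n, 1); // pool declined: bad size or out of room
*       }
*
*    void secure_free(Memory_Pool& pool, void* p, size_t n)
*       {
*       if(!pool.deallocate(p, n)) // false if p was not pool memory
*          std::free(p);
*       }
*/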

namespace {

size_t choose_bucket(size_t n)
   {
   const size_t MINIMUM_ALLOCATION = 16;
   const size_t MAXIMUM_ALLOCATION = 256;

   if(n < MINIMUM_ALLOCATION || n > MAXIMUM_ALLOCATION)
      return 0;

   // Need to tune these

   const size_t buckets[] = {
      16, 24, 32, 48, 64, 80, 96, 112, 128, 160, 192, 256, 0,
   };

   for(size_t i = 0; buckets[i]; ++i)
      {
      if(n <= buckets[i])
         {
         return buckets[i];
         }
      }

   return 0;
   }
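
/*
* For illustration (hypothetical calls, not code in this file):
* choose_bucket(16) returns 16 and choose_bucket(20) returns 24, the smallest
* bucket size that fits the request; choose_bucket(8) and choose_bucket(300)
* both return 0, signalling that the pool declines the request and the caller
* should fall back to calloc.
*/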

inline bool ptr_in_pool(const void* pool_ptr, size_t poolsize,
                        const void* buf_ptr, size_t bufsize)
   {
   const uintptr_t pool = reinterpret_cast<uintptr_t>(pool_ptr);
   const uintptr_t buf = reinterpret_cast<uintptr_t>(buf_ptr);
   return (buf >= pool) && (buf + bufsize <= pool + poolsize);
   }

// return index of the lowest (least significant) set bit
template<typename T>
size_t find_set_bit(T b)
   {
   size_t s = 8*sizeof(T) / 2;
   size_t bit = 0;

   // In this context we don't need to be const-time
   while(s > 0)
      {
      const T mask = (static_cast<T>(1) << s) - 1;
      if((b & mask) == 0)
         {
         bit += s;
         b >>= s;
         }
      s /= 2;
      }

   return bit;
   }
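
/*
* A worked trace of the binary search above, assuming T = uint8_t and
* b = 0x60: s starts at 4; the low nibble of b is zero, so bit becomes 4 and
* b becomes 0x06. At s = 2 the low two bits are nonzero, so nothing moves.
* At s = 1 the low bit is zero, so bit becomes 5. The lowest set bit of 0x60
* is indeed bit 5.
*/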

class BitMap final
   {
   public:
      explicit BitMap(size_t bits) : m_len(bits)
         {
         m_bits.resize((bits + BITMASK_BITS - 1) / BITMASK_BITS);
         m_main_mask = static_cast<bitmask_type>(~0);
         m_last_mask = m_main_mask;

         if(bits % BITMASK_BITS != 0)
            m_last_mask = (static_cast<bitmask_type>(1) << (bits % BITMASK_BITS)) - 1;
         }

      bool find_free(size_t* bit);

      void free(size_t bit)
         {
         BOTAN_ASSERT_NOMSG(bit <= m_len);
         const size_t w = bit / BITMASK_BITS;
         BOTAN_ASSERT_NOMSG(w < m_bits.size());
         const bitmask_type mask = static_cast<bitmask_type>(1) << (bit % BITMASK_BITS);
         m_bits[w] = m_bits[w] & (~mask);
         }

      bool empty() const
         {
         for(auto bitset : m_bits)
            {
            if(bitset != 0)
               {
               return false;
               }
            }

         return true;
         }

   private:
#if defined(BOTAN_ENABLE_DEBUG_ASSERTS)
      typedef uint8_t bitmask_type;
      enum { BITMASK_BITS = 8 };
#else
      typedef word bitmask_type;
      enum { BITMASK_BITS = BOTAN_MP_WORD_BITS };
#endif

      size_t m_len;
      bitmask_type m_main_mask;
      bitmask_type m_last_mask;
      std::vector<bitmask_type> m_bits;
   };

bool BitMap::find_free(size_t* bit)
   {
   for(size_t i = 0; i != m_bits.size(); ++i)
      {
      const bitmask_type mask = (i == m_bits.size() - 1) ? m_last_mask : m_main_mask;
      if((m_bits[i] & mask) != mask)
         {
         const size_t free_bit = find_set_bit(~m_bits[i]);
         const bitmask_type bmask = static_cast<bitmask_type>(1) << (free_bit % BITMASK_BITS);
         BOTAN_ASSERT_NOMSG((m_bits[i] & bmask) == 0);
         m_bits[i] |= bmask;
         *bit = BITMASK_BITS*i + free_bit;
         return true;
         }
      }

   return false;
   }
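
/*
* Behavior in brief (a hypothetical sequence): after BitMap bm(3), three
* calls to bm.find_free(&bit) hand out bits 0, 1 and 2 in first-fit order and
* return true; a fourth call returns false since all three bits are taken.
* bm.free(1) clears bit 1, after which find_free hands out bit 1 again.
*/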

}

class Bucket final
   {
   public:
      Bucket(uint8_t* mem, size_t mem_size, size_t item_size) :
         m_item_size(item_size),
         m_page_size(mem_size),
         m_range(mem),
         m_bitmap(mem_size / item_size),
         m_is_full(false)
         {
         }

      uint8_t* alloc()
         {
         if(m_is_full)
            {
            // I know I am full
            return nullptr;
            }

         size_t offset;
         if(!m_bitmap.find_free(&offset))
            {
            // I just found out I am full
            m_is_full = true;
            return nullptr;
            }

         BOTAN_ASSERT(offset * m_item_size < m_page_size, "Offset is in range");
         return m_range + m_item_size*offset;
         }

      bool free(void* p)
         {
         if(!in_this_bucket(p))
            return false;

         /*
         Also zero any trailing bytes, which should not have been written to,
         but maybe the user was bad and wrote past the end.
         */
         std::memset(p, 0, m_item_size);

         const size_t offset = (reinterpret_cast<uintptr_t>(p) - reinterpret_cast<uintptr_t>(m_range)) / m_item_size;

         m_bitmap.free(offset);
         m_is_full = false;

         return true;
         }

      bool in_this_bucket(void* p) const
         {
         return ptr_in_pool(m_range, m_page_size, p, m_item_size);
         }

      bool empty() const
         {
         return m_bitmap.empty();
         }

      uint8_t* ptr() const
         {
         return m_range;
         }

   private:
      size_t m_item_size;
      size_t m_page_size;
      uint8_t* m_range;
      BitMap m_bitmap;
      bool m_is_full;
   };

Memory_Pool::Memory_Pool(const std::vector<void*>& pages, size_t page_size) :
   m_page_size(page_size)
   {
   m_min_page_ptr = ~static_cast<uintptr_t>(0);
   m_max_page_ptr = 0;

   for(auto page : pages)
      {
      const uintptr_t p = reinterpret_cast<uintptr_t>(page);

      m_min_page_ptr = std::min(p, m_min_page_ptr);
      m_max_page_ptr = std::max(p, m_max_page_ptr);

      clear_bytes(page, m_page_size);
#if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
      OS::page_prohibit_access(page);
#endif
      m_free_pages.push_back(static_cast<uint8_t*>(page));
      }

   /*
   Right now m_max_page_ptr points to the start of the last page; adjust it
   to instead point to the first byte of the following page
   */
   m_max_page_ptr += page_size;
   }

Memory_Pool::~Memory_Pool()
   {
#if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
   for(size_t i = 0; i != m_free_pages.size(); ++i)
      {
      OS::page_allow_access(m_free_pages[i]);
      }
#endif
   }

void* Memory_Pool::allocate(size_t n)
   {
   if(n > m_page_size)
      return nullptr;

   const size_t n_bucket = choose_bucket(n);

   if(n_bucket > 0)
      {
      lock_guard_type<mutex_type> lock(m_mutex);

      std::deque<Bucket>& buckets = m_buckets_for[n_bucket];

      /*
      It would be optimal to pick the bucket with the most usage, since a
      bucket with say 1 item allocated out of it has a high chance of later
      becoming empty, at which point the whole page can be recycled.
      */
      for(auto& bucket : buckets)
         {
         if(uint8_t* p = bucket.alloc())
            return p;

         // If the bucket is full, maybe move it to the end of the list?
         // Otoh bucket search should be very fast
         }

      if(!m_free_pages.empty())
         {
         uint8_t* ptr = m_free_pages[0];
         m_free_pages.pop_front();
#if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
         OS::page_allow_access(ptr);
#endif
         buckets.push_front(Bucket(ptr, m_page_size, n_bucket));
         void* p = buckets[0].alloc();
         BOTAN_ASSERT_NOMSG(p != nullptr);
         return p;
         }
      }

   // out of room
   return nullptr;
   }
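
/*
* A concrete walk-through (hypothetical values): the first allocate(32) finds
* no Bucket for size 32, claims a free page as a new 32-byte Bucket, and
* returns its first slot. Later allocate(32) calls are served from that same
* Bucket until its bitmap fills, at which point another free page is claimed
* or, if none remain, nullptr is returned and the caller falls back to calloc.
*/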

bool Memory_Pool::deallocate(void* p, size_t len) noexcept
   {
   // Do a fast range check first, before taking the lock
   const uintptr_t p_val = reinterpret_cast<uintptr_t>(p);
   if(p_val < m_min_page_ptr || p_val > m_max_page_ptr)
      return false;

   const size_t n_bucket = choose_bucket(len);

   if(n_bucket != 0)
      {
      try
         {
         lock_guard_type<mutex_type> lock(m_mutex);

         std::deque<Bucket>& buckets = m_buckets_for[n_bucket];

         for(size_t i = 0; i != buckets.size(); ++i)
            {
            Bucket& bucket = buckets[i];
            if(bucket.free(p))
               {
               if(bucket.empty())
                  {
#if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
                  OS::page_prohibit_access(bucket.ptr());
#endif
                  m_free_pages.push_back(bucket.ptr());

                  // Swap the now-empty bucket to the back and pop: an O(1) erase
                  if(i != buckets.size() - 1)
                     std::swap(buckets.back(), buckets[i]);
                  buckets.pop_back();
                  }
               return true;
               }
            }
         }
      catch(...)
         {
         /*
         * The only exceptions that can be thrown by the above code come from
         * either the STL or BOTAN_ASSERT failures. In either case such an
         * error indicates a logic error or data corruption in the memory
         * allocator, such that it is no longer safe to continue executing.
         *
         * Since this function is noexcept, simply letting the exception
         * escape is sufficient for terminate to be called. However in this
         * scenario it is implementation defined whether any stack unwinding
         * is performed. Since stack unwinding could cause further memory
         * deallocations, this could result in further corruption of this
         * allocator's state. To prevent this, call terminate directly.
         */
         std::terminate();
         }
      }

   return false;
   }

}