Coverage Report

Created: 2022-06-23 06:44

/src/botan/src/lib/utils/mem_pool/mem_pool.cpp
Line |  Count | Source
   1 |        | /*
   2 |        | * (C) 2018,2019 Jack Lloyd
   3 |        | *
   4 |        | * Botan is released under the Simplified BSD License (see license.txt)
   5 |        | */
   6 |        |
   7 |        | #include <botan/internal/mem_pool.h>
   8 |        | #include <botan/mem_ops.h>
   9 |        | #include <algorithm>
  10 |        |
  11 |        | #if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
  12 |        |    #include <botan/internal/os_utils.h>
  13 |        | #endif
  14 |        |
  15 |        | namespace Botan {
  16 |        |
  17 |        | /*
  18 |        | * Memory pool theory of operation
  19 |        | *
  20 |        | * This allocator is not useful for general purposes but works well within the
  21 |        | * context of allocating cryptographic keys. It makes several assumptions which
  22 |        | * don't work for implementing malloc but simplify and speed up the implementation:
  23 |        | *
  24 |        | * - There is some set of pages, which cannot be expanded later. These are pages
  25 |        | *   which were allocated, mlocked and passed to the Memory_Pool constructor.
  26 |        | *
  27 |        | * - The allocator is allowed to return null anytime it feels like not servicing
  28 |        | *   a request, in which case the request will be sent to calloc instead. In
  29 |        | *   particular, requests which are too small or too large are rejected.
  30 |        | *
  31 |        | * - Most allocations are powers of 2, the remainder are usually a multiple of 8
  32 |        | *
  33 |        | * - Free requests include the size of the allocation, so there is no need to
  34 |        | *   track this within the pool.
  35 |        | *
  36 |        | * - Alignment is important to the caller. For this allocator, any allocation of
  37 |        | *   size N is aligned evenly at N bytes.
  38 |        | *
  39 |        | * Initially each page is in the free page list. Each page is used for just one
  40 |        | * size of allocation, with requests bucketed into a small number of common
  41 |        | * sizes. If the allocation would be too big or too small it is rejected by the pool.
  42 |        | *
  43 |        | * The free list is maintained by a bitmap, one per page/Bucket. Since each
  44 |        | * Bucket only maintains objects of a single size, each bit set or clear
  45 |        | * indicates the status of one object.
  46 |        | *
  47 |        | * An allocation walks the list of buckets and asks each in turn if there is
  48 |        | * space. If a Bucket does not have any space, it sets a boolean flag m_is_full
  49 |        | * so that it does not need to rescan when asked again. The flag is cleared on
  50 |        | * first free from that bucket. If no bucket has space, but there are some free
  51 |        | * pages left, a free page is claimed as a new Bucket for that size. In this case
  52 |        | * it is pushed to the front of the list so it is first in line to service new
  53 |        | * requests.
  54 |        | *
  55 |        | * A deallocation also walks the list of buckets for the size and asks each
  56 |        | * Bucket in turn if it recognizes the pointer. When a Bucket becomes empty as a
  57 |        | * result of a deallocation, it is recycled back into the free pool. When this
  58 |        | * happens, the Bucket's page goes to the end of the free list. All pages on the
  59 |        | * free list are marked in the MMU as noaccess, so anything touching them will
  60 |        | * immediately crash. They are only marked R/W once placed into a new bucket.
  61 |        | * Making the free list FIFO maximizes the time between the last free of a bucket
  62 |        | * and that page being writable again, maximizing chances of crashing after a
  63 |        | * use-after-free.
  64 |        | *
  65 |        | * Future work
  66 |        | * -------------
  67 |        | *
  68 |        | * The allocator is protected by a global lock. It would be good to break this
  69 |        | * up, since almost all of the work can actually be done in parallel especially
  70 |        | * when allocating objects of different sizes (which can't possibly share a
  71 |        | * bucket).
  72 |        | *
  73 |        | * It may be worthwhile to optimize deallocation by storing the Buckets in order
  74 |        | * (by pointer value) which would allow binary search to find the owning bucket.
  75 |        | *
  76 |        | * A useful addition would be to randomize the allocations. Memory_Pool would be
  77 |        | * changed to also receive a RandomNumberGenerator& object (presumably the system
  78 |        | * RNG, or maybe a ChaCha_RNG seeded with system RNG). Then the bucket to use and
  79 |        | * the offset within the bucket would be chosen randomly, instead of using first fit.
  80 |        | *
  81 |        | * Right now we don't make any provision for threading, so if two threads both
  82 |        | * allocate 32 byte values one after the other, the two allocations will likely
  83 |        | * share a cache line. Ensuring that distinct threads will (tend to) use distinct
  84 |        | * buckets would reduce this.
  85 |        | *
  86 |        | * Supporting a realloc-style API may be useful.
  87 |        | */
  88 |        |
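Note: the calloc fallback described above is the caller's responsibility; when Memory_Pool declines a request it simply returns null. A minimal sketch of such a wrapper (hypothetical helper names; the real fallback lives in the caller, not in this file):

   #include <botan/internal/mem_pool.h>
   #include <cstdlib>

   // Hypothetical wrapper: try the pool first, fall back to the libc heap.
   void* secure_alloc(Botan::Memory_Pool& pool, std::size_t n)
      {
      if(void* p = pool.allocate(n))
         return p;
      return std::calloc(1, n); // pool declined: too small, too large, or full
      }

   void secure_free(Botan::Memory_Pool& pool, void* p, std::size_t n)
      {
      // deallocate() returns false if p does not belong to the pool
      if(p && !pool.deallocate(p, n))
         std::free(p);
      }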
  89 |        | namespace {
  90 |        |
  91 |        | size_t choose_bucket(size_t n)
  92 |   260k |    {
  93 |   260k |    const size_t MINIMUM_ALLOCATION = 16;
  94 |   260k |    const size_t MAXIMUM_ALLOCATION = 256;
  95 |        |
  96 |   260k |    if(n < MINIMUM_ALLOCATION || n > MAXIMUM_ALLOCATION)
  97 |  5.80k |       return 0;
  98 |        |
  99 |        |    // Need to tune these
 100 |        |
 101 |   254k |    const size_t buckets[] = {
 102 |   254k |       16, 24, 32, 48, 64, 80, 96, 112, 128, 160, 192, 256, 0,
 103 |   254k |    };
 104 |        |
 105 |   936k |    for(size_t i = 0; buckets[i]; ++i)
 106 |   936k |       {
 107 |   936k |       if(n <= buckets[i])
 108 |   254k |          {
 109 |   254k |          return buckets[i];
 110 |   254k |          }
 111 |   936k |       }
 112 |        |
 113 |      0 |    return 0;
 114 |   254k |    }
 115 |        |
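For concreteness, the rounding this implements (values read off the bucket table above; worked examples, not library code):

   // choose_bucket rounds a request up to the next bucket size:
   //   choose_bucket(16)  == 16
   //   choose_bucket(17)  == 24
   //   choose_bucket(33)  == 48
   //   choose_bucket(100) == 112
   //   choose_bucket(250) == 256
   // and rejects anything outside [MINIMUM_ALLOCATION, MAXIMUM_ALLOCATION]:
   //   choose_bucket(8)   == 0
   //   choose_bucket(512) == 0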
 116 |        | inline bool ptr_in_pool(const void* pool_ptr, size_t poolsize,
 117 |        |                         const void* buf_ptr, size_t bufsize)
 118 |   107k |    {
 119 |   107k |    const uintptr_t pool = reinterpret_cast<uintptr_t>(pool_ptr);
 120 |   107k |    const uintptr_t buf = reinterpret_cast<uintptr_t>(buf_ptr);
 121 |   107k |    return (buf >= pool) && (buf + bufsize <= pool + poolsize);
 122 |   107k |    }
 123 |        |
 124 |        | // return index of first set bit
 125 |        | template<typename T>
 126 |        | size_t find_set_bit(T b)
 127 |   134k |    {
 128 |   134k |    size_t s = 8*sizeof(T) / 2;
 129 |   134k |    size_t bit = 0;
 130 |        |
 131 |        |    // In this context we don't need to be const-time
 132 |   938k |    while(s > 0)
 133 |   804k |       {
 134 |   804k |       const T mask = (static_cast<T>(1) << s) - 1;
 135 |   804k |       if((b & mask) == 0)
 136 |   164k |          {
 137 |   164k |          bit += s;
 138 |   164k |          b >>= s;
 139 |   164k |          }
 140 |   804k |       s /= 2;
 141 |   804k |       }
 142 |        |
 143 |   134k |    return bit;
 144 |   134k |    }
 145 |        |
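To see the halving search at work, trace find_set_bit(0x60) with T = uint8_t; on C++20 the <bit> header computes the same quantity (shown here as a check, assuming a C++20 compiler):

   // Trace for b = 0x60 (binary 0110'0000), s starting at 4:
   //   s=4: (b & 0x0f) == 0  -> bit = 4, b = 0x06
   //   s=2: (b & 0x03) == 2  -> nonzero, keep the low half
   //   s=1: (b & 0x01) == 0  -> bit = 5, b = 0x03
   // returns 5, the index of the lowest set bit of 0x60.
   #include <bit>
   #include <cstdint>
   static_assert(std::countr_zero(std::uint8_t(0x60)) == 5);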
 146 |        | class BitMap final
 147 |        |    {
 148 |        |    public:
 149 |        |       explicit BitMap(size_t bits) : m_len(bits)
 150 |  57.8k |          {
 151 |  57.8k |          m_bits.resize((bits + BITMASK_BITS - 1) / BITMASK_BITS);
 152 |  57.8k |          m_main_mask = static_cast<bitmask_type>(~0);
 153 |  57.8k |          m_last_mask = m_main_mask;
 154 |        |
 155 |  57.8k |          if(bits % BITMASK_BITS != 0)
 156 |  21.0k |             m_last_mask = (static_cast<bitmask_type>(1) << (bits % BITMASK_BITS)) - 1;
 157 |  57.8k |          }
 158 |        |
 159 |        |       bool find_free(size_t* bit);
 160 |        |
 161 |        |       void free(size_t bit)
 162 |   102k |          {
 163 |   102k |          BOTAN_ASSERT_NOMSG(bit <= m_len);
 164 |   102k |          const size_t w = bit / BITMASK_BITS;
 165 |   102k |          BOTAN_ASSERT_NOMSG(w < m_bits.size());
 166 |   102k |          const bitmask_type mask = static_cast<bitmask_type>(1) << (bit % BITMASK_BITS);
 167 |   102k |          m_bits[w] = m_bits[w] & (~mask);
 168 |   102k |          }
 169 |        |
 170 |        |       bool empty() const
 171 |   102k |          {
 172 |   102k |          for(auto bitset : m_bits)
 173 |   205k |             {
 174 |   205k |             if(bitset != 0)
 175 |  47.4k |                {
 176 |  47.4k |                return false;
 177 |  47.4k |                }
 178 |   205k |             }
 179 |        |
 180 |  55.5k |          return true;
 181 |   102k |          }
 182 |        |
 183 |        |   private:
 184 |        | #if defined(BOTAN_ENABLE_DEBUG_ASSERTS)
 185 |        |       typedef uint8_t bitmask_type;
 186 |        |       enum { BITMASK_BITS = 8 };
 187 |        | #else
 188 |        |       typedef word bitmask_type;
 189 |        |       enum { BITMASK_BITS = BOTAN_MP_WORD_BITS };
 190 |        | #endif
 191 |        |
 192 |        |       size_t m_len;
 193 |        |       bitmask_type m_main_mask;
 194 |        |       bitmask_type m_last_mask;
 195 |        |       std::vector<bitmask_type> m_bits;
 196 |        |    };
 197 |        |
 198 |        | bool BitMap::find_free(size_t* bit)
 199 |   134k |    {
 200 |   156k |    for(size_t i = 0; i != m_bits.size(); ++i)
 201 |   155k |       {
 202 |   155k |       const bitmask_type mask = (i == m_bits.size() - 1) ? m_last_mask : m_main_mask;
 203 |   155k |       if((m_bits[i] & mask) != mask)
 204 |   134k |          {
 205 |   134k |          const size_t free_bit = find_set_bit(~m_bits[i]);
 206 |   134k |          const bitmask_type bmask = static_cast<bitmask_type>(1) << (free_bit % BITMASK_BITS);
 207 |   134k |          BOTAN_ASSERT_NOMSG((m_bits[i] & bmask) == 0);
 208 |   134k |          m_bits[i] |= bmask;
 209 |   134k |          *bit = BITMASK_BITS*i + free_bit;
 210 |   134k |          return true;
 211 |   134k |          }
 212 |   155k |       }
 213 |        |
 214 |    566 |    return false;
 215 |   134k |    }
 216 |        |
 217 |        | }
 218 |        |
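The BitMap contract in miniature: find_free() hands out the lowest clear bit and sets it, free() clears it again, and empty() is true once every bit is clear. A sketch of the round trip (illustrative only; BitMap is local to this file):

   BitMap bm(10);       // track 10 slots
   size_t a = 0, b = 0;
   bm.find_free(&a);    // a == 0; bit 0 is now set
   bm.find_free(&b);    // b == 1; always the lowest clear bit (first fit)
   bm.free(a);          // bit 0 is clear again
   bm.find_free(&a);    // a == 0; freed slots are handed out again first
   // bm.empty() stays false until bits 0 and 1 are both freed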
 219 |        | class Bucket final
 220 |        |    {
 221 |        |    public:
 222 |        |       Bucket(uint8_t* mem, size_t mem_size, size_t item_size) :
 223 |        |          m_item_size(item_size),
 224 |        |          m_page_size(mem_size),
 225 |        |          m_range(mem),
 226 |        |          m_bitmap(mem_size / item_size),
 227 |        |          m_is_full(false)
 228 |  57.8k |          {
 229 |  57.8k |          }
 230 |        |
 231 |        |       uint8_t* alloc()
 232 |   143k |          {
 233 |   143k |          if(m_is_full)
 234 |  8.44k |             {
 235 |        |             // I know I am full
 236 |  8.44k |             return nullptr;
 237 |  8.44k |             }
 238 |        |
 239 |   134k |          size_t offset;
 240 |   134k |          if(!m_bitmap.find_free(&offset))
 241 |    566 |             {
 242 |        |             // I just found out I am full
 243 |    566 |             m_is_full = true;
 244 |    566 |             return nullptr;
 245 |    566 |             }
 246 |        |
 247 |   134k |          BOTAN_ASSERT(offset * m_item_size < m_page_size, "Offset is in range");
 248 |   134k |          return m_range + m_item_size*offset;
 249 |   134k |          }
 250 |        |
 251 |        |       bool free(void* p)
 252 |   107k |          {
 253 |   107k |          if(!in_this_bucket(p))
 254 |  4.34k |             return false;
 255 |        |
 256 |        |          /*
 257 |        |          Also zero any trailing bytes, which should not have been written to,
 258 |        |          but maybe the user was bad and wrote past the end.
 259 |        |          */
 260 |   102k |          std::memset(p, 0, m_item_size);
 261 |        |
 262 |   102k |          const size_t offset = (reinterpret_cast<uintptr_t>(p) - reinterpret_cast<uintptr_t>(m_range)) / m_item_size;
 263 |        |
 264 |   102k |          m_bitmap.free(offset);
 265 |   102k |          m_is_full = false;
 266 |        |
 267 |   102k |          return true;
 268 |   107k |          }
 269 |        |
 270 |        |       bool in_this_bucket(void* p) const
 271 |   107k |          {
 272 |   107k |          return ptr_in_pool(m_range, m_page_size, p, m_item_size);
 273 |   107k |          }
 274 |        |
 275 |        |       bool empty() const
 276 |   102k |          {
 277 |   102k |          return m_bitmap.empty();
 278 |   102k |          }
 279 |        |
 280 |        |       uint8_t* ptr() const
 281 |  55.5k |          {
 282 |  55.5k |          return m_range;
 283 |  55.5k |          }
 284 |        |
 285 |        |    private:
 286 |        |       size_t m_item_size;
 287 |        |       size_t m_page_size;
 288 |        |       uint8_t* m_range;
 289 |        |       BitMap m_bitmap;
 290 |        |       bool m_is_full;
 291 |        |    };
 292 |        |
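A Bucket's address arithmetic, with assumed example values (a 4096-byte page and 32-byte items): alloc() returns m_range + 32*k for bitmap offset k, and free() inverts that division:

   // With m_range == 0x1000, m_page_size == 4096, m_item_size == 32:
   //   the page holds 4096/32 == 128 slots, so the BitMap has 128 bits
   //   slot k = 5 lives at 0x1000 + 32*5 == 0x10a0
   //   free(p) recovers k as (0x10a0 - 0x1000) / 32 == 5
   // Since pages are page-aligned, the power-of-two item sizes also land
   // on item_size-aligned addresses, matching the alignment note above.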
 293 |        | Memory_Pool::Memory_Pool(const std::vector<void*>& pages, size_t page_size) :
 294 |        |    m_page_size(page_size)
 295 |  1.12k |    {
 296 |  1.12k |    m_min_page_ptr = ~static_cast<uintptr_t>(0);
 297 |  1.12k |    m_max_page_ptr = 0;
 298 |        |
 299 |  1.12k |    for(auto page : pages)
 300 |  4.50k |       {
 301 |  4.50k |       const uintptr_t p = reinterpret_cast<uintptr_t>(page);
 302 |        |
 303 |  4.50k |       m_min_page_ptr = std::min(p, m_min_page_ptr);
 304 |  4.50k |       m_max_page_ptr = std::max(p, m_max_page_ptr);
 305 |        |
 306 |  4.50k |       clear_bytes(page, m_page_size);
 307 |        | #if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
 308 |        |       OS::page_prohibit_access(page);
 309 |        | #endif
 310 |  4.50k |       m_free_pages.push_back(static_cast<uint8_t*>(page));
 311 |  4.50k |       }
 312 |        |
 313 |        |    /*
 314 |        |    Right now this points to the start of the last page, adjust it to instead
 315 |        |    point to the first byte of the following page
 316 |        |    */
 317 |  1.12k |    m_max_page_ptr += page_size;
 318 |  1.12k |    }
 319 |        |
 320 |        | Memory_Pool::~Memory_Pool()
 321 |  1.12k |    {
 322 |        | #if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
 323 |        |    for(size_t i = 0; i != m_free_pages.size(); ++i)
 324 |        |       {
 325 |        |       OS::page_allow_access(m_free_pages[i]);
 326 |        |       }
 327 |        | #endif
 328 |  1.12k |    }
 329 |        |
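The constructor assumes its pages were already allocated and mlocked by the caller. A rough POSIX sketch of producing such pages (error handling elided; inside the library this is handled by the OS utility layer, not by code like this):

   #include <sys/mman.h>
   #include <cstddef>
   #include <vector>

   std::vector<void*> make_locked_pages(std::size_t count, std::size_t page_size)
      {
      std::vector<void*> pages;
      for(std::size_t i = 0; i != count; ++i)
         {
         void* p = mmap(nullptr, page_size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
         if(p == MAP_FAILED)
            break;
         mlock(p, page_size); // keep key material out of swap
         pages.push_back(p);
         }
      return pages;
      }

   // Botan::Memory_Pool pool(make_locked_pages(4, 4096), 4096);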
 330 |        | void* Memory_Pool::allocate(size_t n)
 331 |   157k |    {
 332 |   157k |    if(n > m_page_size)
 333 |      0 |       return nullptr;
 334 |        |
 335 |   157k |    const size_t n_bucket = choose_bucket(n);
 336 |        |
 337 |   157k |    if(n_bucket > 0)
 338 |   151k |       {
 339 |   151k |       lock_guard_type<mutex_type> lock(m_mutex);
 340 |        |
 341 |   151k |       std::deque<Bucket>& buckets = m_buckets_for[n_bucket];
 342 |        |
 343 |        |       /*
 344 |        |       It would be optimal to pick the bucket with the most usage,
 345 |        |       since a bucket with say 1 item allocated out of it has a high
 346 |        |       chance of later becoming free, and then the whole page can be
 347 |        |       recycled.
 348 |        |       */
 349 |   151k |       for(auto& bucket : buckets)
 350 |  85.2k |          {
 351 |  85.2k |          if(uint8_t* p = bucket.alloc())
 352 |  76.2k |             return p;
 353 |        |
 354 |        |          // If the bucket is full, maybe move it to the end of the list?
 355 |        |          // OTOH bucket search should be very fast
 356 |  85.2k |          }
 357 |        |
 358 |  75.7k |       if(!m_free_pages.empty())
 359 |  57.8k |          {
 360 |  57.8k |          uint8_t* ptr = m_free_pages[0];
 361 |  57.8k |          m_free_pages.pop_front();
 362 |        | #if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
 363 |        |          OS::page_allow_access(ptr);
 364 |        | #endif
 365 |  57.8k |          buckets.push_front(Bucket(ptr, m_page_size, n_bucket));
 366 |  57.8k |          void* p = buckets[0].alloc();
 367 |  57.8k |          BOTAN_ASSERT_NOMSG(p != nullptr);
 368 |  57.8k |          return p;
 369 |  57.8k |          }
 370 |  75.7k |       }
 371 |        |
 372 |        |    // out of room
 373 |  23.6k |    return nullptr;
 374 |   157k |    }
 375 |        |
 376 |        | bool Memory_Pool::deallocate(void* p, size_t len) noexcept
 377 |   102k |    {
 378 |        |    // Do a fast range check first, before taking the lock
 379 |   102k |    const uintptr_t p_val = reinterpret_cast<uintptr_t>(p);
 380 |   102k |    if(p_val < m_min_page_ptr || p_val > m_max_page_ptr)
 381 |      0 |       return false;
 382 |        |
 383 |   102k |    const size_t n_bucket = choose_bucket(len);
 384 |        |
 385 |   102k |    if(n_bucket != 0)
 386 |   102k |       {
 387 |   102k |       try
 388 |   102k |          {
 389 |   102k |          lock_guard_type<mutex_type> lock(m_mutex);
 390 |        |
 391 |   102k |          std::deque<Bucket>& buckets = m_buckets_for[n_bucket];
 392 |        |
 393 |   107k |          for(size_t i = 0; i != buckets.size(); ++i)
 394 |   107k |             {
 395 |   107k |             Bucket& bucket = buckets[i];
 396 |   107k |             if(bucket.free(p))
 397 |   102k |                {
 398 |   102k |                if(bucket.empty())
 399 |  55.5k |                   {
 400 |        | #if defined(BOTAN_MEM_POOL_USE_MMU_PROTECTIONS)
 401 |        |                   OS::page_prohibit_access(bucket.ptr());
 402 |        | #endif
 403 |  55.5k |                   m_free_pages.push_back(bucket.ptr());
 404 |        |
 405 |  55.5k |                   if(i != buckets.size() - 1)
 406 |    483 |                      std::swap(buckets.back(), buckets[i]);
 407 |  55.5k |                   buckets.pop_back();
 408 |  55.5k |                   }
 409 |   102k |                return true;
 410 |   102k |                }
 411 |   107k |             }
 412 |   102k |          }
 413 |   102k |       catch(...)
 414 |   102k |          {
 415 |        |          /*
 416 |        |          * The only exceptions that can be thrown by the above code come from
 417 |        |          * either the STL or BOTAN_ASSERT failures. In either case, such an
 418 |        |          * error indicates a logic error or data corruption in the memory
 419 |        |          * allocator such that it is no longer safe to continue executing.
 420 |        |          *
 421 |        |          * Since this function is noexcept, simply letting the exception escape
 422 |        |          * is sufficient for terminate to be called. However in this scenario
 423 |        |          * it is implementation defined if any stack unwinding is performed.
 424 |        |          * Since stack unwinding could cause further memory deallocations this
 425 |        |          * could result in further corruption of this allocator's state. To prevent
 426 |        |          * this, call terminate directly.
 427 |        |          */
 428 |      0 |          std::terminate();
 429 |      0 |          }
 430 |   102k |       }
 431 |        |
 432 |      0 |    return false;
 433 |   102k |    }
 434 |        |
 435 |        | }
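Finally, note the contract deallocate() implies for callers: the original allocation size must be passed back, since (per the design notes at the top) the pool never records it. A round trip, assuming a constructed pool named pool:

   void* p = pool.allocate(32);   // served from the 32-byte bucket
   if(p)
      {
      // ... use the 32 bytes ...
      const bool was_pooled = pool.deallocate(p, 32); // pass the original size
      // was_pooled is true here: the pool zeroed the slot and reclaimed it.
      // It would be false for a pointer the pool does not own, e.g. one
      // that came from a calloc fallback, which the caller must free itself.
      }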