/proc/self/cwd/cpp/htmlparser/allocator.h
Line | Count | Source (jump to first uncovered line) |
1 | | // An average HTML DOM tree consists of hundreds or thousands of nodes and |
2 | | // their attributes. The malloc/new and free/delete are very expensive |
3 | | // operations for small objects and htmlparser spends majority of the time in |
4 | | // allocation and deallocation of these small objects. |
5 | | // |
6 | | // Usage: |
7 | | // One Allocator for each type. This approach simplifies the design and |
8 | | // alignment requirements of different types. |
9 | | // |
10 | | // // Allocator for Node with 8k blocks, and 8 byte alignment boundary. |
11 | | // Allocator<Node> node_allocator(8192); |
12 | | // |
13 | | // // Allocator for Attributes with 4k blocks, and 1 byte alignment. |
14 | | // Allocator<Attribute> attr_allocator(4096); |
15 | | // |
16 | | // // Default block size. |
17 | | // Allocator<Node> node_allocator; // Block size = OS page_size. |
18 | | // |
19 | | // The allocator takes care of alignment requirements of the objects. The blocks |
20 | | // are always created in the multiples of page_size. For the following Allocator |
21 | | // the block size is upgraded to 4096 for 4k page_size or 8192 for 8k page_size. |
22 | | // |
23 | | // Allocator<Node> node_allocator(3000); // Block size = 4096. |
24 | | // |
25 | | // To construct the object: |
26 | | // Node* node = node_allocator.Construct(NodeType::ELEMENT_NODE); |
27 | | // |
28 | | // The construct method supports varargs. Following forwards 4 parameters to the |
29 | | // AboutMe constructor. |
30 | | // Allocator<AboutMe> alloc; |
31 | | // AboutMe* name = alloc.Construct("John", "Doe", 94043, MALE); |
32 | | // |
33 | | // THREAD SAFETY: Allocator is not thread safe. Multiple threads cannot call |
34 | | // Allocate() simultaneously. In multi-threaded program a thread local allocator |
35 | | // per thread is recommended. |
36 | | // |
37 | | // |
38 | | // --------------------------------------------------------------------------- |
39 | | // Internals for code reviewers: |
40 | | // --------------------------------------------------------------------------- |
41 | | // The allocator maintains linked list of blocks of memory buffers for |
42 | | // allocation of small objects. It allocates blocks on demand, one block at a |
43 | | // time. |
44 | | // |
45 | | // The block is always multiple of page size. It is rounded to nearest page size |
46 | | // multiple in case caller provided a different size which is not multiple of |
47 | | // page size. So a 3000 block size request becomes 4096 block and 5000 block |
48 | | // size request becomes 8192 block size respectively on a 4k page size kernel. |
49 | | // |
50 | | // The block_size is the only configurable parameter, and depends on three |
51 | | // fators: A) Object size, B) Number of objects and C) Alignment boundary. |
52 | | // |
53 | | // |
54 | | // Alignment requirements: |
55 | | // Every object type has a property called alignment requirement, which is an |
56 | | // integer value of type std::size_t representing the number of bytes between |
57 | | // successive addresses at which objects of this type can be allocated. |
58 | | // The valid alignment values are non-negative integral powers of two. |
59 | | // |
60 | | // We call an object naturally aligned if its address is aligned to its size. |
61 | | // It's called misaligned otherwise. For example, an 8-byte floating-point data |
62 | | // is naturally aligned if the address used to identify it has an 8-byte |
63 | | // alignment. |
64 | | // |
65 | | // Following data struture contains members totaling 13 bytes, but it's actual |
66 | | // size is 24 bytes due to 8 byte alignment. |
67 | | // |
68 | | // Alignment is always equal to the largest sized element in the structure. |
69 | | // |
70 | | // struct { |
71 | | // int32_t a; // 4 bytes. |
72 | | // int64_t b; // 8 bytes. |
73 | | // char c; // 1 byte. |
74 | | // }; |
75 | | // |
76 | | // The above data structure will be padded by compiler to satisfy alignment |
77 | | // requirement. |
78 | | // struct { |
79 | | // int32_t a; // 4 byte 0x0000 |
80 | | // char _pad[4]; // compiler added 8 byte padding. |
81 | | // int64_t b; // 8 byte (now at 8 byte alignment). 0x0008 |
82 | | // char c; // 1 byte (also at 8 byte alignment). 0x0010 |
83 | | // char _pad[7] // compiler added padding |
84 | | // }; |
85 | | // |
86 | | // The same data structure if fields are re-organized will consume 16 bytes, |
87 | | // still at 8 byte alignment. |
88 | | // struct { |
89 | | // int32_t a; // 4 byte at 0x00000 (like above). 0x0000 |
90 | | // char c; // 1 byte char. 0x0001 |
91 | | // chr _pad[3] // 3 bytes for 8 byte alignment. |
92 | | // int64_t b; // at 8 byte alignment. 0x0010 |
93 | | // }; |
94 | | // |
95 | | // Allocator computes the alignment requirement of the Type and starts the block |
96 | | // at the address which is multiple of 8, and hence we say the block is aligned. |
97 | | // |
98 | | // +-------------------------------------------------------------------------+ |
99 | | // | Address | Block 1 | Address | Block 2 | Address | Block 3 | |
100 | | // +----------+-------------+-----------+------------+----------+------------+ |
101 | | // | 10 | waste | 120 | Obj22 | 164 | waste | |
102 | | // | 11 | waste | 121 | | 165 | waste | |
103 | | // | 12 | waste | 122 | | 166 | waste | |
104 | | // | 13 | waste | 123 | | 167 | waste | |
105 | | // | 14 | waste | 124 | | 168 | Obj33 | |
106 | | // | 15 | waste | 125 | | 169 | | |
107 | | // | 16 | Obj1 | 126 | | 170 | | |
108 | | // | 17 | | 127 | | 171 | | |
109 | | // | N | | N | | N | | |
110 | | // +-------------------------------------------------------------------------+ |
111 | | // |
112 | | // In the above diagram there are three blocks with alignment of 8. |
113 | | // |
114 | | // Block one start address is 16 so 6 bytes are skipped (or wasted). |
115 | | // Block two start address is 120 since it is multiple of 8. |
116 | | // Block three start address is 68 causing 4 bytes to skip (or wasted). |
117 | | // |
118 | | // The block alignment is done by std::align or AlignFreeAddress method. |
119 | | // |
120 | | // The block is considered full when the remaining bytes are less than the |
121 | | // object size. So the remaining bytes in the block are skipped and new block |
122 | | // created. In our tests for 500 objects of 144 bytes and 4096 bytes block |
123 | | // about 1024 bytes wasted. |
124 | | // |
125 | | // Since objects size is always multiple of alignment, subsequent objects can be |
126 | | // allocated sequentially in the block. So if the object size is 96 bytes, the |
127 | | // second object in block 1 will be allocated 112 address in block 1. |
128 | | // |
129 | | // A block is allocated using operator new at the time of block initialization |
130 | | // and start address aligned. See NewBlock(); |
131 | | // |
132 | | // A block is freed at the time of destruction of the allocator object. |
133 | | // The allocator takes complete ownership of the objects it allocates, client is |
134 | | // not responsible for deallocating or destroying the objects allocated by this |
135 | | // allocator. Allocator will call the destructors of the objects it allocated. |
136 | | // See ~Allocator() and Destroy() method. |
137 | | // |
138 | | // IMPORTANT: Tree like structure must not destroy the child nodes or sibling |
139 | | // nodes. Allocator destroys all the objects and call its destructor, it is an |
140 | | // error to invoke destructors on objects allocated by this allocator. Allocator |
141 | | // is the master owner of all the objects. Client treats all objects as const |
142 | | // pointer as far as destruction goes. |
143 | | // |
144 | | // It is not possible to destroy random objects or free up the slots to be |
145 | | // re-used for allocation of objects. This allocator is used for html parsing |
146 | | // which should free up the resources after document parsing. A singleton |
147 | | // allocator for example will keep growing unless Reset() is called upon each |
148 | | // new parsing. |
149 | | // |
150 | | // Bit shifting syntax used in this source: |
151 | | // A) To round the block size to nearest page size (4096) multiple: |
152 | | // ((block_size - 1) | (page_size - 1) + 1. |
153 | | // Examples: |
154 | | // - block_size = 3000; |
155 | | // (3000 - 1) | (4096 - 1) + 1 = 4096. |
156 | | // - block_size = 5000; |
157 | | // (5000 - 1) | (4096 - 1) + 1 = 8096. |
158 | | // |
159 | | // B) If N is multiple of A. |
160 | | // (N & (A - 1)) == 0 |
161 | | // This can also be achieved by modulo operator N % A == 0. |
162 | | // Examples: |
163 | | // - 40 is multiple of 8? |
164 | | // 40 & (8 - 1) == 0; |
165 | | // - 62 is multiple of 4? |
166 | | // 60 & (4 - 1) != 0; |
167 | | |
168 | | #ifndef CPP_HTMLPARSER_ALLOCATOR_H_ |
169 | | #define CPP_HTMLPARSER_ALLOCATOR_H_ |
170 | | |
171 | | #include <unistd.h> // For getpagesize() |
172 | | |
173 | | #include <array> |
174 | | #include <cstring> |
175 | | #include <memory> |
176 | | #include <tuple> |
177 | | #include <vector> |
178 | | |
179 | | namespace htmlparser { |
180 | | |
181 | | template <class T> |
182 | | class Allocator { |
183 | | public: |
184 | | explicit Allocator(std::size_t block_size = 0) : |
185 | | alignment_(std::alignment_of_v<T>), |
186 | | // Rounds the block_size_ to page size multiple. |
187 | | // 3000 becomes 4096 and 5000 becomes 8192 for 4k page size OS. |
188 | | block_size_( |
189 | | ((block_size < 1 ? 0 : block_size - 1) | (getpagesize() - 1)) + 1), |
190 | | object_size_(sizeof(T)), |
191 | | remaining_(0), |
192 | | next_free_(nullptr), |
193 | | blocks_allocated_(0), |
194 | 11.5k | block_(nullptr) {} |
195 | | |
196 | 11.5k | ~Allocator() { |
197 | 11.5k | FreeBlocks(); |
198 | 11.5k | } |
199 | | |
200 | | Allocator(const Allocator&) = delete; |
201 | | Allocator& operator=(const Allocator&) = delete; |
202 | | |
203 | | // Allocates memory of same size required to construct object of type T. |
204 | | // Returns nullptr if alloction failed. |
205 | 24.3M | void* Allocate() { |
206 | | // Checks if remaining bytes in block are less than object size, or |
207 | | // reamining bytes after alignment is less than object size. |
208 | | // Add a new block. |
209 | 24.3M | if (object_size_ > remaining_ || !AlignFreeAddress()) { |
210 | 24.4k | if (!NewBlock()) return nullptr; |
211 | 24.4k | } |
212 | | |
213 | | // Get the address for object allocation. |
214 | 24.3M | last_alloc_ = next_free_; |
215 | 24.3M | remaining_ -= object_size_; |
216 | | // Move the pointer for next object. |
217 | 24.3M | next_free_ += object_size_; |
218 | 24.3M | return static_cast<void*>(last_alloc_); |
219 | 24.3M | } |
220 | | |
221 | | // Allocates memory and constructs the object. |
222 | | // Returns nullptr if memory allocation failed. |
223 | | template <typename ...Args> |
224 | 24.3M | T* Construct(Args&& ...args) { |
225 | 24.3M | void* mem = Allocate(); |
226 | | // New object or nullptr if memory allocation failed. |
227 | 24.3M | return mem ? new (mem) T(std::forward<Args>(args)...) : nullptr; |
228 | 24.3M | } |
229 | | |
230 | | // Default constructor. |
231 | | T* Construct() { |
232 | | void* mem = Allocate(); |
233 | | return mem ? new (mem) T() : nullptr; |
234 | | } |
235 | | |
236 | | // Deallocates the blocks and restores the allocator for reuse. |
237 | | void Reset() { |
238 | | FreeBlocks(); |
239 | | next_free_ = nullptr; |
240 | | remaining_ = 0; |
241 | | } |
242 | | |
243 | | // Used only by test or development environment. |
244 | | std::tuple<int /*alignment*/, |
245 | | int /*block_size*/, |
246 | | int /*object_size*/, |
247 | | unsigned char* /*last_alloc*/, |
248 | | unsigned char* /*next_free*/, |
249 | | std::size_t /*remaining*/, |
250 | | uint32_t /*blocks_allocated*/> DebugInfo() const { |
251 | | return {alignment_, block_size_, object_size_, last_alloc_, next_free_, |
252 | | remaining_, blocks_allocated_}; |
253 | | } |
254 | | |
255 | | private: |
256 | | struct Block { |
257 | | Block* previous; |
258 | | void* buf; |
259 | | }; |
260 | | |
261 | | // Deallocates the blocks. |
262 | 11.5k | void FreeBlocks() { |
263 | 11.5k | Block* current_block = block_; |
264 | 11.5k | bool partial = true; |
265 | 35.9k | while (current_block != nullptr) { |
266 | 24.4k | Block* previous = current_block->previous; |
267 | 24.4k | Destroy(current_block, partial); |
268 | 24.4k | partial = false; |
269 | 24.4k | FreeBlockMemory(current_block); |
270 | 24.4k | delete current_block; |
271 | 24.4k | blocks_allocated_--; |
272 | 24.4k | current_block = previous; |
273 | 24.4k | } |
274 | 11.5k | } |
275 | | |
276 | | // Creates a new block. |
277 | 24.4k | bool NewBlock() { |
278 | 24.4k | Block* block = new Block; |
279 | 24.4k | block->previous = block_; |
280 | 24.4k | if (alignment_ <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) { |
281 | 24.4k | block->buf = ::operator new(block_size_); |
282 | 24.4k | } else { |
283 | 0 | block->buf = ::operator new(block_size_, std::align_val_t(alignment_)); |
284 | 0 | } |
285 | 24.4k | std::memset(block->buf, 0, block_size_); |
286 | | // Align the block to alignment boundary. |
287 | | // |
288 | | // The adjusted space may be lesser than block size. |
289 | 24.4k | std::size_t adjusted_space = block_size_; |
290 | 24.4k | if (alignment_ > 1) { |
291 | 24.4k | if (block->buf = std::align(alignment_, block_size_, block->buf, |
292 | 24.4k | adjusted_space); |
293 | 24.4k | block->buf == nullptr) { |
294 | | // In case std::align fails, try manual alignment. |
295 | 0 | if (!AlignFreeAddress()) { |
296 | 0 | return false; |
297 | 0 | } |
298 | 24.4k | } else if (adjusted_space < object_size_) { |
299 | 0 | return false; |
300 | 0 | } |
301 | 24.4k | } |
302 | | |
303 | 24.4k | next_free_ = static_cast<unsigned char*>(block->buf); |
304 | 24.4k | remaining_ = adjusted_space; |
305 | 24.4k | blocks_allocated_++; |
306 | 24.4k | block_ = block; |
307 | 24.4k | return true; |
308 | 24.4k | } |
309 | | |
310 | 24.4k | void FreeBlockMemory(Block* block) { |
311 | 24.4k | if (alignment_ <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) { |
312 | 24.4k | ::operator delete(block->buf, block_size_); |
313 | 24.4k | block->buf = nullptr; |
314 | 24.4k | } else { |
315 | 0 | ::operator delete(block->buf, block_size_, std::align_val_t(alignment_)); |
316 | 0 | block->buf = nullptr; |
317 | 0 | } |
318 | 24.4k | } |
319 | | |
320 | | // Destroys the T objects allocated and constructed using this block. |
321 | | // If the block is only partially consumed, destroys only objects allocated |
322 | | // in the partial space. |
323 | 24.4k | void Destroy(Block* block, bool partial_allocation = false) { |
324 | 24.4k | unsigned char* t_ptr = static_cast<unsigned char*>(block->buf); |
325 | 24.4k | unsigned char* block_end = t_ptr + block_size_; |
326 | 24.4k | std::size_t remaining = block_end - t_ptr; |
327 | | // All offsets are aligned, so simply increment and destroy. |
328 | 24.4k | if ((object_size_ & (alignment_ - 1)) == 0) { |
329 | 24.3M | while (remaining >= object_size_) { |
330 | 24.3M | reinterpret_cast<T*>(t_ptr)->~T(); |
331 | 24.3M | t_ptr += object_size_; |
332 | 24.3M | if (partial_allocation && t_ptr >= next_free_) { |
333 | 11.5k | return; |
334 | 11.5k | } |
335 | 24.3M | remaining -= object_size_; |
336 | 24.3M | } |
337 | 24.4k | } |
338 | 24.4k | } |
339 | | |
340 | | // If the block's address is not aligned, moves the pointer to the address |
341 | | // that is multiple of aligment_. |
342 | 24.3M | bool AlignFreeAddress() { |
343 | | // Checks how many bytes to skip to be at the correct alignment. |
344 | 24.3M | if (const std::size_t skip = |
345 | 24.3M | reinterpret_cast<std::size_t>(next_free_) & (alignment_ - 1); |
346 | 24.3M | skip > 0) { |
347 | 0 | auto waste = remaining_ - skip; |
348 | 0 | if (waste >= remaining_) return false; |
349 | 0 | next_free_ += waste; |
350 | 0 | remaining_ -= waste; |
351 | 0 | } |
352 | | |
353 | 24.3M | return true; |
354 | 24.3M | } |
355 | | |
356 | | const std::size_t alignment_; |
357 | | const std::size_t block_size_; |
358 | | const std::size_t object_size_; |
359 | | std::size_t remaining_; |
360 | | unsigned char* last_alloc_; |
361 | | unsigned char* next_free_; |
362 | | uint32_t blocks_allocated_; |
363 | | |
364 | | Block* block_; |
365 | | }; |
366 | | |
367 | | } // namespace htmlparser |
368 | | |
369 | | #endif // CPP_HTMLPARSER_ALLOCATOR_H_ |