/src/serenity/Userland/Libraries/LibSQL/Heap.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2021, Jan de Visser <jan@de-visser.net> |
3 | | * Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #include <AK/ByteString.h> |
9 | | #include <AK/Format.h> |
10 | | #include <AK/QuickSort.h> |
11 | | #include <LibCore/System.h> |
12 | | #include <LibSQL/Heap.h> |
13 | | #include <sys/stat.h> |
14 | | |
15 | | namespace SQL { |
16 | | |
17 | | ErrorOr<NonnullRefPtr<Heap>> Heap::create(ByteString file_name) |
18 | 0 | { |
19 | 0 | return adopt_nonnull_ref_or_enomem(new (nothrow) Heap(move(file_name))); |
20 | 0 | } |
21 | | |
22 | | Heap::Heap(ByteString file_name) |
23 | 0 | : m_name(move(file_name)) |
24 | 0 | { |
25 | 0 | } |
26 | | |
27 | | Heap::~Heap() |
28 | 0 | { |
29 | 0 | if (m_file && !m_write_ahead_log.is_empty()) { |
30 | 0 | if (auto maybe_error = flush(); maybe_error.is_error()) |
31 | 0 | warnln("~Heap({}): {}", name(), maybe_error.error()); |
32 | 0 | } |
33 | 0 | } |
34 | | |
35 | | ErrorOr<void> Heap::open() |
36 | 0 | { |
37 | 0 | VERIFY(!m_file); |
38 | | |
39 | 0 | size_t file_size = 0; |
40 | 0 | struct stat stat_buffer; |
41 | 0 | if (stat(name().characters(), &stat_buffer) != 0) { |
42 | 0 | if (errno != ENOENT) { |
43 | 0 | warnln("Heap::open({}): could not stat: {}"sv, name(), strerror(errno)); |
44 | 0 | return Error::from_string_literal("Heap::open(): could not stat file"); |
45 | 0 | } |
46 | 0 | } else if (!S_ISREG(stat_buffer.st_mode)) { |
47 | 0 | warnln("Heap::open({}): can only use regular files"sv, name()); |
48 | 0 | return Error::from_string_literal("Heap::open(): can only use regular files"); |
49 | 0 | } else { |
50 | 0 | file_size = stat_buffer.st_size; |
51 | 0 | } |
52 | | |
53 | 0 | if (file_size > 0) { |
54 | 0 | m_next_block = file_size / Block::SIZE; |
55 | 0 | m_highest_block_written = m_next_block - 1; |
56 | 0 | } |
57 | |
|
58 | 0 | auto file = TRY(Core::File::open(name(), Core::File::OpenMode::ReadWrite)); |
59 | 0 | m_file = TRY(Core::InputBufferedFile::create(move(file))); |
60 | | |
61 | 0 | if (file_size > 0) { |
62 | 0 | if (auto error_maybe = read_zero_block(); error_maybe.is_error()) { |
63 | 0 | m_file = nullptr; |
64 | 0 | return error_maybe.release_error(); |
65 | 0 | } |
66 | 0 | } else { |
67 | 0 | TRY(initialize_zero_block()); |
68 | 0 | } |
69 | | |
70 | | // FIXME: We should more gracefully handle version incompatibilities. For now, we drop the database. |
71 | 0 | if (m_version != VERSION) { |
72 | 0 | dbgln_if(SQL_DEBUG, "Heap file {} opened has incompatible version {}. Deleting for version {}.", name(), m_version, VERSION); |
73 | 0 | m_file = nullptr; |
74 | |
|
75 | 0 | TRY(Core::System::unlink(name())); |
76 | 0 | return open(); |
77 | 0 | } |
78 | | |
79 | | // Perform a heap scan to find all free blocks |
80 | | // FIXME: this is very inefficient; store free blocks in a persistent heap structure |
81 | 0 | for (Block::Index index = 1; index <= m_highest_block_written; ++index) { |
82 | 0 | auto block_data = TRY(read_raw_block(index)); |
83 | 0 | auto size_in_bytes = *reinterpret_cast<u32*>(block_data.data()); |
84 | 0 | if (size_in_bytes == 0) |
85 | 0 | TRY(m_free_block_indices.try_append(index)); |
86 | 0 | } |
87 | | |
88 | 0 | dbgln_if(SQL_DEBUG, "Heap file {} opened; number of blocks = {}; free blocks = {}", name(), m_highest_block_written, m_free_block_indices.size()); |
89 | 0 | return {}; |
90 | 0 | } |
91 | | |
92 | | ErrorOr<size_t> Heap::file_size_in_bytes() const |
93 | 0 | { |
94 | 0 | TRY(m_file->seek(0, SeekMode::FromEndPosition)); |
95 | 0 | return TRY(m_file->tell()); |
96 | 0 | } |
97 | | |
98 | | bool Heap::has_block(Block::Index index) const |
99 | 0 | { |
100 | 0 | return (index <= m_highest_block_written || m_write_ahead_log.contains(index)) |
101 | 0 | && !m_free_block_indices.contains_slow(index); |
102 | 0 | } |
103 | | |
104 | | Block::Index Heap::request_new_block_index() |
105 | 0 | { |
106 | 0 | if (!m_free_block_indices.is_empty()) |
107 | 0 | return m_free_block_indices.take_last(); |
108 | 0 | return m_next_block++; |
109 | 0 | } |
110 | | |
111 | | ErrorOr<ByteBuffer> Heap::read_storage(Block::Index index) |
112 | 0 | { |
113 | 0 | dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index); |
114 | | |
115 | | // Reconstruct the data storage from a potential chain of blocks |
116 | 0 | ByteBuffer data; |
117 | 0 | while (index > 0) { |
118 | 0 | auto block = TRY(read_block(index)); |
119 | 0 | dbgln_if(SQL_DEBUG, " -> {} bytes", block.size_in_bytes()); |
120 | 0 | TRY(data.try_append(block.data().bytes().slice(0, block.size_in_bytes()))); |
121 | 0 | index = block.next_block(); |
122 | 0 | } |
123 | 0 | return data; |
124 | 0 | } |
125 | | |
126 | | ErrorOr<void> Heap::write_storage(Block::Index index, ReadonlyBytes data) |
127 | 0 | { |
128 | 0 | dbgln_if(SQL_DEBUG, "{}({}, {} bytes)", __FUNCTION__, index, data.size()); |
129 | 0 | if (index == 0) |
130 | 0 | return Error::from_string_view("Writing to zero block is not allowed"sv); |
131 | 0 | if (data.is_empty()) |
132 | 0 | return Error::from_string_view("Writing empty data is not allowed"sv); |
133 | 0 | if (m_free_block_indices.contains_slow(index)) |
134 | 0 | return Error::from_string_view("Invalid write to a free block index"sv); |
135 | | |
136 | | // Split up the storage across multiple blocks if necessary, creating a chain |
137 | 0 | u32 remaining_size = static_cast<u32>(data.size()); |
138 | 0 | u32 offset_in_data = 0; |
139 | 0 | Block::Index existing_next_block_index = 0; |
140 | 0 | while (remaining_size > 0) { |
141 | 0 | auto block_data_size = AK::min(remaining_size, Block::DATA_SIZE); |
142 | 0 | remaining_size -= block_data_size; |
143 | |
|
144 | 0 | ByteBuffer block_data; |
145 | 0 | if (has_block(index)) { |
146 | 0 | auto existing_block = TRY(read_block(index)); |
147 | 0 | block_data = existing_block.data(); |
148 | 0 | TRY(block_data.try_resize(block_data_size)); |
149 | 0 | existing_next_block_index = existing_block.next_block(); |
150 | 0 | } else { |
151 | 0 | block_data = TRY(ByteBuffer::create_uninitialized(block_data_size)); |
152 | 0 | existing_next_block_index = 0; |
153 | 0 | } |
154 | | |
155 | 0 | Block::Index next_block_index = existing_next_block_index; |
156 | 0 | if (next_block_index == 0 && remaining_size > 0) |
157 | 0 | next_block_index = request_new_block_index(); |
158 | 0 | else if (remaining_size == 0) |
159 | 0 | next_block_index = 0; |
160 | |
|
161 | 0 | block_data.bytes().overwrite(0, data.offset(offset_in_data), block_data_size); |
162 | 0 | TRY(write_block({ index, block_data_size, next_block_index, move(block_data) })); |
163 | | |
164 | 0 | index = next_block_index; |
165 | 0 | offset_in_data += block_data_size; |
166 | 0 | } |
167 | | |
168 | | // Free remaining blocks in existing chain, if any |
169 | 0 | if (existing_next_block_index > 0) |
170 | 0 | TRY(free_storage(existing_next_block_index)); |
171 | | |
172 | 0 | return {}; |
173 | 0 | } |
174 | | |
175 | | ErrorOr<ByteBuffer> Heap::read_raw_block(Block::Index index) |
176 | 0 | { |
177 | 0 | VERIFY(m_file); |
178 | 0 | VERIFY(index < m_next_block); |
179 | | |
180 | 0 | if (auto wal_entry = m_write_ahead_log.get(index); wal_entry.has_value()) |
181 | 0 | return wal_entry.value(); |
182 | | |
183 | 0 | TRY(m_file->seek(index * Block::SIZE, SeekMode::SetPosition)); |
184 | 0 | auto buffer = TRY(ByteBuffer::create_uninitialized(Block::SIZE)); |
185 | 0 | TRY(m_file->read_until_filled(buffer)); |
186 | 0 | return buffer; |
187 | 0 | } |
188 | | |
189 | | ErrorOr<Block> Heap::read_block(Block::Index index) |
190 | 0 | { |
191 | 0 | dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index); |
192 | |
|
193 | 0 | auto buffer = TRY(read_raw_block(index)); |
194 | 0 | auto size_in_bytes = *reinterpret_cast<u32*>(buffer.offset_pointer(0)); |
195 | 0 | auto next_block = *reinterpret_cast<Block::Index*>(buffer.offset_pointer(sizeof(u32))); |
196 | 0 | auto data = TRY(buffer.slice(Block::HEADER_SIZE, Block::DATA_SIZE)); |
197 | | |
198 | 0 | return Block { index, size_in_bytes, next_block, move(data) }; |
199 | 0 | } |
200 | | |
201 | | ErrorOr<void> Heap::write_raw_block(Block::Index index, ReadonlyBytes data) |
202 | 0 | { |
203 | 0 | dbgln_if(SQL_DEBUG, "Write raw block {}", index); |
204 | |
|
205 | 0 | VERIFY(m_file); |
206 | 0 | VERIFY(data.size() == Block::SIZE); |
207 | | |
208 | 0 | TRY(m_file->seek(index * Block::SIZE, SeekMode::SetPosition)); |
209 | 0 | TRY(m_file->write_until_depleted(data)); |
210 | | |
211 | 0 | if (index > m_highest_block_written) |
212 | 0 | m_highest_block_written = index; |
213 | |
|
214 | 0 | return {}; |
215 | 0 | } |
216 | | |
217 | | ErrorOr<void> Heap::write_raw_block_to_wal(Block::Index index, ByteBuffer&& data) |
218 | 0 | { |
219 | 0 | dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index); |
220 | 0 | VERIFY(index < m_next_block); |
221 | 0 | VERIFY(data.size() == Block::SIZE); |
222 | | |
223 | 0 | TRY(m_write_ahead_log.try_set(index, move(data))); |
224 | | |
225 | 0 | return {}; |
226 | 0 | } |
227 | | |
228 | | ErrorOr<void> Heap::write_block(Block const& block) |
229 | 0 | { |
230 | 0 | dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, block.index()); |
231 | 0 | VERIFY(block.index() < m_next_block); |
232 | 0 | VERIFY(block.next_block() < m_next_block); |
233 | 0 | VERIFY(block.size_in_bytes() > 0); |
234 | 0 | VERIFY(block.data().size() <= Block::DATA_SIZE); |
235 | | |
236 | 0 | auto size_in_bytes = block.size_in_bytes(); |
237 | 0 | auto next_block = block.next_block(); |
238 | |
|
239 | 0 | auto heap_data = TRY(ByteBuffer::create_zeroed(Block::SIZE)); |
240 | 0 | heap_data.overwrite(0, &size_in_bytes, sizeof(size_in_bytes)); |
241 | 0 | heap_data.overwrite(sizeof(size_in_bytes), &next_block, sizeof(next_block)); |
242 | |
|
243 | 0 | block.data().bytes().copy_to(heap_data.bytes().slice(Block::HEADER_SIZE)); |
244 | |
|
245 | 0 | return write_raw_block_to_wal(block.index(), move(heap_data)); |
246 | 0 | } |
247 | | |
248 | | ErrorOr<void> Heap::free_storage(Block::Index index) |
249 | 0 | { |
250 | 0 | dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index); |
251 | 0 | VERIFY(index > 0); |
252 | | |
253 | 0 | while (index > 0) { |
254 | 0 | auto block = TRY(read_block(index)); |
255 | 0 | TRY(free_block(block)); |
256 | 0 | index = block.next_block(); |
257 | 0 | } |
258 | 0 | return {}; |
259 | 0 | } |
260 | | |
261 | | ErrorOr<void> Heap::free_block(Block const& block) |
262 | 0 | { |
263 | 0 | auto index = block.index(); |
264 | 0 | dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index); |
265 | |
|
266 | 0 | VERIFY(index > 0); |
267 | 0 | VERIFY(has_block(index)); |
268 | | |
269 | | // Zero out freed blocks to facilitate a free block scan upon opening the database later |
270 | 0 | auto zeroed_data = TRY(ByteBuffer::create_zeroed(Block::SIZE)); |
271 | 0 | TRY(write_raw_block_to_wal(index, move(zeroed_data))); |
272 | | |
273 | 0 | return m_free_block_indices.try_append(index); |
274 | 0 | } |
275 | | |
276 | | ErrorOr<void> Heap::flush() |
277 | 0 | { |
278 | 0 | VERIFY(m_file); |
279 | 0 | auto indices = m_write_ahead_log.keys(); |
280 | 0 | quick_sort(indices); |
281 | 0 | for (auto index : indices) { |
282 | 0 | dbgln_if(SQL_DEBUG, "Flushing block {}", index); |
283 | 0 | auto& data = m_write_ahead_log.get(index).value(); |
284 | 0 | TRY(write_raw_block(index, data)); |
285 | 0 | } |
286 | 0 | m_write_ahead_log.clear(); |
287 | 0 | dbgln_if(SQL_DEBUG, "WAL flushed; new number of blocks = {}", m_highest_block_written); |
288 | 0 | return {}; |
289 | 0 | } |
290 | | |
291 | | constexpr static auto FILE_ID = "SerenitySQL "sv; |
292 | | constexpr static auto VERSION_OFFSET = FILE_ID.length(); |
293 | | constexpr static auto SCHEMAS_ROOT_OFFSET = VERSION_OFFSET + sizeof(u32); |
294 | | constexpr static auto TABLES_ROOT_OFFSET = SCHEMAS_ROOT_OFFSET + sizeof(u32); |
295 | | constexpr static auto TABLE_COLUMNS_ROOT_OFFSET = TABLES_ROOT_OFFSET + sizeof(u32); |
296 | | constexpr static auto USER_VALUES_OFFSET = TABLE_COLUMNS_ROOT_OFFSET + sizeof(u32); |
297 | | |
298 | | ErrorOr<void> Heap::read_zero_block() |
299 | 0 | { |
300 | 0 | dbgln_if(SQL_DEBUG, "Read zero block from {}", name()); |
301 | |
|
302 | 0 | auto block = TRY(read_raw_block(0)); |
303 | 0 | auto file_id_buffer = TRY(block.slice(0, FILE_ID.length())); |
304 | 0 | auto file_id = StringView(file_id_buffer); |
305 | 0 | if (file_id != FILE_ID) { |
306 | 0 | warnln("{}: Zero page corrupt. This is probably not a {} heap file"sv, name(), FILE_ID); |
307 | 0 | return Error::from_string_literal("Heap()::read_zero_block(): Zero page corrupt. This is probably not a SerenitySQL heap file"); |
308 | 0 | } |
309 | | |
310 | 0 | memcpy(&m_version, block.offset_pointer(VERSION_OFFSET), sizeof(u32)); |
311 | 0 | dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF)); |
312 | |
|
313 | 0 | memcpy(&m_schemas_root, block.offset_pointer(SCHEMAS_ROOT_OFFSET), sizeof(u32)); |
314 | 0 | dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root); |
315 | |
|
316 | 0 | memcpy(&m_tables_root, block.offset_pointer(TABLES_ROOT_OFFSET), sizeof(u32)); |
317 | 0 | dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root); |
318 | |
|
319 | 0 | memcpy(&m_table_columns_root, block.offset_pointer(TABLE_COLUMNS_ROOT_OFFSET), sizeof(u32)); |
320 | 0 | dbgln_if(SQL_DEBUG, "Table columns root node: {}", m_table_columns_root); |
321 | |
|
322 | 0 | memcpy(m_user_values.data(), block.offset_pointer(USER_VALUES_OFFSET), m_user_values.size() * sizeof(u32)); |
323 | 0 | for (auto ix = 0u; ix < m_user_values.size(); ix++) { |
324 | 0 | if (m_user_values[ix]) |
325 | 0 | dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]); |
326 | 0 | } |
327 | 0 | return {}; |
328 | 0 | } |
329 | | |
330 | | ErrorOr<void> Heap::update_zero_block() |
331 | 0 | { |
332 | 0 | dbgln_if(SQL_DEBUG, "Write zero block to {}", name()); |
333 | 0 | dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF)); |
334 | 0 | dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root); |
335 | 0 | dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root); |
336 | 0 | dbgln_if(SQL_DEBUG, "Table Columns root node: {}", m_table_columns_root); |
337 | 0 | for (auto ix = 0u; ix < m_user_values.size(); ix++) { |
338 | 0 | if (m_user_values[ix] > 0) |
339 | 0 | dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]); |
340 | 0 | } |
341 | |
|
342 | 0 | auto buffer = TRY(ByteBuffer::create_zeroed(Block::SIZE)); |
343 | 0 | auto buffer_bytes = buffer.bytes(); |
344 | 0 | buffer_bytes.overwrite(0, FILE_ID.characters_without_null_termination(), FILE_ID.length()); |
345 | 0 | buffer_bytes.overwrite(VERSION_OFFSET, &m_version, sizeof(u32)); |
346 | 0 | buffer_bytes.overwrite(SCHEMAS_ROOT_OFFSET, &m_schemas_root, sizeof(u32)); |
347 | 0 | buffer_bytes.overwrite(TABLES_ROOT_OFFSET, &m_tables_root, sizeof(u32)); |
348 | 0 | buffer_bytes.overwrite(TABLE_COLUMNS_ROOT_OFFSET, &m_table_columns_root, sizeof(u32)); |
349 | 0 | buffer_bytes.overwrite(USER_VALUES_OFFSET, m_user_values.data(), m_user_values.size() * sizeof(u32)); |
350 | |
|
351 | 0 | return write_raw_block_to_wal(0, move(buffer)); |
352 | 0 | } |
353 | | |
354 | | ErrorOr<void> Heap::initialize_zero_block() |
355 | 0 | { |
356 | 0 | m_version = VERSION; |
357 | 0 | m_schemas_root = 0; |
358 | 0 | m_tables_root = 0; |
359 | 0 | m_table_columns_root = 0; |
360 | 0 | m_next_block = 1; |
361 | 0 | m_highest_block_written = 0; |
362 | 0 | for (auto& user : m_user_values) |
363 | 0 | user = 0u; |
364 | 0 | return update_zero_block(); |
365 | 0 | } |
366 | | |
367 | | } |