/src/rocksdb/table/plain/plain_table_builder.cc
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
6 | | #include "table/plain/plain_table_builder.h" |
7 | | |
8 | | #include <cassert> |
9 | | #include <limits> |
10 | | #include <map> |
11 | | #include <string> |
12 | | |
13 | | #include "db/dbformat.h" |
14 | | #include "file/writable_file_writer.h" |
15 | | #include "logging/logging.h" |
16 | | #include "rocksdb/comparator.h" |
17 | | #include "rocksdb/env.h" |
18 | | #include "rocksdb/filter_policy.h" |
19 | | #include "rocksdb/options.h" |
20 | | #include "rocksdb/table.h" |
21 | | #include "table/block_based/block_builder.h" |
22 | | #include "table/format.h" |
23 | | #include "table/meta_blocks.h" |
24 | | #include "table/plain/plain_table_bloom.h" |
25 | | #include "table/plain/plain_table_factory.h" |
26 | | #include "table/plain/plain_table_index.h" |
27 | | #include "util/coding.h" |
28 | | #include "util/crc32c.h" |
29 | | #include "util/stop_watch.h" |
30 | | |
31 | | namespace ROCKSDB_NAMESPACE { |
32 | | |
33 | | namespace { |
34 | | |
35 | | // a utility that helps writing block content to the file |
36 | | // @offset will advance if @block_contents was successfully written. |
37 | | // @block_handle the block handle this particular block. |
38 | | IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file, |
39 | 0 | uint64_t* offset, BlockHandle* block_handle) { |
40 | 0 | block_handle->set_offset(*offset); |
41 | 0 | block_handle->set_size(block_contents.size()); |
42 | 0 | IOStatus io_s = file->Append(IOOptions(), block_contents); |
43 | |
|
44 | 0 | if (io_s.ok()) { |
45 | 0 | *offset += block_contents.size(); |
46 | 0 | } |
47 | 0 | return io_s; |
48 | 0 | } |
49 | | |
50 | | } // namespace |
51 | | |
// Magic number for plain table files. It was picked by running
//    echo rocksdb.table.plain | sha1sum
// and keeping the leading 64 bits of the digest.
const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ULL;
// Companion constant; going by its name it is the legacy plain table magic
// number.
const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ULL;
57 | | |
58 | | PlainTableBuilder::PlainTableBuilder( |
59 | | const ImmutableOptions& ioptions, const MutableCFOptions& moptions, |
60 | | const InternalTblPropCollFactories* internal_tbl_prop_coll_factories, |
61 | | uint32_t column_family_id, int level_at_creation, WritableFileWriter* file, |
62 | | uint32_t user_key_len, EncodingType encoding_type, size_t index_sparseness, |
63 | | uint32_t bloom_bits_per_key, const std::string& column_family_name, |
64 | | uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio, |
65 | | bool store_index_in_file, const std::string& db_id, |
66 | | const std::string& db_session_id, uint64_t file_number) |
67 | 0 | : ioptions_(ioptions), |
68 | 0 | moptions_(moptions), |
69 | 0 | bloom_block_(num_probes), |
70 | 0 | file_(file), |
71 | 0 | bloom_bits_per_key_(bloom_bits_per_key), |
72 | 0 | huge_page_tlb_size_(huge_page_tlb_size), |
73 | 0 | encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(), |
74 | 0 | index_sparseness), |
75 | 0 | store_index_in_file_(store_index_in_file), |
76 | 0 | prefix_extractor_(moptions.prefix_extractor.get()) { |
77 | | // Build index block and save it in the file if hash_table_ratio > 0 |
78 | 0 | if (store_index_in_file_) { |
79 | 0 | assert(hash_table_ratio > 0 || IsTotalOrderMode()); |
80 | 0 | index_builder_.reset(new PlainTableIndexBuilder( |
81 | 0 | &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness, |
82 | 0 | hash_table_ratio, huge_page_tlb_size_)); |
83 | 0 | properties_ |
84 | 0 | .user_collected_properties[PlainTablePropertyNames::kBloomVersion] = |
85 | 0 | "1"; // For future use |
86 | 0 | } |
87 | |
|
88 | 0 | properties_.fixed_key_len = user_key_len; |
89 | | |
90 | | // for plain table, we put all the data in a big chuck. |
91 | 0 | properties_.num_data_blocks = 1; |
92 | | // Fill it later if store_index_in_file_ == true |
93 | 0 | properties_.index_size = 0; |
94 | 0 | properties_.filter_size = 0; |
95 | | // To support roll-back to previous version, now still use version 0 for |
96 | | // plain encoding. |
97 | 0 | properties_.format_version = (encoding_type == kPlain) ? 0 : 1; |
98 | 0 | properties_.column_family_id = column_family_id; |
99 | 0 | properties_.column_family_name = column_family_name; |
100 | 0 | properties_.db_id = db_id; |
101 | 0 | properties_.db_session_id = db_session_id; |
102 | 0 | properties_.db_host_id = ioptions.db_host_id; |
103 | 0 | if (!ReifyDbHostIdProperty(ioptions_.env, &properties_.db_host_id).ok()) { |
104 | 0 | ROCKS_LOG_INFO(ioptions_.logger, "db_host_id property will not be set"); |
105 | 0 | } |
106 | 0 | properties_.orig_file_number = file_number; |
107 | 0 | properties_.prefix_extractor_name = |
108 | 0 | moptions_.prefix_extractor != nullptr |
109 | 0 | ? moptions_.prefix_extractor->AsString() |
110 | 0 | : "nullptr"; |
111 | |
|
112 | 0 | std::string val; |
113 | 0 | PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType())); |
114 | 0 | properties_ |
115 | 0 | .user_collected_properties[PlainTablePropertyNames::kEncodingType] = val; |
116 | |
|
117 | 0 | assert(internal_tbl_prop_coll_factories); |
118 | 0 | for (auto& factory : *internal_tbl_prop_coll_factories) { |
119 | 0 | assert(factory); |
120 | |
|
121 | 0 | std::unique_ptr<InternalTblPropColl> collector{ |
122 | 0 | factory->CreateInternalTblPropColl(column_family_id, level_at_creation, |
123 | 0 | ioptions.num_levels)}; |
124 | 0 | if (collector) { |
125 | 0 | table_properties_collectors_.emplace_back(std::move(collector)); |
126 | 0 | } |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | 0 | PlainTableBuilder::~PlainTableBuilder() { |
131 | | // They are supposed to have been passed to users through Finish() |
132 | | // if the file succeeds. |
133 | 0 | status_.PermitUncheckedError(); |
134 | 0 | io_status_.PermitUncheckedError(); |
135 | 0 | } |
136 | | |
137 | 0 | void PlainTableBuilder::Add(const Slice& key, const Slice& value) { |
138 | | // temp buffer for metadata bytes between key and value. |
139 | 0 | char meta_bytes_buf[6]; |
140 | 0 | size_t meta_bytes_buf_size = 0; |
141 | 0 | const IOOptions opts; |
142 | |
|
143 | 0 | ParsedInternalKey internal_key; |
144 | 0 | if (!ParseInternalKey(key, &internal_key, false /* log_err_key */) |
145 | 0 | .ok()) { // TODO |
146 | 0 | assert(false); |
147 | 0 | return; |
148 | 0 | } |
149 | 0 | if (internal_key.type == kTypeRangeDeletion) { |
150 | 0 | status_ = Status::NotSupported("Range deletion unsupported"); |
151 | 0 | return; |
152 | 0 | } |
153 | | |
154 | | #ifndef NDEBUG |
155 | | bool skip = false; |
156 | | TEST_SYNC_POINT_CALLBACK("PlainTableBuilder::Add::skip", (void*)&skip); |
157 | | if (skip) { |
158 | | return; |
159 | | } |
160 | | #endif // !NDEBUG |
161 | | |
162 | | // Store key hash |
163 | 0 | if (store_index_in_file_) { |
164 | 0 | if (moptions_.prefix_extractor == nullptr) { |
165 | 0 | keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key)); |
166 | 0 | } else { |
167 | 0 | Slice prefix = |
168 | 0 | moptions_.prefix_extractor->Transform(internal_key.user_key); |
169 | 0 | keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix)); |
170 | 0 | } |
171 | 0 | } |
172 | | |
173 | | // Write value |
174 | 0 | assert(offset_ <= std::numeric_limits<uint32_t>::max()); |
175 | 0 | auto prev_offset = static_cast<uint32_t>(offset_); |
176 | | // Write out the key |
177 | 0 | io_status_ = encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, |
178 | 0 | &meta_bytes_buf_size); |
179 | 0 | if (SaveIndexInFile()) { |
180 | 0 | index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset); |
181 | 0 | } |
182 | | |
183 | | // Write value length |
184 | 0 | uint32_t value_size = static_cast<uint32_t>(value.size()); |
185 | 0 | if (io_status_.ok()) { |
186 | 0 | char* end_ptr = |
187 | 0 | EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size); |
188 | 0 | assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf)); |
189 | 0 | meta_bytes_buf_size = end_ptr - meta_bytes_buf; |
190 | 0 | io_status_ = |
191 | 0 | file_->Append(opts, Slice(meta_bytes_buf, meta_bytes_buf_size)); |
192 | 0 | } |
193 | | |
194 | | // Write value |
195 | 0 | if (io_status_.ok()) { |
196 | 0 | io_status_ = file_->Append(opts, value); |
197 | 0 | offset_ += value_size + meta_bytes_buf_size; |
198 | 0 | } |
199 | |
|
200 | 0 | if (io_status_.ok()) { |
201 | 0 | properties_.num_entries++; |
202 | 0 | properties_.raw_key_size += key.size(); |
203 | 0 | properties_.raw_value_size += value.size(); |
204 | 0 | if (internal_key.type == kTypeDeletion || |
205 | 0 | internal_key.type == kTypeSingleDeletion) { |
206 | 0 | properties_.num_deletions++; |
207 | 0 | } else if (internal_key.type == kTypeMerge) { |
208 | 0 | properties_.num_merge_operands++; |
209 | 0 | } |
210 | 0 | } |
211 | | |
212 | | // notify property collectors |
213 | 0 | NotifyCollectTableCollectorsOnAdd( |
214 | 0 | key, value, offset_, table_properties_collectors_, ioptions_.logger); |
215 | 0 | status_ = io_status_; |
216 | 0 | } |
217 | | |
218 | 0 | Status PlainTableBuilder::Finish() { |
219 | 0 | assert(!closed_); |
220 | 0 | closed_ = true; |
221 | |
|
222 | 0 | properties_.data_size = offset_; |
223 | | |
224 | | // Write the following blocks |
225 | | // 1. [meta block: bloom] - optional |
226 | | // 2. [meta block: index] - optional |
227 | | // 3. [meta block: properties] |
228 | | // 4. [metaindex block] |
229 | | // 5. [footer] |
230 | |
|
231 | 0 | MetaIndexBuilder meta_index_builer; |
232 | |
|
233 | 0 | if (store_index_in_file_ && (properties_.num_entries > 0)) { |
234 | 0 | assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max()); |
235 | 0 | BlockHandle bloom_block_handle; |
236 | 0 | if (bloom_bits_per_key_ > 0) { |
237 | 0 | bloom_block_.SetTotalBits( |
238 | 0 | &arena_, |
239 | 0 | static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_, |
240 | 0 | ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.logger); |
241 | |
|
242 | 0 | PutVarint32(&properties_.user_collected_properties |
243 | 0 | [PlainTablePropertyNames::kNumBloomBlocks], |
244 | 0 | bloom_block_.GetNumBlocks()); |
245 | |
|
246 | 0 | bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_); |
247 | |
|
248 | 0 | Slice bloom_finish_result = bloom_block_.Finish(); |
249 | |
|
250 | 0 | properties_.filter_size = bloom_finish_result.size(); |
251 | 0 | io_status_ = |
252 | 0 | WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle); |
253 | |
|
254 | 0 | if (!io_status_.ok()) { |
255 | 0 | status_ = io_status_; |
256 | 0 | return status_; |
257 | 0 | } |
258 | 0 | meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle); |
259 | 0 | } |
260 | 0 | BlockHandle index_block_handle; |
261 | 0 | Slice index_finish_result = index_builder_->Finish(); |
262 | |
|
263 | 0 | properties_.index_size = index_finish_result.size(); |
264 | 0 | io_status_ = |
265 | 0 | WriteBlock(index_finish_result, file_, &offset_, &index_block_handle); |
266 | |
|
267 | 0 | if (!io_status_.ok()) { |
268 | 0 | status_ = io_status_; |
269 | 0 | return status_; |
270 | 0 | } |
271 | | |
272 | 0 | meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock, |
273 | 0 | index_block_handle); |
274 | 0 | } |
275 | | |
276 | | // Calculate bloom block size and index block size |
277 | 0 | PropertyBlockBuilder property_block_builder; |
278 | | // -- Add basic properties |
279 | 0 | property_block_builder.AddTableProperty(properties_); |
280 | | // -- Add eixsting user collected properties |
281 | 0 | property_block_builder.Add(properties_.user_collected_properties); |
282 | | // -- Add more user collected properties |
283 | 0 | UserCollectedProperties more_user_collected_properties; |
284 | 0 | NotifyCollectTableCollectorsOnFinish( |
285 | 0 | table_properties_collectors_, ioptions_.logger, &property_block_builder, |
286 | 0 | more_user_collected_properties, properties_.readable_properties); |
287 | 0 | properties_.user_collected_properties.insert( |
288 | 0 | more_user_collected_properties.begin(), |
289 | 0 | more_user_collected_properties.end()); |
290 | | |
291 | | // -- Write property block |
292 | 0 | BlockHandle property_block_handle; |
293 | 0 | io_status_ = WriteBlock(property_block_builder.Finish(), file_, &offset_, |
294 | 0 | &property_block_handle); |
295 | 0 | if (!io_status_.ok()) { |
296 | 0 | status_ = io_status_; |
297 | 0 | return status_; |
298 | 0 | } |
299 | 0 | meta_index_builer.Add(kPropertiesBlockName, property_block_handle); |
300 | | |
301 | | // -- write metaindex block |
302 | 0 | BlockHandle metaindex_block_handle; |
303 | 0 | io_status_ = WriteBlock(meta_index_builer.Finish(), file_, &offset_, |
304 | 0 | &metaindex_block_handle); |
305 | 0 | if (!io_status_.ok()) { |
306 | 0 | status_ = io_status_; |
307 | 0 | return status_; |
308 | 0 | } |
309 | | |
310 | | // Write Footer |
311 | | // no need to write out new footer if we're using default checksum |
312 | 0 | FooterBuilder footer; |
313 | 0 | Status s = footer.Build(kPlainTableMagicNumber, /* format_version */ 0, |
314 | 0 | offset_, kNoChecksum, metaindex_block_handle); |
315 | 0 | if (!s.ok()) { |
316 | 0 | status_ = s; |
317 | 0 | return status_; |
318 | 0 | } |
319 | 0 | io_status_ = file_->Append(IOOptions(), footer.GetSlice()); |
320 | 0 | if (io_status_.ok()) { |
321 | 0 | offset_ += footer.GetSlice().size(); |
322 | 0 | } |
323 | 0 | status_ = io_status_; |
324 | 0 | return status_; |
325 | 0 | } |
326 | | |
327 | 0 | void PlainTableBuilder::Abandon() { closed_ = true; } |
328 | | |
329 | 0 | uint64_t PlainTableBuilder::NumEntries() const { |
330 | 0 | return properties_.num_entries; |
331 | 0 | } |
332 | | |
333 | 0 | uint64_t PlainTableBuilder::FileSize() const { return offset_; } |
334 | | |
335 | 0 | std::string PlainTableBuilder::GetFileChecksum() const { |
336 | 0 | if (file_ != nullptr) { |
337 | 0 | return file_->GetFileChecksum(); |
338 | 0 | } else { |
339 | 0 | return kUnknownFileChecksum; |
340 | 0 | } |
341 | 0 | } |
342 | | |
343 | 0 | const char* PlainTableBuilder::GetFileChecksumFuncName() const { |
344 | 0 | if (file_ != nullptr) { |
345 | 0 | return file_->GetFileChecksumFuncName(); |
346 | 0 | } else { |
347 | 0 | return kUnknownFileChecksumFuncName; |
348 | 0 | } |
349 | 0 | } |
350 | | void PlainTableBuilder::SetSeqnoTimeTableProperties( |
351 | 0 | const SeqnoToTimeMapping& relevant_mapping, uint64_t uint_64) { |
352 | | // TODO: storing seqno to time mapping is not yet support for plain table. |
353 | 0 | TableBuilder::SetSeqnoTimeTableProperties(relevant_mapping, uint_64); |
354 | 0 | } |
355 | | |
356 | | } // namespace ROCKSDB_NAMESPACE |