/src/rocksdb/table/meta_blocks.cc
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | #include "table/meta_blocks.h" |
6 | | |
7 | | #include <map> |
8 | | #include <string> |
9 | | |
10 | | #include "block_fetcher.h" |
11 | | #include "db/table_properties_collector.h" |
12 | | #include "file/random_access_file_reader.h" |
13 | | #include "logging/logging.h" |
14 | | #include "rocksdb/options.h" |
15 | | #include "rocksdb/table.h" |
16 | | #include "rocksdb/table_properties.h" |
17 | | #include "table/block_based/block.h" |
18 | | #include "table/block_based/reader_common.h" |
19 | | #include "table/format.h" |
20 | | #include "table/internal_iterator.h" |
21 | | #include "table/persistent_cache_helper.h" |
22 | | #include "table/sst_file_writer_collectors.h" |
23 | | #include "table/table_properties_internal.h" |
24 | | #include "test_util/sync_point.h" |
25 | | #include "util/coding.h" |
26 | | |
27 | | namespace ROCKSDB_NAMESPACE { |
28 | | |
// Names of well-known meta blocks. kPropertiesBlockName is the key that
// ReadTableProperties() looks up in the metaindex block.
const std::string kPropertiesBlockName{"rocksdb.properties"};
// NB: only used with format_version >= 6
const std::string kIndexBlockName{"rocksdb.index"};
const std::string kCompressionDictBlockName{"rocksdb.compression_dict"};
const std::string kRangeDelBlockName{"rocksdb.range_del"};
34 | | |
35 | | MetaIndexBuilder::MetaIndexBuilder() |
36 | 16.9k | : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {} |
37 | | |
38 | 36.4k | void MetaIndexBuilder::Add(const std::string& key, const BlockHandle& handle) { |
39 | 36.4k | std::string handle_encoding; |
40 | 36.4k | handle.EncodeTo(&handle_encoding); |
41 | 36.4k | meta_block_handles_.insert({key, handle_encoding}); |
42 | 36.4k | } |
43 | | |
44 | 16.9k | Slice MetaIndexBuilder::Finish() { |
45 | 36.4k | for (const auto& metablock : meta_block_handles_) { |
46 | 36.4k | meta_index_block_->Add(metablock.first, metablock.second); |
47 | 36.4k | } |
48 | 16.9k | return meta_index_block_->Finish(); |
49 | 16.9k | } |
50 | | |
51 | | // Property block will be read sequentially and cached in a heap located |
52 | | // object, so there's no need for restart points. Thus we set the restart |
53 | | // interval to infinity to save space. |
54 | | PropertyBlockBuilder::PropertyBlockBuilder() |
55 | 16.9k | : properties_block_(new BlockBuilder( |
56 | 16.9k | std::numeric_limits<int32_t>::max() /* restart interval */)) {} |
57 | | |
58 | | void PropertyBlockBuilder::Add(const std::string& name, |
59 | 681k | const std::string& val) { |
60 | 681k | assert(props_.find(name) == props_.end()); |
61 | 681k | props_.insert({name, val}); |
62 | 681k | } |
63 | | |
64 | 444k | void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) { |
65 | 444k | std::string dst; |
66 | 444k | PutVarint64(&dst, val); |
67 | | |
68 | 444k | Add(name, dst); |
69 | 444k | } |
70 | | |
71 | | void PropertyBlockBuilder::Add( |
72 | 16.9k | const UserCollectedProperties& user_collected_properties) { |
73 | 67.8k | for (const auto& prop : user_collected_properties) { |
74 | 67.8k | Add(prop.first, prop.second); |
75 | 67.8k | } |
76 | 16.9k | } |
77 | | |
78 | 16.9k | void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { |
79 | 16.9k | TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start", |
80 | 16.9k | const_cast<TableProperties*>(&props)); |
81 | | |
82 | 16.9k | Add(TablePropertiesNames::kOriginalFileNumber, props.orig_file_number); |
83 | 16.9k | Add(TablePropertiesNames::kRawKeySize, props.raw_key_size); |
84 | 16.9k | Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); |
85 | 16.9k | Add(TablePropertiesNames::kDataSize, props.data_size); |
86 | 16.9k | Add(TablePropertiesNames::kIndexSize, props.index_size); |
87 | 16.9k | if (props.index_partitions != 0) { |
88 | 0 | Add(TablePropertiesNames::kIndexPartitions, props.index_partitions); |
89 | 0 | Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size); |
90 | 0 | } |
91 | 16.9k | Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key); |
92 | 16.9k | Add(TablePropertiesNames::kIndexValueIsDeltaEncoded, |
93 | 16.9k | props.index_value_is_delta_encoded); |
94 | 16.9k | Add(TablePropertiesNames::kNumEntries, props.num_entries); |
95 | 16.9k | Add(TablePropertiesNames::kNumFilterEntries, props.num_filter_entries); |
96 | 16.9k | Add(TablePropertiesNames::kDeletedKeys, props.num_deletions); |
97 | 16.9k | Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands); |
98 | 16.9k | Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions); |
99 | 16.9k | Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); |
100 | 16.9k | Add(TablePropertiesNames::kNumUniformBlocks, props.num_uniform_blocks); |
101 | 16.9k | Add(TablePropertiesNames::kFilterSize, props.filter_size); |
102 | 16.9k | Add(TablePropertiesNames::kFormatVersion, props.format_version); |
103 | 16.9k | Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len); |
104 | 16.9k | Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id); |
105 | 16.9k | Add(TablePropertiesNames::kCreationTime, props.creation_time); |
106 | 16.9k | Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time); |
107 | 16.9k | Add(TablePropertiesNames::kNewestKeyTime, props.newest_key_time); |
108 | 16.9k | if (props.file_creation_time > 0) { |
109 | 2.92k | Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time); |
110 | 2.92k | } |
111 | 16.9k | if (props.slow_compression_estimated_data_size > 0) { |
112 | 0 | Add(TablePropertiesNames::kSlowCompressionEstimatedDataSize, |
113 | 0 | props.slow_compression_estimated_data_size); |
114 | 0 | } |
115 | 16.9k | if (props.fast_compression_estimated_data_size > 0) { |
116 | 0 | Add(TablePropertiesNames::kFastCompressionEstimatedDataSize, |
117 | 0 | props.fast_compression_estimated_data_size); |
118 | 0 | } |
119 | 16.9k | Add(TablePropertiesNames::kTailStartOffset, props.tail_start_offset); |
120 | 16.9k | if (props.user_defined_timestamps_persisted == 0) { |
121 | 0 | Add(TablePropertiesNames::kUserDefinedTimestampsPersisted, |
122 | 0 | props.user_defined_timestamps_persisted); |
123 | 0 | } |
124 | 16.9k | if (!props.db_id.empty()) { |
125 | 16.9k | Add(TablePropertiesNames::kDbId, props.db_id); |
126 | 16.9k | } |
127 | 16.9k | if (!props.db_session_id.empty()) { |
128 | 16.9k | Add(TablePropertiesNames::kDbSessionId, props.db_session_id); |
129 | 16.9k | } |
130 | 16.9k | if (!props.db_host_id.empty()) { |
131 | 16.9k | Add(TablePropertiesNames::kDbHostId, props.db_host_id); |
132 | 16.9k | } |
133 | | |
134 | 16.9k | if (!props.filter_policy_name.empty()) { |
135 | 0 | Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name); |
136 | 0 | } |
137 | 16.9k | if (!props.comparator_name.empty()) { |
138 | 16.9k | Add(TablePropertiesNames::kComparator, props.comparator_name); |
139 | 16.9k | } |
140 | | |
141 | 16.9k | if (!props.merge_operator_name.empty()) { |
142 | 16.9k | Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name); |
143 | 16.9k | } |
144 | 16.9k | if (!props.prefix_extractor_name.empty()) { |
145 | 16.9k | Add(TablePropertiesNames::kPrefixExtractorName, |
146 | 16.9k | props.prefix_extractor_name); |
147 | 16.9k | } |
148 | 16.9k | if (!props.property_collectors_names.empty()) { |
149 | 16.9k | Add(TablePropertiesNames::kPropertyCollectors, |
150 | 16.9k | props.property_collectors_names); |
151 | 16.9k | } |
152 | 16.9k | if (!props.column_family_name.empty()) { |
153 | 16.9k | Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name); |
154 | 16.9k | } |
155 | | |
156 | 16.9k | if (!props.compression_name.empty()) { |
157 | 16.9k | Add(TablePropertiesNames::kCompression, props.compression_name); |
158 | 16.9k | } |
159 | 16.9k | if (!props.compression_options.empty()) { |
160 | 16.9k | Add(TablePropertiesNames::kCompressionOptions, props.compression_options); |
161 | 16.9k | } |
162 | 16.9k | if (!props.seqno_to_time_mapping.empty()) { |
163 | 0 | Add(TablePropertiesNames::kSequenceNumberTimeMapping, |
164 | 0 | props.seqno_to_time_mapping); |
165 | 0 | } |
166 | 16.9k | if (props.key_largest_seqno != UINT64_MAX) { |
167 | 16.9k | Add(TablePropertiesNames::kKeyLargestSeqno, props.key_largest_seqno); |
168 | 16.9k | } |
169 | 16.9k | if (props.key_smallest_seqno != UINT64_MAX) { |
170 | 16.9k | Add(TablePropertiesNames::kKeySmallestSeqno, props.key_smallest_seqno); |
171 | 16.9k | } |
172 | 16.9k | if (props.data_block_restart_interval > 0) { |
173 | 16.9k | Add(TablePropertiesNames::kDataBlockRestartInterval, |
174 | 16.9k | props.data_block_restart_interval); |
175 | 16.9k | } |
176 | 16.9k | if (props.index_block_restart_interval > 0) { |
177 | 16.9k | Add(TablePropertiesNames::kIndexBlockRestartInterval, |
178 | 16.9k | props.index_block_restart_interval); |
179 | 16.9k | } |
180 | 16.9k | if (props.separate_key_value_in_data_block > 0) { |
181 | 0 | Add(TablePropertiesNames::kSeparateKeyValueInDataBlock, |
182 | 0 | props.separate_key_value_in_data_block); |
183 | 0 | } |
184 | 16.9k | } |
185 | | |
186 | 16.9k | Slice PropertyBlockBuilder::Finish() { |
187 | 681k | for (const auto& prop : props_) { |
188 | 681k | assert(last_prop_added_to_block_.empty() || |
189 | 681k | comparator_->Compare(prop.first, last_prop_added_to_block_) > 0); |
190 | 681k | properties_block_->Add(prop.first, prop.second); |
191 | | #ifndef NDEBUG |
192 | | last_prop_added_to_block_ = prop.first; |
193 | | #endif /* !NDEBUG */ |
194 | 681k | } |
195 | | |
196 | 16.9k | return properties_block_->Finish(); |
197 | 16.9k | } |
198 | | |
199 | | void LogPropertiesCollectionError(Logger* info_log, const std::string& method, |
200 | 0 | const std::string& name) { |
201 | 0 | assert(method == "Add" || method == "Finish"); |
202 | |
|
203 | 0 | std::string msg = |
204 | 0 | "Encountered error when calling TablePropertiesCollector::" + method + |
205 | 0 | "() with collector name: " + name; |
206 | 0 | ROCKS_LOG_ERROR(info_log, "%s", msg.c_str()); |
207 | 0 | } |
208 | | |
209 | | bool NotifyCollectTableCollectorsOnAdd( |
210 | | const Slice& key, const Slice& value, uint64_t file_size, |
211 | | const std::vector<std::unique_ptr<InternalTblPropColl>>& collectors, |
212 | 109k | Logger* info_log) { |
213 | 109k | bool all_succeeded = true; |
214 | 109k | for (auto& collector : collectors) { |
215 | 109k | Status s = collector->InternalAdd(key, value, file_size); |
216 | 109k | all_succeeded = all_succeeded && s.ok(); |
217 | 109k | if (!s.ok()) { |
218 | 0 | LogPropertiesCollectionError(info_log, "Add" /* method */, |
219 | 0 | collector->Name()); |
220 | 0 | } |
221 | 109k | } |
222 | 109k | return all_succeeded; |
223 | 109k | } |
224 | | |
225 | | void NotifyCollectTableCollectorsOnBlockAdd( |
226 | | const std::vector<std::unique_ptr<InternalTblPropColl>>& collectors, |
227 | | const uint64_t block_uncomp_bytes, |
228 | | const uint64_t block_compressed_bytes_fast, |
229 | 19.3k | const uint64_t block_compressed_bytes_slow) { |
230 | 19.3k | for (auto& collector : collectors) { |
231 | 19.3k | collector->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast, |
232 | 19.3k | block_compressed_bytes_slow); |
233 | 19.3k | } |
234 | 19.3k | } |
235 | | |
236 | | bool NotifyCollectTableCollectorsOnFinish( |
237 | | const std::vector<std::unique_ptr<InternalTblPropColl>>& collectors, |
238 | | Logger* info_log, PropertyBlockBuilder* builder, |
239 | | UserCollectedProperties& user_collected_properties, |
240 | 16.9k | UserCollectedProperties& readable_properties) { |
241 | 16.9k | bool all_succeeded = true; |
242 | 16.9k | for (auto& collector : collectors) { |
243 | 16.9k | UserCollectedProperties user_properties; |
244 | 16.9k | Status s = collector->Finish(&user_properties); |
245 | 16.9k | if (s.ok()) { |
246 | 16.9k | for (const auto& prop : collector->GetReadableProperties()) { |
247 | 0 | readable_properties.insert(prop); |
248 | 0 | } |
249 | | #ifndef NDEBUG |
250 | | // Check different user properties collectors are not adding properties of |
251 | | // the same name. |
252 | | for (const auto& pair : user_properties) { |
253 | | assert(user_collected_properties.find(pair.first) == |
254 | | user_collected_properties.end()); |
255 | | } |
256 | | #endif /* !NDEBUG */ |
257 | 16.9k | user_collected_properties.merge(user_properties); |
258 | 16.9k | } else { |
259 | 2 | LogPropertiesCollectionError(info_log, "Finish" /* method */, |
260 | 2 | collector->Name()); |
261 | 2 | if (all_succeeded) { |
262 | 0 | all_succeeded = false; |
263 | 0 | } |
264 | 2 | } |
265 | 16.9k | } |
266 | 16.9k | builder->Add(user_collected_properties); |
267 | 16.9k | return all_succeeded; |
268 | 16.9k | } |
269 | | |
270 | | Status ParsePropertiesBlock( |
271 | | const ImmutableOptions& ioptions, uint64_t offset, Block& properties_block, |
272 | 77.9k | std::unique_ptr<TableProperties>& new_table_properties) { |
273 | 77.9k | std::unique_ptr<MetaBlockIter> iter(properties_block.NewMetaIterator()); |
274 | | |
275 | | // All pre-defined properties of type uint64_t |
276 | 77.9k | std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = { |
277 | 77.9k | {TablePropertiesNames::kOriginalFileNumber, |
278 | 77.9k | &new_table_properties->orig_file_number}, |
279 | 77.9k | {TablePropertiesNames::kDataSize, &new_table_properties->data_size}, |
280 | 77.9k | {TablePropertiesNames::kIndexSize, &new_table_properties->index_size}, |
281 | 77.9k | {TablePropertiesNames::kIndexPartitions, |
282 | 77.9k | &new_table_properties->index_partitions}, |
283 | 77.9k | {TablePropertiesNames::kTopLevelIndexSize, |
284 | 77.9k | &new_table_properties->top_level_index_size}, |
285 | 77.9k | {TablePropertiesNames::kIndexKeyIsUserKey, |
286 | 77.9k | &new_table_properties->index_key_is_user_key}, |
287 | 77.9k | {TablePropertiesNames::kIndexValueIsDeltaEncoded, |
288 | 77.9k | &new_table_properties->index_value_is_delta_encoded}, |
289 | 77.9k | {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size}, |
290 | 77.9k | {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size}, |
291 | 77.9k | {TablePropertiesNames::kRawValueSize, |
292 | 77.9k | &new_table_properties->raw_value_size}, |
293 | 77.9k | {TablePropertiesNames::kNumDataBlocks, |
294 | 77.9k | &new_table_properties->num_data_blocks}, |
295 | 77.9k | {TablePropertiesNames::kNumUniformBlocks, |
296 | 77.9k | &new_table_properties->num_uniform_blocks}, |
297 | 77.9k | {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, |
298 | 77.9k | {TablePropertiesNames::kNumFilterEntries, |
299 | 77.9k | &new_table_properties->num_filter_entries}, |
300 | 77.9k | {TablePropertiesNames::kDeletedKeys, |
301 | 77.9k | &new_table_properties->num_deletions}, |
302 | 77.9k | {TablePropertiesNames::kMergeOperands, |
303 | 77.9k | &new_table_properties->num_merge_operands}, |
304 | 77.9k | {TablePropertiesNames::kNumRangeDeletions, |
305 | 77.9k | &new_table_properties->num_range_deletions}, |
306 | 77.9k | {TablePropertiesNames::kFormatVersion, |
307 | 77.9k | &new_table_properties->format_version}, |
308 | 77.9k | {TablePropertiesNames::kFixedKeyLen, |
309 | 77.9k | &new_table_properties->fixed_key_len}, |
310 | 77.9k | {TablePropertiesNames::kColumnFamilyId, |
311 | 77.9k | &new_table_properties->column_family_id}, |
312 | 77.9k | {TablePropertiesNames::kCreationTime, |
313 | 77.9k | &new_table_properties->creation_time}, |
314 | 77.9k | {TablePropertiesNames::kOldestKeyTime, |
315 | 77.9k | &new_table_properties->oldest_key_time}, |
316 | 77.9k | {TablePropertiesNames::kNewestKeyTime, |
317 | 77.9k | &new_table_properties->newest_key_time}, |
318 | 77.9k | {TablePropertiesNames::kFileCreationTime, |
319 | 77.9k | &new_table_properties->file_creation_time}, |
320 | 77.9k | {TablePropertiesNames::kSlowCompressionEstimatedDataSize, |
321 | 77.9k | &new_table_properties->slow_compression_estimated_data_size}, |
322 | 77.9k | {TablePropertiesNames::kFastCompressionEstimatedDataSize, |
323 | 77.9k | &new_table_properties->fast_compression_estimated_data_size}, |
324 | 77.9k | {TablePropertiesNames::kTailStartOffset, |
325 | 77.9k | &new_table_properties->tail_start_offset}, |
326 | 77.9k | {TablePropertiesNames::kUserDefinedTimestampsPersisted, |
327 | 77.9k | &new_table_properties->user_defined_timestamps_persisted}, |
328 | 77.9k | {TablePropertiesNames::kKeyLargestSeqno, |
329 | 77.9k | &new_table_properties->key_largest_seqno}, |
330 | 77.9k | {TablePropertiesNames::kKeySmallestSeqno, |
331 | 77.9k | &new_table_properties->key_smallest_seqno}, |
332 | 77.9k | {TablePropertiesNames::kDataBlockRestartInterval, |
333 | 77.9k | &new_table_properties->data_block_restart_interval}, |
334 | 77.9k | {TablePropertiesNames::kIndexBlockRestartInterval, |
335 | 77.9k | &new_table_properties->index_block_restart_interval}, |
336 | 77.9k | {TablePropertiesNames::kSeparateKeyValueInDataBlock, |
337 | 77.9k | &new_table_properties->separate_key_value_in_data_block}, |
338 | 77.9k | }; |
339 | | |
340 | 77.9k | Status s; |
341 | 77.9k | std::string last_key; |
342 | 2.78M | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { |
343 | 2.70M | s = iter->status(); |
344 | 2.70M | if (!s.ok()) { |
345 | 0 | break; |
346 | 0 | } |
347 | | |
348 | 2.70M | auto key = iter->key().ToString(); |
349 | | // properties block should be strictly sorted with no duplicate key. |
350 | 2.70M | if (!last_key.empty() && |
351 | 2.64M | BytewiseComparator()->Compare(key, last_key) <= 0) { |
352 | 0 | s = Status::Corruption("properties unsorted"); |
353 | 0 | break; |
354 | 0 | } |
355 | 2.70M | last_key = key; |
356 | | |
357 | 2.70M | auto raw_val = iter->value(); |
358 | 2.70M | auto pos = predefined_uint64_properties.find(key); |
359 | | |
360 | 2.70M | if (key == ExternalSstFilePropertyNames::kGlobalSeqno) { |
361 | 0 | new_table_properties->external_sst_file_global_seqno_offset = |
362 | 0 | offset + iter->ValueOffset(); |
363 | 0 | } |
364 | | |
365 | 2.70M | if (pos != predefined_uint64_properties.end()) { |
366 | 2.00M | if (key == TablePropertiesNames::kDeletedKeys || |
367 | 1.93M | key == TablePropertiesNames::kMergeOperands) { |
368 | | // Insert in user-collected properties for API backwards compatibility |
369 | 155k | new_table_properties->user_collected_properties.insert( |
370 | 155k | {key, raw_val.ToString()}); |
371 | 155k | } |
372 | | // handle predefined rocksdb properties |
373 | 2.00M | uint64_t val; |
374 | 2.00M | if (!GetVarint64(&raw_val, &val)) { |
375 | | // skip malformed value |
376 | 0 | auto error_msg = |
377 | 0 | "Detect malformed value in properties meta-block:" |
378 | 0 | "\tkey: " + |
379 | 0 | key + "\tval: " + raw_val.ToString(); |
380 | 0 | ROCKS_LOG_ERROR(ioptions.logger, "%s", error_msg.c_str()); |
381 | 0 | continue; |
382 | 0 | } |
383 | 2.00M | *(pos->second) = val; |
384 | 2.00M | } else if (key == TablePropertiesNames::kDbId) { |
385 | 78.0k | new_table_properties->db_id = raw_val.ToString(); |
386 | 621k | } else if (key == TablePropertiesNames::kDbSessionId) { |
387 | 78.0k | new_table_properties->db_session_id = raw_val.ToString(); |
388 | 542k | } else if (key == TablePropertiesNames::kDbHostId) { |
389 | 78.0k | new_table_properties->db_host_id = raw_val.ToString(); |
390 | 464k | } else if (key == TablePropertiesNames::kFilterPolicy) { |
391 | 0 | new_table_properties->filter_policy_name = raw_val.ToString(); |
392 | 464k | } else if (key == TablePropertiesNames::kColumnFamilyName) { |
393 | 78.0k | new_table_properties->column_family_name = raw_val.ToString(); |
394 | 386k | } else if (key == TablePropertiesNames::kComparator) { |
395 | 78.0k | new_table_properties->comparator_name = raw_val.ToString(); |
396 | 308k | } else if (key == TablePropertiesNames::kMergeOperator) { |
397 | 78.0k | new_table_properties->merge_operator_name = raw_val.ToString(); |
398 | 230k | } else if (key == TablePropertiesNames::kPrefixExtractorName) { |
399 | 78.0k | new_table_properties->prefix_extractor_name = raw_val.ToString(); |
400 | 152k | } else if (key == TablePropertiesNames::kPropertyCollectors) { |
401 | 78.0k | new_table_properties->property_collectors_names = raw_val.ToString(); |
402 | 78.0k | } else if (key == TablePropertiesNames::kCompression) { |
403 | 78.0k | new_table_properties->compression_name = raw_val.ToString(); |
404 | 18.4E | } else if (key == TablePropertiesNames::kCompressionOptions) { |
405 | 77.9k | new_table_properties->compression_options = raw_val.ToString(); |
406 | 18.4E | } else if (key == TablePropertiesNames::kSequenceNumberTimeMapping) { |
407 | 0 | new_table_properties->seqno_to_time_mapping = raw_val.ToString(); |
408 | 18.4E | } else { |
409 | | // handle user-collected properties |
410 | 18.4E | new_table_properties->user_collected_properties.insert( |
411 | 18.4E | {key, raw_val.ToString()}); |
412 | 18.4E | } |
413 | 2.70M | } |
414 | | |
415 | 77.9k | return s; |
416 | 77.9k | } |
417 | | |
418 | | // FIXME: should be a parameter for reading table properties to use persistent |
419 | | // cache? |
420 | | Status ReadTablePropertiesHelper( |
421 | | const ReadOptions& ro, const BlockHandle& handle, |
422 | | RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, |
423 | | const Footer& footer, const ImmutableOptions& ioptions, |
424 | | std::unique_ptr<TableProperties>* table_properties, |
425 | 77.8k | MemoryAllocator* memory_allocator) { |
426 | 77.8k | assert(table_properties); |
427 | | |
428 | 77.8k | Status s; |
429 | 77.8k | bool retry = false; |
430 | 77.8k | while (true) { |
431 | 77.8k | BlockContents block_contents; |
432 | 77.8k | size_t len = handle.size() + footer.GetBlockTrailerSize(); |
433 | | // If this is an external SST file ingested with write_global_seqno set to |
434 | | // true, then we expect the checksum mismatch because checksum was written |
435 | | // by SstFileWriter, but its global seqno in the properties block may have |
436 | | // been changed during ingestion. For this reason, we initially read |
437 | | // and process without checksum verification, then later try checksum |
438 | | // verification so that if it fails, we can copy to a temporary buffer with |
439 | | // global seqno set to its original value, i.e. 0, and attempt checksum |
440 | | // verification again. |
441 | 77.8k | if (!retry) { |
442 | 77.7k | ReadOptions modified_ro = ro; |
443 | 77.7k | modified_ro.verify_checksums = false; |
444 | 77.7k | BlockFetcher block_fetcher( |
445 | 77.7k | file, prefetch_buffer, footer, modified_ro, handle, &block_contents, |
446 | 77.7k | ioptions, false /* decompress */, false /*maybe_compressed*/, |
447 | 77.7k | BlockType::kProperties, nullptr /*decompressor*/, |
448 | 77.7k | PersistentCacheOptions::kEmpty, memory_allocator); |
449 | 77.7k | s = block_fetcher.ReadBlockContents(); |
450 | 77.7k | if (!s.ok()) { |
451 | 0 | return s; |
452 | 0 | } |
453 | 77.7k | assert(block_fetcher.GetBlockSizeWithTrailer() == len); |
454 | 77.7k | TEST_SYNC_POINT_CALLBACK("ReadTablePropertiesHelper:0", |
455 | 77.7k | &block_contents.data); |
456 | 77.7k | } else { |
457 | 36 | assert(s.IsCorruption()); |
458 | | // If retrying, use a stronger file system read to check and correct |
459 | | // data corruption |
460 | 36 | IOOptions opts; |
461 | 36 | IODebugContext dbg; |
462 | 36 | if (PrepareIOFromReadOptions(ro, ioptions.clock, opts, &dbg) != |
463 | 36 | IOStatus::OK()) { |
464 | 0 | return s; |
465 | 0 | } |
466 | 36 | opts.verify_and_reconstruct_read = true; |
467 | 36 | std::unique_ptr<char[]> data(new char[len]); |
468 | 36 | Slice result; |
469 | 36 | IOStatus io_s = file->Read(opts, handle.offset(), len, &result, |
470 | 36 | data.get(), nullptr, &dbg); |
471 | 36 | RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_COUNT); |
472 | 36 | if (!io_s.ok()) { |
473 | 0 | ROCKS_LOG_INFO(ioptions.info_log, |
474 | 0 | "Reading properties block failed - %s", |
475 | 0 | io_s.ToString().c_str()); |
476 | | // Return the original corruption error as that's more serious |
477 | 0 | return s; |
478 | 0 | } |
479 | 36 | if (result.size() < len) { |
480 | 0 | return Status::Corruption("Reading properties block failed - " + |
481 | 0 | std::to_string(result.size()) + |
482 | 0 | " bytes read"); |
483 | 0 | } |
484 | 36 | RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT); |
485 | 36 | block_contents = BlockContents(std::move(data), handle.size()); |
486 | 36 | } |
487 | | |
488 | 77.8k | uint64_t block_size = block_contents.data.size(); |
489 | 77.8k | Block properties_block(std::move(block_contents)); |
490 | 77.8k | std::unique_ptr<TableProperties> new_table_properties{new TableProperties}; |
491 | 77.8k | s = ParsePropertiesBlock(ioptions, handle.offset(), properties_block, |
492 | 77.8k | new_table_properties); |
493 | | |
494 | | // Modified version of BlockFetcher checksum verification |
495 | | // (See write_global_seqno comment above) |
496 | 77.9k | if (s.ok() && footer.GetBlockTrailerSize() > 0) { |
497 | 77.9k | s = VerifyBlockChecksum(footer, properties_block.data(), block_size, |
498 | 77.9k | file->file_name(), handle.offset(), |
499 | 77.9k | BlockType::kProperties); |
500 | 77.9k | if (s.IsCorruption()) { |
501 | 0 | if (new_table_properties->external_sst_file_global_seqno_offset != 0) { |
502 | 0 | std::string tmp_buf(properties_block.data(), len); |
503 | 0 | uint64_t global_seqno_offset = |
504 | 0 | new_table_properties->external_sst_file_global_seqno_offset - |
505 | 0 | handle.offset(); |
506 | 0 | EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0); |
507 | 0 | s = VerifyBlockChecksum(footer, tmp_buf.data(), block_size, |
508 | 0 | file->file_name(), handle.offset(), |
509 | 0 | BlockType::kProperties); |
510 | 0 | } |
511 | 0 | } |
512 | 77.9k | } |
513 | | |
514 | | // If we detected a corruption and the file system supports verification |
515 | | // and reconstruction, retry the read |
516 | 77.8k | if (s.IsCorruption() && !retry && |
517 | 0 | CheckFSFeatureSupport(ioptions.fs.get(), |
518 | 0 | FSSupportedOps::kVerifyAndReconstructRead)) { |
519 | 0 | retry = true; |
520 | 77.8k | } else { |
521 | 77.8k | if (s.ok()) { |
522 | 77.7k | *table_properties = std::move(new_table_properties); |
523 | 77.7k | } |
524 | 77.8k | break; |
525 | 77.8k | } |
526 | 77.8k | } |
527 | | |
528 | 77.8k | return s; |
529 | 77.8k | } |
530 | | |
531 | | Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, |
532 | | uint64_t table_magic_number, |
533 | | const ImmutableOptions& ioptions, |
534 | | const ReadOptions& read_options, |
535 | | std::unique_ptr<TableProperties>* properties, |
536 | | MemoryAllocator* memory_allocator, |
537 | 0 | FilePrefetchBuffer* prefetch_buffer) { |
538 | 0 | BlockHandle block_handle; |
539 | 0 | Footer footer; |
540 | 0 | Status s = |
541 | 0 | FindMetaBlockInFile(file, file_size, table_magic_number, ioptions, |
542 | 0 | read_options, kPropertiesBlockName, &block_handle, |
543 | 0 | memory_allocator, prefetch_buffer, &footer); |
544 | 0 | if (!s.ok()) { |
545 | 0 | return s; |
546 | 0 | } |
547 | | |
548 | 0 | if (!block_handle.IsNull()) { |
549 | 0 | s = ReadTablePropertiesHelper(read_options, block_handle, file, |
550 | 0 | prefetch_buffer, footer, ioptions, properties, |
551 | 0 | memory_allocator); |
552 | 0 | } else { |
553 | 0 | s = Status::NotFound(); |
554 | 0 | } |
555 | 0 | return s; |
556 | 0 | } |
557 | | |
558 | | Status FindOptionalMetaBlock(InternalIterator* meta_index_iter, |
559 | | const std::string& meta_block_name, |
560 | 309k | BlockHandle* block_handle) { |
561 | 309k | assert(block_handle != nullptr); |
562 | 309k | meta_index_iter->Seek(meta_block_name); |
563 | 309k | if (meta_index_iter->status().ok()) { |
564 | 304k | if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) { |
565 | 156k | Slice v = meta_index_iter->value(); |
566 | 156k | return block_handle->DecodeFrom(&v); |
567 | 156k | } |
568 | 304k | } |
569 | | // else |
570 | 153k | *block_handle = BlockHandle::NullBlockHandle(); |
571 | 153k | return meta_index_iter->status(); |
572 | 309k | } |
573 | | |
574 | | Status FindMetaBlock(InternalIterator* meta_index_iter, |
575 | | const std::string& meta_block_name, |
576 | 77.7k | BlockHandle* block_handle) { |
577 | 77.7k | Status s = |
578 | 77.7k | FindOptionalMetaBlock(meta_index_iter, meta_block_name, block_handle); |
579 | 77.7k | if (s.ok() && block_handle->IsNull()) { |
580 | 0 | return Status::Corruption("Cannot find the meta block", meta_block_name); |
581 | 77.7k | } else { |
582 | 77.7k | return s; |
583 | 77.7k | } |
584 | 77.7k | } |
585 | | |
586 | | Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file, |
587 | | uint64_t file_size, uint64_t table_magic_number, |
588 | | const ImmutableOptions& ioptions, |
589 | | const ReadOptions& read_options, |
590 | | BlockContents* metaindex_contents, |
591 | | MemoryAllocator* memory_allocator, |
592 | | FilePrefetchBuffer* prefetch_buffer, |
593 | 0 | Footer* footer_out) { |
594 | 0 | Footer footer; |
595 | 0 | IOOptions opts; |
596 | 0 | IODebugContext dbg; |
597 | 0 | Status s; |
598 | 0 | s = file->PrepareIOOptions(read_options, opts, &dbg); |
599 | 0 | if (!s.ok()) { |
600 | 0 | return s; |
601 | 0 | } |
602 | 0 | s = ReadFooterFromFile(opts, file, *ioptions.fs, prefetch_buffer, file_size, |
603 | 0 | &footer, table_magic_number, ioptions.stats); |
604 | 0 | if (!s.ok()) { |
605 | 0 | return s; |
606 | 0 | } |
607 | 0 | if (footer_out) { |
608 | 0 | *footer_out = footer; |
609 | 0 | } |
610 | |
|
611 | 0 | auto metaindex_handle = footer.metaindex_handle(); |
612 | 0 | return BlockFetcher(file, prefetch_buffer, footer, read_options, |
613 | 0 | metaindex_handle, metaindex_contents, ioptions, |
614 | 0 | false /* do decompression */, false /*maybe_compressed*/, |
615 | 0 | BlockType::kMetaIndex, nullptr /*decompressor*/, |
616 | 0 | PersistentCacheOptions::kEmpty, memory_allocator) |
617 | 0 | .ReadBlockContents(); |
618 | 0 | } |
619 | | |
620 | | Status FindMetaBlockInFile( |
621 | | RandomAccessFileReader* file, uint64_t file_size, |
622 | | uint64_t table_magic_number, const ImmutableOptions& ioptions, |
623 | | const ReadOptions& read_options, const std::string& meta_block_name, |
624 | | BlockHandle* block_handle, MemoryAllocator* memory_allocator, |
625 | 0 | FilePrefetchBuffer* prefetch_buffer, Footer* footer_out) { |
626 | 0 | BlockContents metaindex_contents; |
627 | 0 | auto s = ReadMetaIndexBlockInFile( |
628 | 0 | file, file_size, table_magic_number, ioptions, read_options, |
629 | 0 | &metaindex_contents, memory_allocator, prefetch_buffer, footer_out); |
630 | 0 | if (!s.ok()) { |
631 | 0 | return s; |
632 | 0 | } |
633 | | // meta blocks are never compressed. Need to add uncompress logic if we are to |
634 | | // compress it. |
635 | 0 | Block metaindex_block(std::move(metaindex_contents)); |
636 | |
|
637 | 0 | std::unique_ptr<InternalIterator> meta_iter; |
638 | 0 | meta_iter.reset(metaindex_block.NewMetaIterator()); |
639 | |
|
640 | 0 | return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); |
641 | 0 | } |
642 | | |
643 | | Status ReadMetaBlock(RandomAccessFileReader* file, |
644 | | FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, |
645 | | uint64_t table_magic_number, |
646 | | const ImmutableOptions& ioptions, |
647 | | const ReadOptions& read_options, |
648 | | const std::string& meta_block_name, BlockType block_type, |
649 | | BlockContents* contents, |
650 | 0 | MemoryAllocator* memory_allocator) { |
651 | | // TableProperties requires special handling because of checksum issues. |
652 | | // Call ReadTableProperties instead for that case. |
653 | 0 | assert(block_type != BlockType::kProperties); |
654 | |
|
655 | 0 | BlockHandle block_handle; |
656 | 0 | Footer footer; |
657 | 0 | Status status = |
658 | 0 | FindMetaBlockInFile(file, file_size, table_magic_number, ioptions, |
659 | 0 | read_options, meta_block_name, &block_handle, |
660 | 0 | memory_allocator, prefetch_buffer, &footer); |
661 | 0 | if (!status.ok()) { |
662 | 0 | return status; |
663 | 0 | } |
664 | | |
665 | 0 | return BlockFetcher(file, prefetch_buffer, footer, read_options, block_handle, |
666 | 0 | contents, ioptions, false /* decompress */, |
667 | 0 | false /*maybe_compressed*/, block_type, |
668 | 0 | nullptr /*decompressor*/, PersistentCacheOptions::kEmpty, |
669 | 0 | memory_allocator) |
670 | 0 | .ReadBlockContents(); |
671 | 0 | } |
672 | | |
673 | | } // namespace ROCKSDB_NAMESPACE |