Coverage Report

Created: 2026-03-31 07:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rocksdb/table/meta_blocks.cc
Line
Count
Source
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
#include "table/meta_blocks.h"
6
7
#include <map>
8
#include <string>
9
10
#include "block_fetcher.h"
11
#include "db/table_properties_collector.h"
12
#include "file/random_access_file_reader.h"
13
#include "logging/logging.h"
14
#include "rocksdb/options.h"
15
#include "rocksdb/table.h"
16
#include "rocksdb/table_properties.h"
17
#include "table/block_based/block.h"
18
#include "table/block_based/reader_common.h"
19
#include "table/format.h"
20
#include "table/internal_iterator.h"
21
#include "table/persistent_cache_helper.h"
22
#include "table/sst_file_writer_collectors.h"
23
#include "table/table_properties_internal.h"
24
#include "test_util/sync_point.h"
25
#include "util/coding.h"
26
27
namespace ROCKSDB_NAMESPACE {
28
29
// Names under which well-known meta blocks are registered in the metaindex
// block.
const std::string kPropertiesBlockName{"rocksdb.properties"};
// NB: only used with format_version >= 6
const std::string kIndexBlockName{"rocksdb.index"};
const std::string kCompressionDictBlockName{"rocksdb.compression_dict"};
const std::string kRangeDelBlockName{"rocksdb.range_del"};
34
35
// Creates an empty builder backed by a BlockBuilder that uses a restart
// interval of 1.
MetaIndexBuilder::MetaIndexBuilder()
    : meta_index_block_(new BlockBuilder(/* restart interval */ 1)) {}
37
38
36.4k
void MetaIndexBuilder::Add(const std::string& key, const BlockHandle& handle) {
39
36.4k
  std::string handle_encoding;
40
36.4k
  handle.EncodeTo(&handle_encoding);
41
36.4k
  meta_block_handles_.insert({key, handle_encoding});
42
36.4k
}
43
44
16.9k
// Writes every recorded (name, encoded handle) pair into the underlying block,
// in the iteration order of `meta_block_handles_`, and returns the finished
// block contents.
Slice MetaIndexBuilder::Finish() {
  for (const auto& [block_name, encoded_handle] : meta_block_handles_) {
    meta_index_block_->Add(block_name, encoded_handle);
  }
  return meta_index_block_->Finish();
}
50
51
// Property block will be read sequentially and cached in a heap located
52
// object, so there's no need for restart points. Thus we set the restart
53
// interval to infinity to save space.
54
PropertyBlockBuilder::PropertyBlockBuilder()
55
16.9k
    : properties_block_(new BlockBuilder(
56
16.9k
          std::numeric_limits<int32_t>::max() /* restart interval */)) {}
57
58
void PropertyBlockBuilder::Add(const std::string& name,
59
681k
                               const std::string& val) {
60
681k
  assert(props_.find(name) == props_.end());
61
681k
  props_.insert({name, val});
62
681k
}
63
64
444k
void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) {
65
444k
  std::string dst;
66
444k
  PutVarint64(&dst, val);
67
68
444k
  Add(name, dst);
69
444k
}
70
71
void PropertyBlockBuilder::Add(
72
16.9k
    const UserCollectedProperties& user_collected_properties) {
73
67.8k
  for (const auto& prop : user_collected_properties) {
74
67.8k
    Add(prop.first, prop.second);
75
67.8k
  }
76
16.9k
}
77
78
16.9k
void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
79
16.9k
  TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start",
80
16.9k
                           const_cast<TableProperties*>(&props));
81
82
16.9k
  Add(TablePropertiesNames::kOriginalFileNumber, props.orig_file_number);
83
16.9k
  Add(TablePropertiesNames::kRawKeySize, props.raw_key_size);
84
16.9k
  Add(TablePropertiesNames::kRawValueSize, props.raw_value_size);
85
16.9k
  Add(TablePropertiesNames::kDataSize, props.data_size);
86
16.9k
  Add(TablePropertiesNames::kIndexSize, props.index_size);
87
16.9k
  if (props.index_partitions != 0) {
88
0
    Add(TablePropertiesNames::kIndexPartitions, props.index_partitions);
89
0
    Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size);
90
0
  }
91
16.9k
  Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key);
92
16.9k
  Add(TablePropertiesNames::kIndexValueIsDeltaEncoded,
93
16.9k
      props.index_value_is_delta_encoded);
94
16.9k
  Add(TablePropertiesNames::kNumEntries, props.num_entries);
95
16.9k
  Add(TablePropertiesNames::kNumFilterEntries, props.num_filter_entries);
96
16.9k
  Add(TablePropertiesNames::kDeletedKeys, props.num_deletions);
97
16.9k
  Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands);
98
16.9k
  Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions);
99
16.9k
  Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks);
100
16.9k
  Add(TablePropertiesNames::kNumUniformBlocks, props.num_uniform_blocks);
101
16.9k
  Add(TablePropertiesNames::kFilterSize, props.filter_size);
102
16.9k
  Add(TablePropertiesNames::kFormatVersion, props.format_version);
103
16.9k
  Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len);
104
16.9k
  Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id);
105
16.9k
  Add(TablePropertiesNames::kCreationTime, props.creation_time);
106
16.9k
  Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time);
107
16.9k
  Add(TablePropertiesNames::kNewestKeyTime, props.newest_key_time);
108
16.9k
  if (props.file_creation_time > 0) {
109
2.92k
    Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time);
110
2.92k
  }
111
16.9k
  if (props.slow_compression_estimated_data_size > 0) {
112
0
    Add(TablePropertiesNames::kSlowCompressionEstimatedDataSize,
113
0
        props.slow_compression_estimated_data_size);
114
0
  }
115
16.9k
  if (props.fast_compression_estimated_data_size > 0) {
116
0
    Add(TablePropertiesNames::kFastCompressionEstimatedDataSize,
117
0
        props.fast_compression_estimated_data_size);
118
0
  }
119
16.9k
  Add(TablePropertiesNames::kTailStartOffset, props.tail_start_offset);
120
16.9k
  if (props.user_defined_timestamps_persisted == 0) {
121
0
    Add(TablePropertiesNames::kUserDefinedTimestampsPersisted,
122
0
        props.user_defined_timestamps_persisted);
123
0
  }
124
16.9k
  if (!props.db_id.empty()) {
125
16.9k
    Add(TablePropertiesNames::kDbId, props.db_id);
126
16.9k
  }
127
16.9k
  if (!props.db_session_id.empty()) {
128
16.9k
    Add(TablePropertiesNames::kDbSessionId, props.db_session_id);
129
16.9k
  }
130
16.9k
  if (!props.db_host_id.empty()) {
131
16.9k
    Add(TablePropertiesNames::kDbHostId, props.db_host_id);
132
16.9k
  }
133
134
16.9k
  if (!props.filter_policy_name.empty()) {
135
0
    Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
136
0
  }
137
16.9k
  if (!props.comparator_name.empty()) {
138
16.9k
    Add(TablePropertiesNames::kComparator, props.comparator_name);
139
16.9k
  }
140
141
16.9k
  if (!props.merge_operator_name.empty()) {
142
16.9k
    Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name);
143
16.9k
  }
144
16.9k
  if (!props.prefix_extractor_name.empty()) {
145
16.9k
    Add(TablePropertiesNames::kPrefixExtractorName,
146
16.9k
        props.prefix_extractor_name);
147
16.9k
  }
148
16.9k
  if (!props.property_collectors_names.empty()) {
149
16.9k
    Add(TablePropertiesNames::kPropertyCollectors,
150
16.9k
        props.property_collectors_names);
151
16.9k
  }
152
16.9k
  if (!props.column_family_name.empty()) {
153
16.9k
    Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name);
154
16.9k
  }
155
156
16.9k
  if (!props.compression_name.empty()) {
157
16.9k
    Add(TablePropertiesNames::kCompression, props.compression_name);
158
16.9k
  }
159
16.9k
  if (!props.compression_options.empty()) {
160
16.9k
    Add(TablePropertiesNames::kCompressionOptions, props.compression_options);
161
16.9k
  }
162
16.9k
  if (!props.seqno_to_time_mapping.empty()) {
163
0
    Add(TablePropertiesNames::kSequenceNumberTimeMapping,
164
0
        props.seqno_to_time_mapping);
165
0
  }
166
16.9k
  if (props.key_largest_seqno != UINT64_MAX) {
167
16.9k
    Add(TablePropertiesNames::kKeyLargestSeqno, props.key_largest_seqno);
168
16.9k
  }
169
16.9k
  if (props.key_smallest_seqno != UINT64_MAX) {
170
16.9k
    Add(TablePropertiesNames::kKeySmallestSeqno, props.key_smallest_seqno);
171
16.9k
  }
172
16.9k
  if (props.data_block_restart_interval > 0) {
173
16.9k
    Add(TablePropertiesNames::kDataBlockRestartInterval,
174
16.9k
        props.data_block_restart_interval);
175
16.9k
  }
176
16.9k
  if (props.index_block_restart_interval > 0) {
177
16.9k
    Add(TablePropertiesNames::kIndexBlockRestartInterval,
178
16.9k
        props.index_block_restart_interval);
179
16.9k
  }
180
16.9k
  if (props.separate_key_value_in_data_block > 0) {
181
0
    Add(TablePropertiesNames::kSeparateKeyValueInDataBlock,
182
0
        props.separate_key_value_in_data_block);
183
0
  }
184
16.9k
}
185
186
16.9k
// Writes all recorded properties into the underlying block and returns the
// finished block contents. Debug builds additionally check that each property
// name compares greater than the one written before it.
Slice PropertyBlockBuilder::Finish() {
  for (const auto& [prop_name, prop_value] : props_) {
    assert(last_prop_added_to_block_.empty() ||
           comparator_->Compare(prop_name, last_prop_added_to_block_) > 0);
    properties_block_->Add(prop_name, prop_value);
#ifndef NDEBUG
    last_prop_added_to_block_ = prop_name;
#endif /* !NDEBUG */
  }

  return properties_block_->Finish();
}
198
199
// Logs, at error level, that the TablePropertiesCollector method `method`
// ("Add" or "Finish") failed for the collector called `name`.
void LogPropertiesCollectionError(Logger* info_log, const std::string& method,
                                  const std::string& name) {
  assert(method == "Add" || method == "Finish");

  std::string msg("Encountered error when calling TablePropertiesCollector::");
  msg += method;
  msg += "() with collector name: ";
  msg += name;
  ROCKS_LOG_ERROR(info_log, "%s", msg.c_str());
}
208
209
bool NotifyCollectTableCollectorsOnAdd(
210
    const Slice& key, const Slice& value, uint64_t file_size,
211
    const std::vector<std::unique_ptr<InternalTblPropColl>>& collectors,
212
109k
    Logger* info_log) {
213
109k
  bool all_succeeded = true;
214
109k
  for (auto& collector : collectors) {
215
109k
    Status s = collector->InternalAdd(key, value, file_size);
216
109k
    all_succeeded = all_succeeded && s.ok();
217
109k
    if (!s.ok()) {
218
0
      LogPropertiesCollectionError(info_log, "Add" /* method */,
219
0
                                   collector->Name());
220
0
    }
221
109k
  }
222
109k
  return all_succeeded;
223
109k
}
224
225
void NotifyCollectTableCollectorsOnBlockAdd(
226
    const std::vector<std::unique_ptr<InternalTblPropColl>>& collectors,
227
    const uint64_t block_uncomp_bytes,
228
    const uint64_t block_compressed_bytes_fast,
229
19.3k
    const uint64_t block_compressed_bytes_slow) {
230
19.3k
  for (auto& collector : collectors) {
231
19.3k
    collector->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast,
232
19.3k
                        block_compressed_bytes_slow);
233
19.3k
  }
234
19.3k
}
235
236
bool NotifyCollectTableCollectorsOnFinish(
237
    const std::vector<std::unique_ptr<InternalTblPropColl>>& collectors,
238
    Logger* info_log, PropertyBlockBuilder* builder,
239
    UserCollectedProperties& user_collected_properties,
240
16.9k
    UserCollectedProperties& readable_properties) {
241
16.9k
  bool all_succeeded = true;
242
16.9k
  for (auto& collector : collectors) {
243
16.9k
    UserCollectedProperties user_properties;
244
16.9k
    Status s = collector->Finish(&user_properties);
245
16.9k
    if (s.ok()) {
246
16.9k
      for (const auto& prop : collector->GetReadableProperties()) {
247
0
        readable_properties.insert(prop);
248
0
      }
249
#ifndef NDEBUG
250
      // Check different user properties collectors are not adding properties of
251
      // the same name.
252
      for (const auto& pair : user_properties) {
253
        assert(user_collected_properties.find(pair.first) ==
254
               user_collected_properties.end());
255
      }
256
#endif /* !NDEBUG */
257
16.9k
      user_collected_properties.merge(user_properties);
258
16.9k
    } else {
259
2
      LogPropertiesCollectionError(info_log, "Finish" /* method */,
260
2
                                   collector->Name());
261
2
      if (all_succeeded) {
262
0
        all_succeeded = false;
263
0
      }
264
2
    }
265
16.9k
  }
266
16.9k
  builder->Add(user_collected_properties);
267
16.9k
  return all_succeeded;
268
16.9k
}
269
270
// Decodes the entries of `properties_block` into `*new_table_properties`.
//
// `offset` is added to the in-block offset of the global seqno value to form
// `external_sst_file_global_seqno_offset`. Keys must be strictly increasing in
// bytewise order; otherwise Status::Corruption is returned. A predefined
// numeric property whose value is not a valid varint64 is logged and skipped.
// Keys that are not predefined are stored in `user_collected_properties`.
// Returns the iterator's error status if iteration reports one.
Status ParsePropertiesBlock(
    const ImmutableOptions& ioptions, uint64_t offset, Block& properties_block,
    std::unique_ptr<TableProperties>& new_table_properties) {
  std::unique_ptr<MetaBlockIter> iter(properties_block.NewMetaIterator());
  TableProperties& props = *new_table_properties;

  // Destination field for every predefined property of type uint64_t.
  std::unordered_map<std::string, uint64_t*> uint64_fields = {
      {TablePropertiesNames::kOriginalFileNumber, &props.orig_file_number},
      {TablePropertiesNames::kDataSize, &props.data_size},
      {TablePropertiesNames::kIndexSize, &props.index_size},
      {TablePropertiesNames::kIndexPartitions, &props.index_partitions},
      {TablePropertiesNames::kTopLevelIndexSize, &props.top_level_index_size},
      {TablePropertiesNames::kIndexKeyIsUserKey, &props.index_key_is_user_key},
      {TablePropertiesNames::kIndexValueIsDeltaEncoded,
       &props.index_value_is_delta_encoded},
      {TablePropertiesNames::kFilterSize, &props.filter_size},
      {TablePropertiesNames::kRawKeySize, &props.raw_key_size},
      {TablePropertiesNames::kRawValueSize, &props.raw_value_size},
      {TablePropertiesNames::kNumDataBlocks, &props.num_data_blocks},
      {TablePropertiesNames::kNumUniformBlocks, &props.num_uniform_blocks},
      {TablePropertiesNames::kNumEntries, &props.num_entries},
      {TablePropertiesNames::kNumFilterEntries, &props.num_filter_entries},
      {TablePropertiesNames::kDeletedKeys, &props.num_deletions},
      {TablePropertiesNames::kMergeOperands, &props.num_merge_operands},
      {TablePropertiesNames::kNumRangeDeletions, &props.num_range_deletions},
      {TablePropertiesNames::kFormatVersion, &props.format_version},
      {TablePropertiesNames::kFixedKeyLen, &props.fixed_key_len},
      {TablePropertiesNames::kColumnFamilyId, &props.column_family_id},
      {TablePropertiesNames::kCreationTime, &props.creation_time},
      {TablePropertiesNames::kOldestKeyTime, &props.oldest_key_time},
      {TablePropertiesNames::kNewestKeyTime, &props.newest_key_time},
      {TablePropertiesNames::kFileCreationTime, &props.file_creation_time},
      {TablePropertiesNames::kSlowCompressionEstimatedDataSize,
       &props.slow_compression_estimated_data_size},
      {TablePropertiesNames::kFastCompressionEstimatedDataSize,
       &props.fast_compression_estimated_data_size},
      {TablePropertiesNames::kTailStartOffset, &props.tail_start_offset},
      {TablePropertiesNames::kUserDefinedTimestampsPersisted,
       &props.user_defined_timestamps_persisted},
      {TablePropertiesNames::kKeyLargestSeqno, &props.key_largest_seqno},
      {TablePropertiesNames::kKeySmallestSeqno, &props.key_smallest_seqno},
      {TablePropertiesNames::kDataBlockRestartInterval,
       &props.data_block_restart_interval},
      {TablePropertiesNames::kIndexBlockRestartInterval,
       &props.index_block_restart_interval},
      {TablePropertiesNames::kSeparateKeyValueInDataBlock,
       &props.separate_key_value_in_data_block},
  };

  Status s;
  std::string prev_key;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    s = iter->status();
    if (!s.ok()) {
      break;
    }

    const std::string key = iter->key().ToString();
    // The properties block must be strictly sorted, with no duplicate keys.
    if (!prev_key.empty() &&
        BytewiseComparator()->Compare(key, prev_key) <= 0) {
      s = Status::Corruption("properties unsorted");
      break;
    }
    prev_key = key;

    auto raw_val = iter->value();

    // Remember where the global seqno value is located so that the caller can
    // find it again.
    if (key == ExternalSstFilePropertyNames::kGlobalSeqno) {
      props.external_sst_file_global_seqno_offset =
          offset + iter->ValueOffset();
    }

    const auto pos = uint64_fields.find(key);
    if (pos != uint64_fields.end()) {
      if (key == TablePropertiesNames::kDeletedKeys ||
          key == TablePropertiesNames::kMergeOperands) {
        // Also exposed as user-collected properties for API backwards
        // compatibility.
        props.user_collected_properties.insert({key, raw_val.ToString()});
      }
      // Predefined numeric property: decode the varint64 value.
      uint64_t val;
      if (GetVarint64(&raw_val, &val)) {
        *(pos->second) = val;
      } else {
        // Malformed value: log it and move on to the next entry.
        auto error_msg =
            "Detect malformed value in properties meta-block:"
            "\tkey: " +
            key + "\tval: " + raw_val.ToString();
        ROCKS_LOG_ERROR(ioptions.logger, "%s", error_msg.c_str());
      }
    } else if (key == TablePropertiesNames::kDbId) {
      props.db_id = raw_val.ToString();
    } else if (key == TablePropertiesNames::kDbSessionId) {
      props.db_session_id = raw_val.ToString();
    } else if (key == TablePropertiesNames::kDbHostId) {
      props.db_host_id = raw_val.ToString();
    } else if (key == TablePropertiesNames::kFilterPolicy) {
      props.filter_policy_name = raw_val.ToString();
    } else if (key == TablePropertiesNames::kColumnFamilyName) {
      props.column_family_name = raw_val.ToString();
    } else if (key == TablePropertiesNames::kComparator) {
      props.comparator_name = raw_val.ToString();
    } else if (key == TablePropertiesNames::kMergeOperator) {
      props.merge_operator_name = raw_val.ToString();
    } else if (key == TablePropertiesNames::kPrefixExtractorName) {
      props.prefix_extractor_name = raw_val.ToString();
    } else if (key == TablePropertiesNames::kPropertyCollectors) {
      props.property_collectors_names = raw_val.ToString();
    } else if (key == TablePropertiesNames::kCompression) {
      props.compression_name = raw_val.ToString();
    } else if (key == TablePropertiesNames::kCompressionOptions) {
      props.compression_options = raw_val.ToString();
    } else if (key == TablePropertiesNames::kSequenceNumberTimeMapping) {
      props.seqno_to_time_mapping = raw_val.ToString();
    } else {
      // Anything else is a user-collected property.
      props.user_collected_properties.insert({key, raw_val.ToString()});
    }
  }

  return s;
}
417
418
// FIXME: should be a parameter for reading table properties to use persistent
419
// cache?
420
Status ReadTablePropertiesHelper(
421
    const ReadOptions& ro, const BlockHandle& handle,
422
    RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
423
    const Footer& footer, const ImmutableOptions& ioptions,
424
    std::unique_ptr<TableProperties>* table_properties,
425
77.8k
    MemoryAllocator* memory_allocator) {
426
77.8k
  assert(table_properties);
427
428
77.8k
  Status s;
429
77.8k
  bool retry = false;
430
77.8k
  while (true) {
431
77.8k
    BlockContents block_contents;
432
77.8k
    size_t len = handle.size() + footer.GetBlockTrailerSize();
433
    // If this is an external SST file ingested with write_global_seqno set to
434
    // true, then we expect the checksum mismatch because checksum was written
435
    // by SstFileWriter, but its global seqno in the properties block may have
436
    // been changed during ingestion. For this reason, we initially read
437
    // and process without checksum verification, then later try checksum
438
    // verification so that if it fails, we can copy to a temporary buffer with
439
    // global seqno set to its original value, i.e. 0, and attempt checksum
440
    // verification again.
441
77.8k
    if (!retry) {
442
77.7k
      ReadOptions modified_ro = ro;
443
77.7k
      modified_ro.verify_checksums = false;
444
77.7k
      BlockFetcher block_fetcher(
445
77.7k
          file, prefetch_buffer, footer, modified_ro, handle, &block_contents,
446
77.7k
          ioptions, false /* decompress */, false /*maybe_compressed*/,
447
77.7k
          BlockType::kProperties, nullptr /*decompressor*/,
448
77.7k
          PersistentCacheOptions::kEmpty, memory_allocator);
449
77.7k
      s = block_fetcher.ReadBlockContents();
450
77.7k
      if (!s.ok()) {
451
0
        return s;
452
0
      }
453
77.7k
      assert(block_fetcher.GetBlockSizeWithTrailer() == len);
454
77.7k
      TEST_SYNC_POINT_CALLBACK("ReadTablePropertiesHelper:0",
455
77.7k
                               &block_contents.data);
456
77.7k
    } else {
457
36
      assert(s.IsCorruption());
458
      // If retrying, use a stronger file system read to check and correct
459
      // data corruption
460
36
      IOOptions opts;
461
36
      IODebugContext dbg;
462
36
      if (PrepareIOFromReadOptions(ro, ioptions.clock, opts, &dbg) !=
463
36
          IOStatus::OK()) {
464
0
        return s;
465
0
      }
466
36
      opts.verify_and_reconstruct_read = true;
467
36
      std::unique_ptr<char[]> data(new char[len]);
468
36
      Slice result;
469
36
      IOStatus io_s = file->Read(opts, handle.offset(), len, &result,
470
36
                                 data.get(), nullptr, &dbg);
471
36
      RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_COUNT);
472
36
      if (!io_s.ok()) {
473
0
        ROCKS_LOG_INFO(ioptions.info_log,
474
0
                       "Reading properties block failed - %s",
475
0
                       io_s.ToString().c_str());
476
        // Return the original corruption error as that's more serious
477
0
        return s;
478
0
      }
479
36
      if (result.size() < len) {
480
0
        return Status::Corruption("Reading properties block failed - " +
481
0
                                  std::to_string(result.size()) +
482
0
                                  " bytes read");
483
0
      }
484
36
      RecordTick(ioptions.stats, FILE_READ_CORRUPTION_RETRY_SUCCESS_COUNT);
485
36
      block_contents = BlockContents(std::move(data), handle.size());
486
36
    }
487
488
77.8k
    uint64_t block_size = block_contents.data.size();
489
77.8k
    Block properties_block(std::move(block_contents));
490
77.8k
    std::unique_ptr<TableProperties> new_table_properties{new TableProperties};
491
77.8k
    s = ParsePropertiesBlock(ioptions, handle.offset(), properties_block,
492
77.8k
                             new_table_properties);
493
494
    // Modified version of BlockFetcher checksum verification
495
    // (See write_global_seqno comment above)
496
77.9k
    if (s.ok() && footer.GetBlockTrailerSize() > 0) {
497
77.9k
      s = VerifyBlockChecksum(footer, properties_block.data(), block_size,
498
77.9k
                              file->file_name(), handle.offset(),
499
77.9k
                              BlockType::kProperties);
500
77.9k
      if (s.IsCorruption()) {
501
0
        if (new_table_properties->external_sst_file_global_seqno_offset != 0) {
502
0
          std::string tmp_buf(properties_block.data(), len);
503
0
          uint64_t global_seqno_offset =
504
0
              new_table_properties->external_sst_file_global_seqno_offset -
505
0
              handle.offset();
506
0
          EncodeFixed64(&tmp_buf[static_cast<size_t>(global_seqno_offset)], 0);
507
0
          s = VerifyBlockChecksum(footer, tmp_buf.data(), block_size,
508
0
                                  file->file_name(), handle.offset(),
509
0
                                  BlockType::kProperties);
510
0
        }
511
0
      }
512
77.9k
    }
513
514
    // If we detected a corruption and the file system supports verification
515
    // and reconstruction, retry the read
516
77.8k
    if (s.IsCorruption() && !retry &&
517
0
        CheckFSFeatureSupport(ioptions.fs.get(),
518
0
                              FSSupportedOps::kVerifyAndReconstructRead)) {
519
0
      retry = true;
520
77.8k
    } else {
521
77.8k
      if (s.ok()) {
522
77.7k
        *table_properties = std::move(new_table_properties);
523
77.7k
      }
524
77.8k
      break;
525
77.8k
    }
526
77.8k
  }
527
528
77.8k
  return s;
529
77.8k
}
530
531
// Locates the properties block of `file` through its metaindex block and
// parses it into `*properties`. Returns Status::NotFound() when the lookup
// succeeds but yields a null block handle; any lookup or read error is
// returned as is.
Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
                           uint64_t table_magic_number,
                           const ImmutableOptions& ioptions,
                           const ReadOptions& read_options,
                           std::unique_ptr<TableProperties>* properties,
                           MemoryAllocator* memory_allocator,
                           FilePrefetchBuffer* prefetch_buffer) {
  BlockHandle properties_handle;
  Footer footer;
  Status s =
      FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
                          read_options, kPropertiesBlockName,
                          &properties_handle, memory_allocator,
                          prefetch_buffer, &footer);
  if (!s.ok()) {
    return s;
  }

  if (properties_handle.IsNull()) {
    return Status::NotFound();
  }
  return ReadTablePropertiesHelper(read_options, properties_handle, file,
                                   prefetch_buffer, footer, ioptions,
                                   properties, memory_allocator);
}
557
558
// Looks up `meta_block_name` in the metaindex via `meta_index_iter`.
//
// If an entry with exactly that name exists, its value is decoded into
// `*block_handle` and the decode status is returned. Otherwise `*block_handle`
// is set to the null handle and the iterator's status is returned, so a
// missing block is not an error by itself.
Status FindOptionalMetaBlock(InternalIterator* meta_index_iter,
                             const std::string& meta_block_name,
                             BlockHandle* block_handle) {
  assert(block_handle != nullptr);
  meta_index_iter->Seek(meta_block_name);

  const bool found = meta_index_iter->status().ok() &&
                     meta_index_iter->Valid() &&
                     meta_index_iter->key() == meta_block_name;
  if (found) {
    Slice encoded_handle = meta_index_iter->value();
    return block_handle->DecodeFrom(&encoded_handle);
  }

  *block_handle = BlockHandle::NullBlockHandle();
  return meta_index_iter->status();
}
573
574
// Same lookup as FindOptionalMetaBlock(), except that a missing block is
// reported as Status::Corruption instead of a null handle with an OK status.
Status FindMetaBlock(InternalIterator* meta_index_iter,
                     const std::string& meta_block_name,
                     BlockHandle* block_handle) {
  Status lookup_status =
      FindOptionalMetaBlock(meta_index_iter, meta_block_name, block_handle);
  if (lookup_status.ok() && block_handle->IsNull()) {
    return Status::Corruption("Cannot find the meta block", meta_block_name);
  }
  return lookup_status;
}
585
586
// Reads the footer of `file`, then fetches the metaindex block it points to
// into `*metaindex_contents`. When `footer_out` is non-null, it receives a
// copy of the footer. Returns the first error encountered, if any.
Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file,
                                uint64_t file_size, uint64_t table_magic_number,
                                const ImmutableOptions& ioptions,
                                const ReadOptions& read_options,
                                BlockContents* metaindex_contents,
                                MemoryAllocator* memory_allocator,
                                FilePrefetchBuffer* prefetch_buffer,
                                Footer* footer_out) {
  Footer footer;
  IOOptions io_opts;
  IODebugContext dbg;
  Status s = file->PrepareIOOptions(read_options, io_opts, &dbg);
  if (!s.ok()) {
    return s;
  }

  s = ReadFooterFromFile(io_opts, file, *ioptions.fs, prefetch_buffer,
                         file_size, &footer, table_magic_number,
                         ioptions.stats);
  if (!s.ok()) {
    return s;
  }
  if (footer_out != nullptr) {
    *footer_out = footer;
  }

  // Keep the handle in a named local so that it outlives the fetcher below.
  auto metaindex_handle = footer.metaindex_handle();
  BlockFetcher metaindex_fetcher(
      file, prefetch_buffer, footer, read_options, metaindex_handle,
      metaindex_contents, ioptions, false /* do decompression */,
      false /*maybe_compressed*/, BlockType::kMetaIndex,
      nullptr /*decompressor*/, PersistentCacheOptions::kEmpty,
      memory_allocator);
  return metaindex_fetcher.ReadBlockContents();
}
619
620
// Reads the metaindex block of `file` and looks up `meta_block_name` in it,
// storing the result in `*block_handle`. `footer_out` is passed through to
// ReadMetaIndexBlockInFile(). A missing block is reported as an error by
// FindMetaBlock().
Status FindMetaBlockInFile(
    RandomAccessFileReader* file, uint64_t file_size,
    uint64_t table_magic_number, const ImmutableOptions& ioptions,
    const ReadOptions& read_options, const std::string& meta_block_name,
    BlockHandle* block_handle, MemoryAllocator* memory_allocator,
    FilePrefetchBuffer* prefetch_buffer, Footer* footer_out) {
  BlockContents metaindex_contents;
  Status read_status = ReadMetaIndexBlockInFile(
      file, file_size, table_magic_number, ioptions, read_options,
      &metaindex_contents, memory_allocator, prefetch_buffer, footer_out);
  if (!read_status.ok()) {
    return read_status;
  }

  // Meta blocks are never compressed. Uncompress logic would have to be added
  // here if that ever changes.
  Block metaindex_block(std::move(metaindex_contents));
  std::unique_ptr<InternalIterator> meta_iter(
      metaindex_block.NewMetaIterator());

  return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
}
642
643
// Finds the meta block called `meta_block_name` in `file` and reads it,
// without decompression, into `*contents`.
//
// Must not be used for the properties block: TableProperties requires special
// handling because of checksum issues. Call ReadTableProperties() for that
// case.
Status ReadMetaBlock(RandomAccessFileReader* file,
                     FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
                     uint64_t table_magic_number,
                     const ImmutableOptions& ioptions,
                     const ReadOptions& read_options,
                     const std::string& meta_block_name, BlockType block_type,
                     BlockContents* contents,
                     MemoryAllocator* memory_allocator) {
  assert(block_type != BlockType::kProperties);

  BlockHandle meta_handle;
  Footer footer;
  Status lookup_status =
      FindMetaBlockInFile(file, file_size, table_magic_number, ioptions,
                          read_options, meta_block_name, &meta_handle,
                          memory_allocator, prefetch_buffer, &footer);
  if (!lookup_status.ok()) {
    return lookup_status;
  }

  BlockFetcher meta_fetcher(file, prefetch_buffer, footer, read_options,
                            meta_handle, contents, ioptions,
                            false /* decompress */, false /*maybe_compressed*/,
                            block_type, nullptr /*decompressor*/,
                            PersistentCacheOptions::kEmpty, memory_allocator);
  return meta_fetcher.ReadBlockContents();
}
672
673
}  // namespace ROCKSDB_NAMESPACE