Coverage Report

Created: 2024-09-08 07:17

/src/rocksdb/table/block_based/index_builder.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
// Use of this source code is governed by a BSD-style license that can be
8
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10
#include "table/block_based/index_builder.h"
11
12
#include <cassert>
13
#include <cinttypes>
14
#include <list>
15
#include <string>
16
17
#include "db/dbformat.h"
18
#include "rocksdb/comparator.h"
19
#include "rocksdb/flush_block_policy.h"
20
#include "table/block_based/partitioned_filter_block.h"
21
#include "table/format.h"
22
23
namespace ROCKSDB_NAMESPACE {
24
25
// Create a index builder based on its type.
26
IndexBuilder* IndexBuilder::CreateIndexBuilder(
27
    BlockBasedTableOptions::IndexType index_type,
28
    const InternalKeyComparator* comparator,
29
    const InternalKeySliceTransform* int_key_slice_transform,
30
    const bool use_value_delta_encoding,
31
    const BlockBasedTableOptions& table_opt, size_t ts_sz,
32
5.44k
    const bool persist_user_defined_timestamps) {
33
5.44k
  IndexBuilder* result = nullptr;
34
5.44k
  switch (index_type) {
35
5.44k
    case BlockBasedTableOptions::kBinarySearch: {
36
5.44k
      result = new ShortenedIndexBuilder(
37
5.44k
          comparator, table_opt.index_block_restart_interval,
38
5.44k
          table_opt.format_version, use_value_delta_encoding,
39
5.44k
          table_opt.index_shortening, /* include_first_key */ false, ts_sz,
40
5.44k
          persist_user_defined_timestamps);
41
5.44k
      break;
42
0
    }
43
0
    case BlockBasedTableOptions::kHashSearch: {
44
      // Currently kHashSearch is incompatible with index_block_restart_interval
45
      // > 1
46
0
      assert(table_opt.index_block_restart_interval == 1);
47
0
      result = new HashIndexBuilder(
48
0
          comparator, int_key_slice_transform,
49
0
          table_opt.index_block_restart_interval, table_opt.format_version,
50
0
          use_value_delta_encoding, table_opt.index_shortening, ts_sz,
51
0
          persist_user_defined_timestamps);
52
0
      break;
53
0
    }
54
0
    case BlockBasedTableOptions::kTwoLevelIndexSearch: {
55
0
      result = PartitionedIndexBuilder::CreateIndexBuilder(
56
0
          comparator, use_value_delta_encoding, table_opt, ts_sz,
57
0
          persist_user_defined_timestamps);
58
0
      break;
59
0
    }
60
0
    case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
61
0
      result = new ShortenedIndexBuilder(
62
0
          comparator, table_opt.index_block_restart_interval,
63
0
          table_opt.format_version, use_value_delta_encoding,
64
0
          table_opt.index_shortening, /* include_first_key */ true, ts_sz,
65
0
          persist_user_defined_timestamps);
66
0
      break;
67
0
    }
68
0
    default: {
69
0
      assert(!"Do not recognize the index type ");
70
0
      break;
71
0
    }
72
5.44k
  }
73
5.44k
  return result;
74
5.44k
}
75
76
Slice ShortenedIndexBuilder::FindShortestInternalKeySeparator(
77
    const Comparator& comparator, const Slice& start, const Slice& limit,
78
3.59k
    std::string* scratch) {
79
  // Attempt to shorten the user portion of the key
80
3.59k
  Slice user_start = ExtractUserKey(start);
81
3.59k
  Slice user_limit = ExtractUserKey(limit);
82
3.59k
  scratch->assign(user_start.data(), user_start.size());
83
3.59k
  comparator.FindShortestSeparator(scratch, user_limit);
84
3.59k
  assert(comparator.Compare(user_start, *scratch) <= 0);
85
3.59k
  assert(comparator.Compare(user_start, user_limit) >= 0 ||
86
3.59k
         comparator.Compare(*scratch, user_limit) < 0);
87
3.59k
  if (scratch->size() <= user_start.size() &&
88
3.59k
      comparator.Compare(user_start, *scratch) < 0) {
89
    // User key has become shorter physically, but larger logically.
90
    // Tack on the earliest possible number to the shortened user key.
91
3.37k
    PutFixed64(scratch,
92
3.37k
               PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
93
3.37k
    assert(InternalKeyComparator(&comparator).Compare(start, *scratch) < 0);
94
3.37k
    assert(InternalKeyComparator(&comparator).Compare(*scratch, limit) < 0);
95
3.37k
    return *scratch;
96
3.37k
  } else {
97
224
    return start;
98
224
  }
99
3.59k
}
100
101
Slice ShortenedIndexBuilder::FindShortInternalKeySuccessor(
102
0
    const Comparator& comparator, const Slice& key, std::string* scratch) {
103
0
  Slice user_key = ExtractUserKey(key);
104
0
  scratch->assign(user_key.data(), user_key.size());
105
0
  comparator.FindShortSuccessor(scratch);
106
0
  assert(comparator.Compare(user_key, *scratch) <= 0);
107
0
  if (scratch->size() <= user_key.size() &&
108
0
      comparator.Compare(user_key, *scratch) < 0) {
109
    // User key has become shorter physically, but larger logically.
110
    // Tack on the earliest possible number to the shortened user key.
111
0
    PutFixed64(scratch,
112
0
               PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
113
0
    assert(InternalKeyComparator(&comparator).Compare(key, *scratch) < 0);
114
0
    return *scratch;
115
0
  } else {
116
0
    return key;
117
0
  }
118
0
}
119
120
PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
121
    const InternalKeyComparator* comparator,
122
    const bool use_value_delta_encoding,
123
    const BlockBasedTableOptions& table_opt, size_t ts_sz,
124
0
    const bool persist_user_defined_timestamps) {
125
0
  return new PartitionedIndexBuilder(comparator, table_opt,
126
0
                                     use_value_delta_encoding, ts_sz,
127
0
                                     persist_user_defined_timestamps);
128
0
}
129
130
PartitionedIndexBuilder::PartitionedIndexBuilder(
131
    const InternalKeyComparator* comparator,
132
    const BlockBasedTableOptions& table_opt,
133
    const bool use_value_delta_encoding, size_t ts_sz,
134
    const bool persist_user_defined_timestamps)
135
    : IndexBuilder(comparator, ts_sz, persist_user_defined_timestamps),
136
      index_block_builder_(
137
          table_opt.index_block_restart_interval, true /*use_delta_encoding*/,
138
          use_value_delta_encoding,
139
          BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
140
          0.75 /* data_block_hash_table_util_ratio */, ts_sz,
141
          persist_user_defined_timestamps, false /* is_user_key */),
142
      index_block_builder_without_seq_(
143
          table_opt.index_block_restart_interval, true /*use_delta_encoding*/,
144
          use_value_delta_encoding,
145
          BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
146
          0.75 /* data_block_hash_table_util_ratio */, ts_sz,
147
          persist_user_defined_timestamps, true /* is_user_key */),
148
      table_opt_(table_opt),
149
      // We start by false. After each partition we revise the value based on
150
      // what the sub_index_builder has decided. If the feature is disabled
151
      // entirely, this will be set to true after switching the first
152
      // sub_index_builder. Otherwise, it could be set to true even one of the
153
      // sub_index_builders could not safely exclude seq from the keys, then it
154
      // wil be enforced on all sub_index_builders on ::Finish.
155
      seperator_is_key_plus_seq_(false),
156
0
      use_value_delta_encoding_(use_value_delta_encoding) {}
157
158
0
void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
159
0
  assert(sub_index_builder_ == nullptr);
160
0
  sub_index_builder_ = std::make_unique<ShortenedIndexBuilder>(
161
0
      comparator_, table_opt_.index_block_restart_interval,
162
0
      table_opt_.format_version, use_value_delta_encoding_,
163
0
      table_opt_.index_shortening, /* include_first_key */ false, ts_sz_,
164
0
      persist_user_defined_timestamps_);
165
166
  // Set sub_index_builder_->seperator_is_key_plus_seq_ to true if
167
  // seperator_is_key_plus_seq_ is true (internal-key mode) (set to false by
168
  // default on Creation) so that flush policy can point to
169
  // sub_index_builder_->index_block_builder_
170
0
  if (seperator_is_key_plus_seq_) {
171
0
    sub_index_builder_->seperator_is_key_plus_seq_ = true;
172
0
  }
173
174
0
  flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
175
0
      table_opt_.metadata_block_size, table_opt_.block_size_deviation,
176
      // Note: this is sub-optimal since sub_index_builder_ could later reset
177
      // seperator_is_key_plus_seq_ but the probability of that is low.
178
0
      sub_index_builder_->seperator_is_key_plus_seq_
179
0
          ? sub_index_builder_->index_block_builder_
180
0
          : sub_index_builder_->index_block_builder_without_seq_));
181
0
  partition_cut_requested_ = false;
182
0
}
183
184
0
void PartitionedIndexBuilder::RequestPartitionCut() {
185
0
  partition_cut_requested_ = true;
186
0
}
187
188
Slice PartitionedIndexBuilder::AddIndexEntry(
189
    const Slice& last_key_in_current_block,
190
    const Slice* first_key_in_next_block, const BlockHandle& block_handle,
191
0
    std::string* separator_scratch) {
192
  // Note: to avoid two consecuitive flush in the same method call, we do not
193
  // check flush policy when adding the last key
194
0
  if (UNLIKELY(first_key_in_next_block == nullptr)) {  // no more keys
195
0
    if (sub_index_builder_ == nullptr) {
196
0
      MakeNewSubIndexBuilder();
197
      // Reserve next partition entry, where we will modify the key and
198
      // eventually set the value
199
0
      entries_.push_back({{}, {}});
200
0
    }
201
0
    auto sep = sub_index_builder_->AddIndexEntry(
202
0
        last_key_in_current_block, first_key_in_next_block, block_handle,
203
0
        separator_scratch);
204
0
    if (!seperator_is_key_plus_seq_ &&
205
0
        sub_index_builder_->seperator_is_key_plus_seq_) {
206
      // We need to apply !seperator_is_key_plus_seq to all sub-index builders
207
0
      seperator_is_key_plus_seq_ = true;
208
      // Would associate flush_policy with the appropriate builder, but it won't
209
      // be used again with no more keys
210
0
      flush_policy_.reset();
211
0
    }
212
0
    entries_.back().key.assign(sep.data(), sep.size());
213
0
    assert(entries_.back().value == nullptr);
214
0
    std::swap(entries_.back().value, sub_index_builder_);
215
0
    cut_filter_block = true;
216
0
    return sep;
217
0
  } else {
218
    // apply flush policy only to non-empty sub_index_builder_
219
0
    if (sub_index_builder_ != nullptr) {
220
0
      std::string handle_encoding;
221
0
      block_handle.EncodeTo(&handle_encoding);
222
0
      bool do_flush =
223
0
          partition_cut_requested_ ||
224
0
          flush_policy_->Update(last_key_in_current_block, handle_encoding);
225
0
      if (do_flush) {
226
0
        assert(entries_.back().value == nullptr);
227
0
        std::swap(entries_.back().value, sub_index_builder_);
228
0
        cut_filter_block = true;
229
0
      }
230
0
    }
231
0
    if (sub_index_builder_ == nullptr) {
232
0
      MakeNewSubIndexBuilder();
233
      // Reserve next partition entry, where we will modify the key and
234
      // eventually set the value
235
0
      entries_.push_back({{}, {}});
236
0
    }
237
0
    auto sep = sub_index_builder_->AddIndexEntry(
238
0
        last_key_in_current_block, first_key_in_next_block, block_handle,
239
0
        separator_scratch);
240
0
    entries_.back().key.assign(sep.data(), sep.size());
241
0
    if (!seperator_is_key_plus_seq_ &&
242
0
        sub_index_builder_->seperator_is_key_plus_seq_) {
243
      // We need to apply !seperator_is_key_plus_seq to all sub-index builders
244
0
      seperator_is_key_plus_seq_ = true;
245
      // And use a flush_policy with the appropriate builder
246
0
      flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
247
0
          table_opt_.metadata_block_size, table_opt_.block_size_deviation,
248
0
          sub_index_builder_->index_block_builder_));
249
0
    }
250
0
    return sep;
251
0
  }
252
0
}
253
254
Status PartitionedIndexBuilder::Finish(
255
0
    IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
256
0
  if (partition_cnt_ == 0) {
257
0
    partition_cnt_ = entries_.size();
258
0
  }
259
  // It must be set to null after last key is added
260
0
  assert(sub_index_builder_ == nullptr);
261
0
  if (finishing_indexes == true) {
262
0
    Entry& last_entry = entries_.front();
263
0
    std::string handle_encoding;
264
0
    last_partition_block_handle.EncodeTo(&handle_encoding);
265
0
    std::string handle_delta_encoding;
266
0
    PutVarsignedint64(
267
0
        &handle_delta_encoding,
268
0
        last_partition_block_handle.size() - last_encoded_handle_.size());
269
0
    last_encoded_handle_ = last_partition_block_handle;
270
0
    const Slice handle_delta_encoding_slice(handle_delta_encoding);
271
0
    index_block_builder_.Add(last_entry.key, handle_encoding,
272
0
                             &handle_delta_encoding_slice);
273
0
    if (!seperator_is_key_plus_seq_) {
274
0
      index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
275
0
                                           handle_encoding,
276
0
                                           &handle_delta_encoding_slice);
277
0
    }
278
0
    entries_.pop_front();
279
0
  }
280
  // If there is no sub_index left, then return the 2nd level index.
281
0
  if (UNLIKELY(entries_.empty())) {
282
0
    if (seperator_is_key_plus_seq_) {
283
0
      index_blocks->index_block_contents = index_block_builder_.Finish();
284
0
    } else {
285
0
      index_blocks->index_block_contents =
286
0
          index_block_builder_without_seq_.Finish();
287
0
    }
288
0
    top_level_index_size_ = index_blocks->index_block_contents.size();
289
0
    index_size_ += top_level_index_size_;
290
0
    return Status::OK();
291
0
  } else {
292
    // Finish the next partition index in line and Incomplete() to indicate we
293
    // expect more calls to Finish
294
0
    Entry& entry = entries_.front();
295
    // Apply the policy to all sub-indexes
296
0
    entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_;
297
0
    auto s = entry.value->Finish(index_blocks);
298
0
    index_size_ += index_blocks->index_block_contents.size();
299
0
    finishing_indexes = true;
300
0
    return s.ok() ? Status::Incomplete() : s;
301
0
  }
302
0
}
303
304
0
size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; }
305
}  // namespace ROCKSDB_NAMESPACE