Coverage Report

Created: 2025-10-26 07:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rocksdb/db/compaction/compaction_picker.h
Line
Count
Source
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
// Use of this source code is governed by a BSD-style license that can be
8
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10
#pragma once
11
12
#include <memory>
13
#include <set>
14
#include <string>
15
#include <unordered_set>
16
#include <vector>
17
18
#include "db/compaction/compaction.h"
19
#include "db/snapshot_checker.h"
20
#include "db/version_set.h"
21
#include "options/cf_options.h"
22
#include "rocksdb/env.h"
23
#include "rocksdb/options.h"
24
#include "rocksdb/status.h"
25
26
namespace ROCKSDB_NAMESPACE {
27
28
// The file contains an abstract class CompactionPicker, and its two
29
// sub-classes LevelCompactionPicker and NullCompactionPicker, as
30
// well as some helper functions used by them.
31
32
class LogBuffer;
33
class Compaction;
34
class VersionStorageInfo;
35
struct CompactionInputFiles;
36
37
// An abstract class to pick compactions from an existing LSM-tree.
38
//
39
// Each compaction style inherits the class and implement the
40
// interface to form automatic compactions. If NeedCompaction() is true,
41
// then call PickCompaction() to find what files need to be compacted
42
// and where to put the output files.
43
//
44
// Non-virtual functions CompactRange() and CompactFiles() are used to
45
// pick files to compact based on users' DB::CompactRange() and
46
// DB::CompactFiles() requests, respectively. There is little
47
// compaction style specific logic for them.
48
class CompactionPicker {
49
 public:
50
  CompactionPicker(const ImmutableOptions& ioptions,
51
                   const InternalKeyComparator* icmp);
52
  virtual ~CompactionPicker();
53
54
  // Pick level and inputs for a new compaction.
55
  //
56
  // Returns nullptr if there is no compaction to be done.
57
  // Otherwise returns a pointer to a heap-allocated object that
58
  // describes the compaction.  Caller should delete the result.
59
  // Currently, only universal compaction will query existing snapshots and
60
  // pass it to aid compaction picking. And it's only passed when user-defined
61
  // timestamps is not enabled. The other compaction styles do not pass or use
62
  // `existing_snapshots` or `snapshot_checker`.
63
  virtual Compaction* PickCompaction(
64
      const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
65
      const MutableDBOptions& mutable_db_options,
66
      const std::vector<SequenceNumber>& existing_snapshots,
67
      const SnapshotChecker* snapshot_checker, VersionStorageInfo* vstorage,
68
      LogBuffer* log_buffer, bool require_max_output_level) = 0;
69
70
  // The returned Compaction might not include the whole requested range.
71
  // In that case, compaction_end will be set to the next key that needs
72
  // compacting. In case the compaction will compact the whole range,
73
  // compaction_end will be set to nullptr.
74
  // Client is responsible for compaction_end storage -- when called,
75
  // *compaction_end should point to valid InternalKey!
76
  // REQUIRES: If not compacting all levels (input_level == kCompactAllLevels),
77
  // then levels between input_level and output_level should be empty.
78
  virtual Compaction* PickCompactionForCompactRange(
79
      const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
80
      const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
81
      int input_level, int output_level,
82
      const CompactRangeOptions& compact_range_options,
83
      const InternalKey* begin, const InternalKey* end,
84
      InternalKey** compaction_end, bool* manual_conflict,
85
      uint64_t max_file_num_to_ignore, const std::string& trim_ts);
86
87
  // The maximum allowed output level.  Default value is NumberLevels() - 1.
88
0
  virtual int MaxOutputLevel() const { return NumberLevels() - 1; }
89
90
  virtual bool NeedsCompaction(const VersionStorageInfo* vstorage) const = 0;
91
92
  // Sanitize the input set of compaction input files and convert it to
93
  // `std::vector<CompactionInputFiles>` in the output parameter
94
  // `converted_input_files`.
95
  // When the input parameters do not describe a valid
96
  // compaction, the function will try to fix the input_files by adding
97
  // necessary files.  If it's not possible to convert an invalid input_files
98
  // into a valid one by adding more files, the function will return a
99
  // non-ok status with specific reason.
100
  //
101
  Status SanitizeAndConvertCompactionInputFiles(
102
      std::unordered_set<uint64_t>* input_files, const int output_level,
103
      Version* version,
104
      std::vector<CompactionInputFiles>* converted_input_files) const;
105
106
  // Free up the files that participated in a compaction
107
  //
108
  // Requirement: DB mutex held
109
  void ReleaseCompactionFiles(Compaction* c, const Status& status);
110
111
  // Returns true if any one of the specified files are being compacted
112
  bool AreFilesInCompaction(const std::vector<FileMetaData*>& files);
113
114
  // Takes a list of CompactionInputFiles and returns a (manual) Compaction
115
  // object.
116
  //
117
  // Caller must provide a set of input files that has been passed through
118
  // `SanitizeAndConvertCompactionInputFiles` earlier. The lock should not be
119
  // released between that call and this one.
120
  //
121
  //  TODO - Remove default values for earliest_snapshot and snapshot_checker
122
  //  and require all callers to pass them in so that DB::CompactFiles() can
123
  //  also benefit from Standalone Range Tombstone Optimization
124
  Compaction* PickCompactionForCompactFiles(
125
      const CompactionOptions& compact_options,
126
      const std::vector<CompactionInputFiles>& input_files, int output_level,
127
      VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options,
128
      const MutableDBOptions& mutable_db_options, uint32_t output_path_id,
129
      std::optional<SequenceNumber> earliest_snapshot = std::nullopt,
130
      const SnapshotChecker* snapshot_checker = nullptr);
131
132
  // Converts a set of compaction input file numbers into
133
  // a list of CompactionInputFiles.
134
  // TODO(hx235): remove the unused paramter `compact_options`
135
  Status GetCompactionInputsFromFileNumbers(
136
      std::vector<CompactionInputFiles>* input_files,
137
      std::unordered_set<uint64_t>* input_set,
138
      const VersionStorageInfo* vstorage,
139
      const CompactionOptions& compact_options) const;
140
141
  // Is there currently a compaction involving level 0 taking place
142
31
  bool IsLevel0CompactionInProgress() const {
143
31
    return !level0_compactions_in_progress_.empty();
144
31
  }
145
146
  // Is any compaction in progress
147
0
  bool IsCompactionInProgress() const {
148
0
    return !(level0_compactions_in_progress_.empty() &&
149
0
             compactions_in_progress_.empty());
150
0
  }
151
152
  // Return true if the passed key range overlap with a compaction output
153
  // that is currently running.
154
  bool RangeOverlapWithCompaction(const Slice& smallest_user_key,
155
                                  const Slice& largest_user_key,
156
                                  int level) const;
157
158
  // Stores the minimal range that covers all entries in inputs in
159
  // *smallest, *largest.
160
  // REQUIRES: inputs is not empty
161
  void GetRange(const CompactionInputFiles& inputs, InternalKey* smallest,
162
                InternalKey* largest) const;
163
164
  // Stores the minimal range that covers all entries in inputs1 and inputs2
165
  // in *smallest, *largest.
166
  // REQUIRES: inputs is not empty
167
  void GetRange(const CompactionInputFiles& inputs1,
168
                const CompactionInputFiles& inputs2, InternalKey* smallest,
169
                InternalKey* largest) const;
170
171
  // Stores the minimal range that covers all entries in inputs
172
  // in *smallest, *largest.
173
  // REQUIRES: inputs is not empty (at least on entry have one file)
174
  void GetRange(const std::vector<CompactionInputFiles>& inputs,
175
                InternalKey* smallest, InternalKey* largest,
176
                int exclude_level) const;
177
178
9.76k
  int NumberLevels() const { return ioptions_.num_levels; }
179
180
  // Add more files to the inputs on "level" to make sure that
181
  // no newer version of a key is compacted to "level+1" while leaving an older
182
  // version in a "level". Otherwise, any Get() will search "level" first,
183
  // and will likely return an old/stale value for the key, since it always
184
  // searches in increasing order of level to find the value. This could
185
  // also scramble the order of merge operands. This function should be
186
  // called any time a new Compaction is created, and its inputs_[0] are
187
  // populated.
188
  //
189
  // Will return false if it is impossible to apply this compaction.
190
  bool ExpandInputsToCleanCut(const std::string& cf_name,
191
                              VersionStorageInfo* vstorage,
192
                              CompactionInputFiles* inputs,
193
                              InternalKey** next_smallest = nullptr);
194
195
  // Returns true if any one of the parent files are being compacted
196
  bool IsRangeInCompaction(VersionStorageInfo* vstorage,
197
                           const InternalKey* smallest,
198
                           const InternalKey* largest, int level, int* index);
199
200
  // Returns true if the key range that `inputs` files cover overlap with the
201
  // key range of a currently running compaction.
202
  bool FilesRangeOverlapWithCompaction(
203
      const std::vector<CompactionInputFiles>& inputs, int level,
204
      int proximal_level) const;
205
206
  // @param starting_l0_file If not null, restricts L0 file selection to only
207
  //                         include files at or older than starting_l0_file.
208
  bool SetupOtherInputs(const std::string& cf_name,
209
                        const MutableCFOptions& mutable_cf_options,
210
                        VersionStorageInfo* vstorage,
211
                        CompactionInputFiles* inputs,
212
                        CompactionInputFiles* output_level_inputs,
213
                        int* parent_index, int base_index,
214
                        bool only_expand_towards_right = false,
215
                        const FileMetaData* starting_l0_file = nullptr);
216
217
  void GetGrandparents(VersionStorageInfo* vstorage,
218
                       const CompactionInputFiles& inputs,
219
                       const CompactionInputFiles& output_level_inputs,
220
                       std::vector<FileMetaData*>* grandparents);
221
222
  void PickFilesMarkedForCompaction(
223
      const std::string& cf_name, VersionStorageInfo* vstorage,
224
      int* start_level, int* output_level,
225
      CompactionInputFiles* start_level_inputs,
226
      std::function<bool(const FileMetaData*)> skip_marked_file);
227
228
  // @param starting_l0_file If not null, restricts L0 file selection to only
229
  //                         include files at or older than starting_l0_file.
230
  bool GetOverlappingL0Files(VersionStorageInfo* vstorage,
231
                             CompactionInputFiles* start_level_inputs,
232
                             int output_level, int* parent_index,
233
                             const FileMetaData* starting_l0_file = nullptr);
234
235
  // Register this compaction in the set of running compactions
236
  void RegisterCompaction(Compaction* c);
237
238
  // Remove this compaction from the set of running compactions
239
  void UnregisterCompaction(Compaction* c);
240
241
3.75k
  std::set<Compaction*>* level0_compactions_in_progress() {
242
3.75k
    return &level0_compactions_in_progress_;
243
3.75k
  }
244
0
  std::unordered_set<Compaction*>* compactions_in_progress() {
245
0
    return &compactions_in_progress_;
246
0
  }
247
248
2.65k
  const InternalKeyComparator* icmp() const { return icmp_; }
249
250
 protected:
251
  const ImmutableOptions& ioptions_;
252
253
  // A helper function to SanitizeAndConvertCompactionInputFiles() that
254
  // sanitizes "input_files" by adding necessary files.
255
  virtual Status SanitizeCompactionInputFilesForAllLevels(
256
      std::unordered_set<uint64_t>* input_files,
257
      const ColumnFamilyMetaData& cf_meta, const int output_level) const;
258
259
  // Keeps track of all compactions that are running on Level0.
260
  // Protected by DB mutex
261
  std::set<Compaction*> level0_compactions_in_progress_;
262
263
  // Keeps track of all compactions that are running.
264
  // Protected by DB mutex
265
  std::unordered_set<Compaction*> compactions_in_progress_;
266
267
  const InternalKeyComparator* const icmp_;
268
};
269
270
// A dummy compaction that never triggers any automatic
271
// compaction.
272
class NullCompactionPicker : public CompactionPicker {
273
 public:
274
  NullCompactionPicker(const ImmutableOptions& ioptions,
275
                       const InternalKeyComparator* icmp)
276
0
      : CompactionPicker(ioptions, icmp) {}
277
0
  virtual ~NullCompactionPicker() {}
278
279
  // Always return "nullptr"
280
  Compaction* PickCompaction(
281
      const std::string& /*cf_name*/,
282
      const MutableCFOptions& /*mutable_cf_options*/,
283
      const MutableDBOptions& /*mutable_db_options*/,
284
      const std::vector<SequenceNumber>& /*existing_snapshots*/,
285
      const SnapshotChecker* /*snapshot_checker*/,
286
      VersionStorageInfo* /*vstorage*/, LogBuffer* /* log_buffer */,
287
0
      bool /*require_max_output_level*/ = false) override {
288
0
    return nullptr;
289
0
  }
290
291
  // Always return "nullptr"
292
  Compaction* PickCompactionForCompactRange(
293
      const std::string& /*cf_name*/,
294
      const MutableCFOptions& /*mutable_cf_options*/,
295
      const MutableDBOptions& /*mutable_db_options*/,
296
      VersionStorageInfo* /*vstorage*/, int /*input_level*/,
297
      int /*output_level*/,
298
      const CompactRangeOptions& /*compact_range_options*/,
299
      const InternalKey* /*begin*/, const InternalKey* /*end*/,
300
      InternalKey** /*compaction_end*/, bool* /*manual_conflict*/,
301
      uint64_t /*max_file_num_to_ignore*/,
302
0
      const std::string& /*trim_ts*/) override {
303
0
    return nullptr;
304
0
  }
305
306
  // Always returns false.
307
0
  bool NeedsCompaction(const VersionStorageInfo* /*vstorage*/) const override {
308
0
    return false;
309
0
  }
310
};
311
312
// Attempts to find an intra L0 compaction conforming to the given parameters.
313
//
314
// @param level_files                     Metadata for L0 files.
315
// @param min_files_to_compact            Minimum number of files required to
316
//                                        do the compaction.
317
// @param max_compact_bytes_per_del_file  Maximum average size in bytes per
318
//                                        file that is going to get deleted by
319
//                                        the compaction.
320
// @param max_compaction_bytes            Maximum total size in bytes (in terms
321
//                                        of compensated file size) for files
322
//                                        to be compacted.
323
// @param [out] comp_inputs               If a compaction was found, will be
324
//                                        initialized with corresponding input
325
//                                        files. Cannot be nullptr.
326
//
327
// @return                                true iff compaction was found.
328
bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
329
                           size_t min_files_to_compact,
330
                           uint64_t max_compact_bytes_per_del_file,
331
                           uint64_t max_compaction_bytes,
332
                           CompactionInputFiles* comp_inputs);
333
334
CompressionType GetCompressionType(const VersionStorageInfo* vstorage,
335
                                   const MutableCFOptions& mutable_cf_options,
336
                                   int level, int base_level,
337
                                   const bool enable_compression = true);
338
339
CompressionOptions GetCompressionOptions(
340
    const MutableCFOptions& mutable_cf_options,
341
    const VersionStorageInfo* vstorage, int level,
342
    const bool enable_compression = true);
343
344
}  // namespace ROCKSDB_NAMESPACE