/src/rocksdb/db/compaction/compaction_picker.h
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
7 | | // Use of this source code is governed by a BSD-style license that can be |
8 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
9 | | |
10 | | #pragma once |
11 | | |
12 | | #include <memory> |
13 | | #include <set> |
14 | | #include <string> |
15 | | #include <unordered_set> |
16 | | #include <vector> |
17 | | |
18 | | #include "db/compaction/compaction.h" |
19 | | #include "db/snapshot_checker.h" |
20 | | #include "db/version_set.h" |
21 | | #include "options/cf_options.h" |
22 | | #include "rocksdb/env.h" |
23 | | #include "rocksdb/options.h" |
24 | | #include "rocksdb/status.h" |
25 | | |
26 | | namespace ROCKSDB_NAMESPACE { |
27 | | |
28 | | // The file contains an abstract class CompactionPicker, and its two |
29 | | // sub-classes LevelCompactionPicker and NullCompactionPicker, as |
30 | | // well as some helper functions used by them. |
31 | | |
32 | | class LogBuffer; |
33 | | class Compaction; |
34 | | class VersionStorageInfo; |
35 | | struct CompactionInputFiles; |
36 | | |
37 | | // An abstract class to pick compactions from an existing LSM-tree. |
38 | | // |
39 | | // Each compaction style inherits the class and implement the |
40 | | // interface to form automatic compactions. If NeedCompaction() is true, |
41 | | // then call PickCompaction() to find what files need to be compacted |
42 | | // and where to put the output files. |
43 | | // |
44 | | // Non-virtual functions CompactRange() and CompactFiles() are used to |
45 | | // pick files to compact based on users' DB::CompactRange() and |
46 | | // DB::CompactFiles() requests, respectively. There is little |
47 | | // compaction style specific logic for them. |
48 | | class CompactionPicker { |
49 | | public: |
50 | | CompactionPicker(const ImmutableOptions& ioptions, |
51 | | const InternalKeyComparator* icmp); |
52 | | virtual ~CompactionPicker(); |
53 | | |
54 | | // Pick level and inputs for a new compaction. |
55 | | // |
56 | | // Returns nullptr if there is no compaction to be done. |
57 | | // Otherwise returns a pointer to a heap-allocated object that |
58 | | // describes the compaction. Caller should delete the result. |
59 | | // Currently, only universal compaction will query existing snapshots and |
60 | | // pass it to aid compaction picking. And it's only passed when user-defined |
61 | | // timestamps is not enabled. The other compaction styles do not pass or use |
62 | | // `existing_snapshots` or `snapshot_checker`. |
63 | | virtual Compaction* PickCompaction( |
64 | | const std::string& cf_name, const MutableCFOptions& mutable_cf_options, |
65 | | const MutableDBOptions& mutable_db_options, |
66 | | const std::vector<SequenceNumber>& existing_snapshots, |
67 | | const SnapshotChecker* snapshot_checker, VersionStorageInfo* vstorage, |
68 | | LogBuffer* log_buffer, const std::string& full_history_ts_low, |
69 | | bool require_max_output_level = false) = 0; |
70 | | |
71 | | // The returned Compaction might not include the whole requested range. |
72 | | // In that case, compaction_end will be set to the next key that needs |
73 | | // compacting. In case the compaction will compact the whole range, |
74 | | // compaction_end will be set to nullptr. |
75 | | // Client is responsible for compaction_end storage -- when called, |
76 | | // *compaction_end should point to valid InternalKey! |
77 | | // REQUIRES: If not compacting all levels (input_level == kCompactAllLevels), |
78 | | // then levels between input_level and output_level should be empty. |
79 | | virtual Compaction* PickCompactionForCompactRange( |
80 | | const std::string& cf_name, const MutableCFOptions& mutable_cf_options, |
81 | | const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, |
82 | | int input_level, int output_level, |
83 | | const CompactRangeOptions& compact_range_options, |
84 | | const InternalKey* begin, const InternalKey* end, |
85 | | InternalKey** compaction_end, bool* manual_conflict, |
86 | | uint64_t max_file_num_to_ignore, const std::string& trim_ts, |
87 | | const std::string& full_history_ts_low); |
88 | | |
89 | | // The maximum allowed output level. Default value is NumberLevels() - 1. |
90 | 0 | virtual int MaxOutputLevel() const { return NumberLevels() - 1; } |
91 | | |
92 | | virtual bool NeedsCompaction(const VersionStorageInfo* vstorage) const = 0; |
93 | | |
94 | | // Sanitize the input set of compaction input files and convert it to |
95 | | // `std::vector<CompactionInputFiles>` in the output parameter |
96 | | // `converted_input_files`. |
97 | | // When the input parameters do not describe a valid |
98 | | // compaction, the function will try to fix the input_files by adding |
99 | | // necessary files. If it's not possible to convert an invalid input_files |
100 | | // into a valid one by adding more files, the function will return a |
101 | | // non-ok status with specific reason. |
102 | | // |
103 | | Status SanitizeAndConvertCompactionInputFiles( |
104 | | std::unordered_set<uint64_t>* input_files, const int output_level, |
105 | | Version* version, |
106 | | std::vector<CompactionInputFiles>* converted_input_files) const; |
107 | | |
108 | | // Free up the files that participated in a compaction |
109 | | // |
110 | | // Requirement: DB mutex held |
111 | | void ReleaseCompactionFiles(Compaction* c, const Status& status); |
112 | | |
113 | | // Returns true if any one of the specified files are being compacted |
114 | | bool AreFilesInCompaction(const std::vector<FileMetaData*>& files); |
115 | | |
116 | | // Takes a list of CompactionInputFiles and returns a (manual) Compaction |
117 | | // object. |
118 | | // |
119 | | // Caller must provide a set of input files that has been passed through |
120 | | // `SanitizeAndConvertCompactionInputFiles` earlier. The lock should not be |
121 | | // released between that call and this one. |
122 | | // |
123 | | // TODO - Remove default values for earliest_snapshot and snapshot_checker |
124 | | // and require all callers to pass them in so that DB::CompactFiles() can |
125 | | // also benefit from Standalone Range Tombstone Optimization |
126 | | Compaction* PickCompactionForCompactFiles( |
127 | | const CompactionOptions& compact_options, |
128 | | const std::vector<CompactionInputFiles>& input_files, int output_level, |
129 | | VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, |
130 | | const MutableDBOptions& mutable_db_options, uint32_t output_path_id, |
131 | | std::optional<SequenceNumber> earliest_snapshot = std::nullopt, |
132 | | const SnapshotChecker* snapshot_checker = nullptr); |
133 | | |
134 | | // Converts a set of compaction input file numbers into |
135 | | // a list of CompactionInputFiles. |
136 | | // TODO(hx235): remove the unused paramter `compact_options` |
137 | | Status GetCompactionInputsFromFileNumbers( |
138 | | std::vector<CompactionInputFiles>* input_files, |
139 | | std::unordered_set<uint64_t>* input_set, |
140 | | const VersionStorageInfo* vstorage, |
141 | | const CompactionOptions& compact_options) const; |
142 | | |
143 | | // Is there currently a compaction involving level 0 taking place |
144 | 0 | bool IsLevel0CompactionInProgress() const { |
145 | 0 | return !level0_compactions_in_progress_.empty(); |
146 | 0 | } |
147 | | |
148 | | // Is any compaction in progress |
149 | 0 | bool IsCompactionInProgress() const { |
150 | 0 | return !(level0_compactions_in_progress_.empty() && |
151 | 0 | compactions_in_progress_.empty()); |
152 | 0 | } |
153 | | |
154 | | // Return true if the passed key range overlap with a compaction output |
155 | | // that is currently running. |
156 | | bool RangeOverlapWithCompaction(const Slice& smallest_user_key, |
157 | | const Slice& largest_user_key, |
158 | | int level) const; |
159 | | |
160 | | // Stores the minimal range that covers all entries in inputs in |
161 | | // *smallest, *largest. |
162 | | // REQUIRES: inputs is not empty |
163 | | void GetRange(const CompactionInputFiles& inputs, InternalKey* smallest, |
164 | | InternalKey* largest) const; |
165 | | |
166 | | // Stores the minimal range that covers all entries in inputs1 and inputs2 |
167 | | // in *smallest, *largest. |
168 | | // REQUIRES: inputs is not empty |
169 | | void GetRange(const CompactionInputFiles& inputs1, |
170 | | const CompactionInputFiles& inputs2, InternalKey* smallest, |
171 | | InternalKey* largest) const; |
172 | | |
173 | | // Stores the minimal range that covers all entries in inputs |
174 | | // in *smallest, *largest. |
175 | | // REQUIRES: inputs is not empty (at least on entry have one file) |
176 | | void GetRange(const std::vector<CompactionInputFiles>& inputs, |
177 | | InternalKey* smallest, InternalKey* largest, |
178 | | int exclude_level) const; |
179 | | |
180 | 4.35k | int NumberLevels() const { return ioptions_.num_levels; } |
181 | | |
182 | | // Add more files to the inputs on "level" to make sure that |
183 | | // no newer version of a key is compacted to "level+1" while leaving an older |
184 | | // version in a "level". Otherwise, any Get() will search "level" first, |
185 | | // and will likely return an old/stale value for the key, since it always |
186 | | // searches in increasing order of level to find the value. This could |
187 | | // also scramble the order of merge operands. This function should be |
188 | | // called any time a new Compaction is created, and its inputs_[0] are |
189 | | // populated. |
190 | | // |
191 | | // Will return false if it is impossible to apply this compaction. |
192 | | bool ExpandInputsToCleanCut(const std::string& cf_name, |
193 | | VersionStorageInfo* vstorage, |
194 | | CompactionInputFiles* inputs, |
195 | | InternalKey** next_smallest = nullptr); |
196 | | |
197 | | // Returns true if any one of the parent files are being compacted |
198 | | bool IsRangeInCompaction(VersionStorageInfo* vstorage, |
199 | | const InternalKey* smallest, |
200 | | const InternalKey* largest, int level, int* index); |
201 | | |
202 | | // Returns true if the key range that `inputs` files cover overlap with the |
203 | | // key range of a currently running compaction. |
204 | | bool FilesRangeOverlapWithCompaction( |
205 | | const std::vector<CompactionInputFiles>& inputs, int level, |
206 | | int proximal_level) const; |
207 | | |
208 | | // @param starting_l0_file If not null, restricts L0 file selection to only |
209 | | // include files at or older than starting_l0_file. |
210 | | bool SetupOtherInputs(const std::string& cf_name, |
211 | | const MutableCFOptions& mutable_cf_options, |
212 | | VersionStorageInfo* vstorage, |
213 | | CompactionInputFiles* inputs, |
214 | | CompactionInputFiles* output_level_inputs, |
215 | | int* parent_index, int base_index, |
216 | | bool only_expand_towards_right = false, |
217 | | const FileMetaData* starting_l0_file = nullptr); |
218 | | |
219 | | void GetGrandparents(VersionStorageInfo* vstorage, |
220 | | const CompactionInputFiles& inputs, |
221 | | const CompactionInputFiles& output_level_inputs, |
222 | | std::vector<FileMetaData*>* grandparents); |
223 | | |
224 | | void PickFilesMarkedForCompaction( |
225 | | const std::string& cf_name, VersionStorageInfo* vstorage, |
226 | | int* start_level, int* output_level, |
227 | | CompactionInputFiles* start_level_inputs, |
228 | | std::function<bool(const FileMetaData*)> skip_marked_file); |
229 | | |
230 | | // @param starting_l0_file If not null, restricts L0 file selection to only |
231 | | // include files at or older than starting_l0_file. |
232 | | bool GetOverlappingL0Files(VersionStorageInfo* vstorage, |
233 | | CompactionInputFiles* start_level_inputs, |
234 | | int output_level, int* parent_index, |
235 | | const FileMetaData* starting_l0_file = nullptr); |
236 | | |
237 | | // Register this compaction in the set of running compactions |
238 | | void RegisterCompaction(Compaction* c); |
239 | | |
240 | | // Remove this compaction from the set of running compactions |
241 | | void UnregisterCompaction(Compaction* c); |
242 | | |
243 | 1.58k | std::set<Compaction*>* level0_compactions_in_progress() { |
244 | 1.58k | return &level0_compactions_in_progress_; |
245 | 1.58k | } |
246 | 0 | std::unordered_set<Compaction*>* compactions_in_progress() { |
247 | 0 | return &compactions_in_progress_; |
248 | 0 | } |
249 | | |
250 | 1.12k | const InternalKeyComparator* icmp() const { return icmp_; } |
251 | | |
252 | | protected: |
253 | | const ImmutableOptions& ioptions_; |
254 | | |
255 | | // A helper function to SanitizeAndConvertCompactionInputFiles() that |
256 | | // sanitizes "input_files" by adding necessary files. |
257 | | virtual Status SanitizeCompactionInputFilesForAllLevels( |
258 | | std::unordered_set<uint64_t>* input_files, |
259 | | const ColumnFamilyMetaData& cf_meta, const int output_level) const; |
260 | | |
261 | | // Keeps track of all compactions that are running on Level0. |
262 | | // Protected by DB mutex |
263 | | std::set<Compaction*> level0_compactions_in_progress_; |
264 | | |
265 | | // Keeps track of all compactions that are running. |
266 | | // Protected by DB mutex |
267 | | std::unordered_set<Compaction*> compactions_in_progress_; |
268 | | |
269 | | const InternalKeyComparator* const icmp_; |
270 | | }; |
271 | | |
272 | | // A dummy compaction that never triggers any automatic |
273 | | // compaction. |
274 | | class NullCompactionPicker : public CompactionPicker { |
275 | | public: |
276 | | NullCompactionPicker(const ImmutableOptions& ioptions, |
277 | | const InternalKeyComparator* icmp) |
278 | 0 | : CompactionPicker(ioptions, icmp) {} |
279 | 0 | virtual ~NullCompactionPicker() {} |
280 | | |
281 | | // Always return "nullptr" |
282 | | Compaction* PickCompaction( |
283 | | const std::string& /*cf_name*/, |
284 | | const MutableCFOptions& /*mutable_cf_options*/, |
285 | | const MutableDBOptions& /*mutable_db_options*/, |
286 | | const std::vector<SequenceNumber>& /*existing_snapshots*/, |
287 | | const SnapshotChecker* /*snapshot_checker*/, |
288 | | VersionStorageInfo* /*vstorage*/, LogBuffer* /* log_buffer */, |
289 | | const std::string& /*full_history_ts_low*/, |
290 | 0 | bool /*require_max_output_level*/) override { |
291 | 0 | return nullptr; |
292 | 0 | } |
293 | | |
294 | | // Always return "nullptr" |
295 | | Compaction* PickCompactionForCompactRange( |
296 | | const std::string& /*cf_name*/, |
297 | | const MutableCFOptions& /*mutable_cf_options*/, |
298 | | const MutableDBOptions& /*mutable_db_options*/, |
299 | | VersionStorageInfo* /*vstorage*/, int /*input_level*/, |
300 | | int /*output_level*/, |
301 | | const CompactRangeOptions& /*compact_range_options*/, |
302 | | const InternalKey* /*begin*/, const InternalKey* /*end*/, |
303 | | InternalKey** /*compaction_end*/, bool* /*manual_conflict*/, |
304 | | uint64_t /*max_file_num_to_ignore*/, const std::string& /*trim_ts*/, |
305 | 0 | const std::string& /*full_history_ts_low*/) override { |
306 | 0 | return nullptr; |
307 | 0 | } |
308 | | |
309 | | // Always returns false. |
310 | 0 | bool NeedsCompaction(const VersionStorageInfo* /*vstorage*/) const override { |
311 | 0 | return false; |
312 | 0 | } |
313 | | }; |
314 | | |
315 | | // Attempts to find an intra L0 compaction conforming to the given parameters. |
316 | | // |
317 | | // @param level_files Metadata for L0 files. |
318 | | // @param min_files_to_compact Minimum number of files required to |
319 | | // do the compaction. |
320 | | // @param max_compact_bytes_per_del_file Maximum average size in bytes per |
321 | | // file that is going to get deleted by |
322 | | // the compaction. |
323 | | // @param max_compaction_bytes Maximum total size in bytes (in terms |
324 | | // of compensated file size) for files |
325 | | // to be compacted. |
326 | | // @param [out] comp_inputs If a compaction was found, will be |
327 | | // initialized with corresponding input |
328 | | // files. Cannot be nullptr. |
329 | | // |
330 | | // @return true iff compaction was found. |
331 | | bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files, |
332 | | size_t min_files_to_compact, |
333 | | uint64_t max_compact_bytes_per_del_file, |
334 | | uint64_t max_compaction_bytes, |
335 | | CompactionInputFiles* comp_inputs); |
336 | | |
337 | | CompressionType GetCompressionType(const VersionStorageInfo* vstorage, |
338 | | const MutableCFOptions& mutable_cf_options, |
339 | | int level, int base_level, |
340 | | const bool enable_compression = true); |
341 | | |
342 | | CompressionOptions GetCompressionOptions( |
343 | | const MutableCFOptions& mutable_cf_options, |
344 | | const VersionStorageInfo* vstorage, int level, |
345 | | const bool enable_compression = true); |
346 | | |
347 | | } // namespace ROCKSDB_NAMESPACE |