/src/rocksdb/file/delete_scheduler.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
6 | | #pragma once |
7 | | |
8 | | |
9 | | #include <map> |
10 | | #include <optional> |
11 | | #include <queue> |
12 | | #include <string> |
13 | | #include <thread> |
14 | | |
15 | | #include "monitoring/instrumented_mutex.h" |
16 | | #include "port/port.h" |
17 | | #include "rocksdb/status.h" |
18 | | |
19 | | namespace ROCKSDB_NAMESPACE { |
20 | | |
21 | | class Env; |
22 | | class FileSystem; |
23 | | class Logger; |
24 | | class SstFileManagerImpl; |
25 | | class SystemClock; |
26 | | |
27 | | // DeleteScheduler allows the DB to enforce a rate limit on file deletion. |
28 | | // Instead of deleting files immediately, files are marked as trash |
29 | | // and deleted in a background thread that applies a sleep penalty between |
30 | | // deletes if they are happening at a rate faster than rate_bytes_per_sec. |
31 | | // |
32 | | // Rate limiting can be turned off by setting rate_bytes_per_sec = 0. In this |
33 | | // case DeleteScheduler will delete files immediately. |
34 | | class DeleteScheduler { |
35 | | public: |
36 | | DeleteScheduler(SystemClock* clock, FileSystem* fs, |
37 | | int64_t rate_bytes_per_sec, Logger* info_log, |
38 | | SstFileManagerImpl* sst_file_manager, |
39 | | double max_trash_db_ratio, uint64_t bytes_max_delete_chunk); |
40 | | |
41 | | ~DeleteScheduler(); |
42 | | |
43 | | // Return the delete rate limit in bytes per second |
44 | 11.0k | int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_.load(); } |
45 | | |
46 | | // Set the delete rate limit in bytes per second, then call MaybeCreateBackgroundThread() |
47 | 0 | void SetRateBytesPerSecond(int64_t bytes_per_sec) { |
48 | 0 | rate_bytes_per_sec_.store(bytes_per_sec); |
49 | 0 | MaybeCreateBackgroundThread(); |
50 | 0 | } |
51 | | |
52 | | // Delete an accounted file that is tracked by `SstFileManager` and should be |
53 | | // tracked by this `DeleteScheduler` when it's deleted. |
54 | | // The file is deleted immediately if slow deletion is disabled. If force_bg |
55 | | // is not set and the trash to DB size ratio exceeds the configured threshold, |
56 | | // it is immediately deleted too. In all other cases, the file will be moved |
57 | | // to a trash directory and scheduled for deletion by a background thread. |
58 | | Status DeleteFile(const std::string& fname, const std::string& dir_to_sync, |
59 | | const bool force_bg = false); |
60 | | |
61 | | // Delete an unaccounted file that is not tracked by `SstFileManager` and |
62 | | // should not be tracked by this `DeleteScheduler` when it's deleted. |
63 | | // The file is deleted immediately if slow deletion is disabled. If force_bg |
64 | | // is not set and the file has more than 1 hard link, it is immediately |
65 | | // deleted too. In all other cases, the file will be moved to a trash |
66 | | // directory and scheduled for deletion by a background thread. |
67 | | // This API also supports assigning a file to a specified bucket created by |
68 | | // `NewTrashBucket` when deleting files in the background, so the caller can |
69 | | // wait for a specific bucket to be empty by calling the |
70 | | // `WaitForEmptyTrashBucket` API. |
71 | | Status DeleteUnaccountedFile(const std::string& file_path, |
72 | | const std::string& dir_to_sync, |
73 | | const bool force_bg = false, |
74 | | std::optional<int32_t> bucket = std::nullopt); |
75 | | |
76 | | // Wait for all files being deleted in the background to finish or for the |
77 | | // destructor to be called. |
78 | | void WaitForEmptyTrash(); |
79 | | |
80 | | // Creates a new trash bucket. A bucket is only created and returned when slow |
81 | | // deletion is enabled. |
82 | | // For each bucket that is created, the user should also call |
83 | | // `WaitForEmptyTrashBucket` after scheduling file deletions to make sure the |
84 | | // trash files are all cleared. |
85 | | std::optional<int32_t> NewTrashBucket(); |
86 | | |
87 | | // Wait for all the files in the specified bucket to be deleted in the |
88 | | // background or for the destructor to be called. |
89 | | void WaitForEmptyTrashBucket(int32_t bucket); |
90 | | |
91 | | // Return a map containing the errors that happened in BackgroundEmptyTrash |
92 | | // (file_path => error status) |
93 | | std::map<std::string, Status> GetBackgroundErrors(); |
94 | | |
95 | 0 | uint64_t GetTotalTrashSize() { return total_trash_size_.load(); } |
96 | | |
97 | | // Return the trash/DB size ratio above which new files are deleted immediately |
98 | 0 | double GetMaxTrashDBRatio() { return max_trash_db_ratio_.load(); } |
99 | | |
100 | | // Update the trash/DB size ratio above which new files are deleted immediately; asserts r >= 0 |
101 | 0 | void SetMaxTrashDBRatio(double r) { |
102 | 0 | assert(r >= 0); |
103 | 0 | max_trash_db_ratio_.store(r); |
104 | 0 | } |
105 | | |
106 | | static const std::string kTrashExtension; |
107 | | static bool IsTrashFile(const std::string& file_path); |
108 | | |
109 | | // Check if there are any .trash files in path and schedule their deletion, |
110 | | // or delete them immediately if the sst file manager (sfm) is nullptr |
111 | | static Status CleanupDirectory(Env* env, SstFileManagerImpl* sfm, |
112 | | const std::string& path); |
113 | | |
114 | 11.0k | void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) { |
115 | 11.0k | InstrumentedMutexLock l(&mu_); |
116 | 11.0k | stats_ = stats; |
117 | 11.0k | } |
118 | | |
119 | | private: |
120 | | Status DeleteFileImmediately(const std::string& file_path, bool accounted); |
121 | | |
122 | | Status AddFileToDeletionQueue(const std::string& file_path, |
123 | | const std::string& dir_to_sync, |
124 | | std::optional<int32_t> bucket, bool accounted); |
125 | | |
126 | | Status MarkAsTrash(const std::string& file_path, bool accounted, |
127 | | std::string* path_in_trash); |
128 | | |
129 | | Status DeleteTrashFile(const std::string& path_in_trash, |
130 | | const std::string& dir_to_sync, bool accounted, |
131 | | uint64_t* deleted_bytes, bool* is_complete); |
132 | | |
133 | | Status OnDeleteFile(const std::string& file_path, bool accounted); |
134 | | |
135 | | void BackgroundEmptyTrash(); |
136 | | |
137 | | void MaybeCreateBackgroundThread(); |
138 | | |
139 | | SystemClock* clock_; |
140 | | FileSystem* fs_; |
141 | | |
142 | | // Total size of trash files |
143 | | std::atomic<uint64_t> total_trash_size_; |
144 | | // Maximum number of bytes that should be deleted per second |
145 | | std::atomic<int64_t> rate_bytes_per_sec_; |
146 | | // Mutex to protect queue_, pending_files_, next_trash_bucket_, |
147 | | // pending_files_in_buckets_, bg_errors_, closing_, stats_ |
148 | | InstrumentedMutex mu_; |
149 | | |
150 | | struct FileAndDir { |
151 | | FileAndDir(const std::string& _fname, const std::string& _dir, |
152 | | bool _accounted, std::optional<int32_t> _bucket) |
153 | 0 | : fname(_fname), dir(_dir), accounted(_accounted), bucket(_bucket) {} |
154 | | std::string fname; |
155 | | std::string dir; // empty will be skipped. |
156 | | bool accounted; |
157 | | std::optional<int32_t> bucket; |
158 | | }; |
159 | | |
160 | | // Queue of trash files that need to be deleted |
161 | | std::queue<FileAndDir> queue_; |
162 | | // Number of trash files that are waiting to be deleted |
163 | | int32_t pending_files_; |
164 | | // Next trash bucket that can be created |
165 | | int32_t next_trash_bucket_; |
166 | | // A mapping from trash bucket to the number of pending files in the bucket |
167 | | std::map<int32_t, int32_t> pending_files_in_buckets_; |
168 | | uint64_t bytes_max_delete_chunk_; |
169 | | // Errors that happened in BackgroundEmptyTrash (file_path => error) |
170 | | std::map<std::string, Status> bg_errors_; |
171 | | |
172 | | bool num_link_error_printed_ = false; |
173 | | // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop |
174 | | bool closing_; |
175 | | // Condition variable signaled in these conditions: |
176 | | // - the pending_files_ value changes from 0 => 1 |
177 | | // - the pending_files_ value changes from 1 => 0 |
178 | | // - a value in pending_files_in_buckets_ changes from 1 => 0 |
179 | | // - the closing_ value is set to true |
180 | | InstrumentedCondVar cv_; |
181 | | // Background thread running BackgroundEmptyTrash |
182 | | std::unique_ptr<port::Thread> bg_thread_; |
183 | | // Mutex to protect threads from file name conflicts |
184 | | InstrumentedMutex file_move_mu_; |
185 | | Logger* info_log_; |
186 | | SstFileManagerImpl* sst_file_manager_; |
187 | | // If the trash size constitutes more than this fraction of the total DB |
188 | | // size, we will start deleting new files passed to DeleteScheduler |
189 | | // immediately. |
190 | | // Unaccounted files passed for deletion will not cause a change in |
191 | | // total_trash_size_ or affect the DeleteScheduler::total_trash_size_ over |
192 | | // SstFileManager::total_size_ ratio. Their slow deletion is not subject to |
193 | | // this configured threshold either. |
194 | | std::atomic<double> max_trash_db_ratio_; |
195 | | static const uint64_t kMicrosInSecond = 1000 * 1000LL; |
196 | | std::shared_ptr<Statistics> stats_; |
197 | | }; |
198 | | |
199 | | } // namespace ROCKSDB_NAMESPACE |
200 | | |