/src/rocksdb/db/memtable_list.h
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | #pragma once |
7 | | |
8 | | #include <deque> |
9 | | #include <limits> |
10 | | #include <list> |
11 | | #include <set> |
12 | | #include <string> |
13 | | #include <vector> |
14 | | |
15 | | #include "db/logs_with_prep_tracker.h" |
16 | | #include "db/memtable.h" |
17 | | #include "db/range_del_aggregator.h" |
18 | | #include "file/filename.h" |
19 | | #include "logging/log_buffer.h" |
20 | | #include "monitoring/instrumented_mutex.h" |
21 | | #include "rocksdb/db.h" |
22 | | #include "rocksdb/iterator.h" |
23 | | #include "rocksdb/options.h" |
24 | | #include "rocksdb/types.h" |
25 | | #include "util/autovector.h" |
26 | | |
27 | | namespace ROCKSDB_NAMESPACE { |
28 | | |
29 | | class ColumnFamilyData; |
30 | | class InternalKeyComparator; |
31 | | class InstrumentedMutex; |
32 | | class MergeIteratorBuilder; |
33 | | class MemTableList; |
34 | | |
35 | | struct FlushJobInfo; |
36 | | |
37 | | // Keeps a list of immutable memtables (ReadOnlyMemTable*) in a std::list. |
38 | | // The list is immutable if refcount is bigger than one. It is used as |
39 | | // a state for Get() and iterator code paths. |
40 | | // |
41 | | // This class is not thread-safe. External synchronization is required |
42 | | // (such as holding the db mutex or being on the write thread). |
43 | | class MemTableListVersion { |
44 | | public: |
45 | | explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, |
46 | | const MemTableListVersion& old); |
47 | | explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, |
48 | | int64_t max_write_buffer_size_to_maintain); |
49 | | |
50 | | void Ref(); |
51 | | void Unref(autovector<ReadOnlyMemTable*>* to_delete = nullptr); |
52 | | |
53 | | // Search all the memtables starting from the most recent one. |
54 | | // Return the most recent value found, if any. |
55 | | // |
56 | | // If any operation was found for this key, its most recent sequence number |
57 | | // will be stored in *seq on success (regardless of whether true/false is |
58 | | // returned). Otherwise, *seq will be set to kMaxSequenceNumber. |
59 | | bool Get(const LookupKey& key, std::string* value, |
60 | | PinnableWideColumns* columns, std::string* timestamp, Status* s, |
61 | | MergeContext* merge_context, |
62 | | SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, |
63 | | const ReadOptions& read_opts, ReadCallback* callback = nullptr, |
64 | | bool* is_blob_index = nullptr, |
65 | | const BlobFetcher* blob_fetcher = nullptr); |
66 | | |
67 | | bool Get(const LookupKey& key, std::string* value, |
68 | | PinnableWideColumns* columns, std::string* timestamp, Status* s, |
69 | | MergeContext* merge_context, |
70 | | SequenceNumber* max_covering_tombstone_seq, |
71 | | const ReadOptions& read_opts, ReadCallback* callback = nullptr, |
72 | | bool* is_blob_index = nullptr, |
73 | 2.70k | const BlobFetcher* blob_fetcher = nullptr) {  // Overload for callers that do not need *seq. |
74 | 2.70k | SequenceNumber seq;  // Scratch output for the full Get(); discarded on return. |
75 | 2.70k | return Get(key, value, columns, timestamp, s, merge_context, |
76 | 2.70k | max_covering_tombstone_seq, &seq, read_opts, callback, |
77 | 2.70k | is_blob_index, blob_fetcher); |
78 | 2.70k | } |
79 | | |
80 | | void MultiGet(const ReadOptions& read_options, MultiGetRange* range, |
81 | | ReadCallback* callback, |
82 | | const BlobFetcher* blob_fetcher = nullptr); |
83 | | |
84 | | // Returns all the merge operands corresponding to the key by searching all |
85 | | // memtables starting from the most recent one. |
86 | | bool GetMergeOperands(const LookupKey& key, Status* s, |
87 | | MergeContext* merge_context, |
88 | | SequenceNumber* max_covering_tombstone_seq, |
89 | | const ReadOptions& read_opts, |
90 | | const BlobFetcher* blob_fetcher = nullptr); |
91 | | |
92 | | // Similar to Get(), but searches the Memtable history of memtables that |
93 | | // have already been flushed. Should only be used from in-memory only |
94 | | // queries (such as Transaction validation) as the history may contain |
95 | | // writes that are also present in the SST files. |
96 | | bool GetFromHistory(const LookupKey& key, std::string* value, |
97 | | PinnableWideColumns* columns, std::string* timestamp, |
98 | | Status* s, MergeContext* merge_context, |
99 | | SequenceNumber* max_covering_tombstone_seq, |
100 | | SequenceNumber* seq, const ReadOptions& read_opts, |
101 | | bool* is_blob_index = nullptr, |
102 | | const BlobFetcher* blob_fetcher = nullptr); |
103 | | bool GetFromHistory(const LookupKey& key, std::string* value, |
104 | | PinnableWideColumns* columns, std::string* timestamp, |
105 | | Status* s, MergeContext* merge_context, |
106 | | SequenceNumber* max_covering_tombstone_seq, |
107 | | const ReadOptions& read_opts, |
108 | | bool* is_blob_index = nullptr, |
109 | 0 | const BlobFetcher* blob_fetcher = nullptr) {  // Overload for callers that do not need *seq. |
110 | 0 | SequenceNumber seq;  // Scratch output for the full GetFromHistory(); discarded on return. |
111 | 0 | return GetFromHistory(key, value, columns, timestamp, s, merge_context, |
112 | 0 | max_covering_tombstone_seq, &seq, read_opts, |
113 | 0 | is_blob_index, blob_fetcher); |
114 | 0 | } |
115 | | |
116 | | Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena, |
117 | | RangeDelAggregator* range_del_agg); |
118 | | |
119 | | void AddIterators(const ReadOptions& options, |
120 | | UnownedPtr<const SeqnoToTimeMapping> seqno_to_time_mapping, |
121 | | const SliceTransform* prefix_extractor, |
122 | | std::vector<InternalIterator*>* iterator_list, |
123 | | Arena* arena); |
124 | | |
125 | | void AddIterators(const ReadOptions& options, |
126 | | UnownedPtr<const SeqnoToTimeMapping> seqno_to_time_mapping, |
127 | | const SliceTransform* prefix_extractor, |
128 | | MergeIteratorBuilder* merge_iter_builder, |
129 | | bool add_range_tombstone_iter); |
130 | | |
131 | | uint64_t GetTotalNumEntries() const; |
132 | | |
133 | | uint64_t GetTotalNumDeletes() const; |
134 | | |
135 | | ReadOnlyMemTable::MemTableStats ApproximateStats(const Slice& start_ikey, |
136 | | const Slice& end_ikey) const; |
137 | | |
138 | | // Returns the value of MemTable::GetEarliestSequenceNumber() on the most |
139 | | // recent MemTable in this list or kMaxSequenceNumber if the list is empty. |
140 | | // If include_history=true, will also search Memtables in MemTableList |
141 | | // History. |
142 | | SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const; |
143 | | |
144 | | // Return the first sequence number from the memtable list, which is the |
145 | | // smallest sequence number of all FirstSequenceNumber. |
146 | | // Return kMaxSequenceNumber if the list is empty. |
147 | | SequenceNumber GetFirstSequenceNumber() const; |
148 | | |
149 | | // Sets the id of this MemTableListVersion. REQUIRES: db_mutex held. |
150 | 4.18k | void SetID(uint64_t id) { id_ = id; } |
151 | | |
152 | 0 | uint64_t GetID() const { return id_; }  // Returns the id stored by SetID(); id_ starts at 0. |
153 | | |
154 | 110k | int NumNotFlushed() const { return static_cast<int>(memlist_.size()); }  // Size of memlist_ (immutable memtables not yet flushed). |
155 | | |
156 | 0 | int NumFlushed() const { return static_cast<int>(memlist_history_.size()); }  // Size of memlist_history_ (memtables already flushed). |
157 | | |
158 | | // Gets the newest user defined timestamps from the immutable memtables. |
159 | | // This returns the newest user defined timestamp found in the most recent |
160 | | // immutable memtable. This should only be called when user defined timestamp |
161 | | // is enabled. |
162 | | Slice GetNewestUDT() const; |
163 | | |
164 | | private: |
165 | | friend class MemTableList; |
166 | | |
167 | | friend Status InstallMemtableAtomicFlushResults( |
168 | | const autovector<MemTableList*>* imm_lists, |
169 | | const autovector<ColumnFamilyData*>& cfds, |
170 | | const autovector<const autovector<ReadOnlyMemTable*>*>& mems_list, |
171 | | VersionSet* vset, LogsWithPrepTracker* prep_tracker, |
172 | | InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta, |
173 | | const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>& |
174 | | committed_flush_jobs_info, |
175 | | autovector<ReadOnlyMemTable*>* to_delete, FSDirectory* db_directory, |
176 | | LogBuffer* log_buffer); |
177 | | |
178 | | // REQUIRE: m is an immutable memtable |
179 | | void Add(ReadOnlyMemTable* m, autovector<ReadOnlyMemTable*>* to_delete); |
180 | | // REQUIRE: m is an immutable memtable |
181 | | void Remove(ReadOnlyMemTable* m, autovector<ReadOnlyMemTable*>* to_delete); |
182 | | |
183 | | // Return true if the memtable list should be trimmed to get memory usage |
184 | | // under budget. |
185 | | bool HistoryShouldBeTrimmed(size_t usage); |
186 | | |
187 | | // Trim history, Return true if memtable is trimmed |
188 | | bool TrimHistory(autovector<ReadOnlyMemTable*>* to_delete, size_t usage); |
189 | | |
190 | | bool GetFromList(std::list<ReadOnlyMemTable*>* list, const LookupKey& key, |
191 | | std::string* value, PinnableWideColumns* columns, |
192 | | std::string* timestamp, Status* s, |
193 | | MergeContext* merge_context, |
194 | | SequenceNumber* max_covering_tombstone_seq, |
195 | | SequenceNumber* seq, const ReadOptions& read_opts, |
196 | | ReadCallback* callback = nullptr, |
197 | | bool* is_blob_index = nullptr, |
198 | | const BlobFetcher* blob_fetcher = nullptr); |
199 | | |
200 | | void AddMemTable(ReadOnlyMemTable* m); |
201 | | |
202 | | void UnrefMemTable(autovector<ReadOnlyMemTable*>* to_delete, |
203 | | ReadOnlyMemTable* m); |
204 | | |
205 | | // Calculate the total amount of memory used by memlist_ and memlist_history_ |
206 | | // excluding the last MemTable in memlist_history_. The reason for excluding |
207 | | // the last MemTable is to see if dropping the last MemTable will keep total |
208 | | // memory usage above or equal to max_write_buffer_size_to_maintain_ |
209 | | size_t MemoryAllocatedBytesExcludingLast() const; |
210 | | |
211 | | // Whether this version contains flushed memtables that are only kept around |
212 | | // for transaction conflict checking, i.e. memlist_history_ is non-empty. |
213 | 4.18k | bool HasHistory() const { return !memlist_history_.empty(); } |
214 | | |
215 | | bool MemtableLimitExceeded(size_t usage); |
216 | | |
217 | | // Immutable MemTables that have not yet been flushed. |
218 | | std::list<ReadOnlyMemTable*> memlist_; |
219 | | |
220 | | // MemTables that have already been flushed |
221 | | // (used during Transaction validation) |
222 | | std::list<ReadOnlyMemTable*> memlist_history_; |
223 | | |
224 | | // Maximum size of MemTables to keep in memory (including both flushed |
225 | | // and not-yet-flushed tables). |
226 | | const int64_t max_write_buffer_size_to_maintain_; |
227 | | |
228 | | int refs_ = 0; |
229 | | |
230 | | size_t* parent_memtable_list_memory_usage_; |
231 | | |
232 | | // MemtableListVersion id to track for flush results checking. |
233 | | uint64_t id_ = 0; |
234 | | }; |
235 | | |
236 | | // This class stores references to all the immutable memtables. |
237 | | // The memtables are flushed to L0 as soon as possible and in |
238 | | // any order. If there are more than one immutable memtable, their |
239 | | // flushes can occur concurrently. However, they are 'committed' |
240 | | // to the manifest in FIFO order to maintain correctness and |
241 | | // recoverability from a crash. |
242 | | // |
243 | | // |
244 | | // Other than imm_flush_needed and imm_trim_needed, this class is not |
245 | | // thread-safe and requires external synchronization (such as holding the db |
246 | | // mutex or being on the write thread.) |
247 | | class MemTableList { |
248 | | public: |
249 | | // Constructs a list whose current_ is a fresh MemTableListVersion; all flags and counters start cleared. |
250 | | explicit MemTableList(int min_write_buffer_number_to_merge, |
251 | | int64_t max_write_buffer_size_to_maintain) |
252 | 150k | : imm_flush_needed(false), |
253 | 150k | imm_trim_needed(false), |
254 | 150k | min_write_buffer_number_to_merge_(min_write_buffer_number_to_merge), |
255 | 150k | current_(new MemTableListVersion(&current_memory_usage_,  // The version is handed the address of this list's memory-usage counter. |
256 | 150k | max_write_buffer_size_to_maintain)), |
257 | 150k | num_flush_not_started_(0), |
258 | 150k | commit_in_progress_(false), |
259 | 150k | flush_requested_(false), |
260 | 150k | current_memory_usage_(0), |
261 | 150k | current_memory_allocted_bytes_excluding_last_(0), |
262 | 150k | current_has_history_(false), |
263 | 150k | last_memtable_list_version_id_(0) { |
264 | 150k | current_->Ref();  // The list itself holds one reference on current_. |
265 | 150k | } |
266 | | |
267 | | // A MemTableList should not be deleted without first making sure that |
268 | | // MemTableList::current() is Unref()'d: the destructor body below is empty. |
269 | 150k | ~MemTableList() {} |
270 | | |
271 | 247k | MemTableListVersion* current() const { return current_; }  // Returns current_ as-is; no reference is taken here. |
272 | | |
273 | | // Atomic flag (not a pointer) that background threads can read to |
274 | | // determine whether there is anything more to start flushing. |
275 | | std::atomic<bool> imm_flush_needed; |
276 | | |
277 | | std::atomic<bool> imm_trim_needed; |
278 | | |
279 | | // Returns the total number of memtables in the list that haven't yet |
280 | | // been flushed and logged. |
281 | | int NumNotFlushed() const; |
282 | | |
283 | | // Returns total number of memtables in the list that have been |
284 | | // completely flushed and logged. |
285 | | int NumFlushed() const; |
286 | | |
287 | | // Returns true if there is at least one memtable on which flush has |
288 | | // not yet started. |
289 | | bool IsFlushPending() const; |
290 | | |
291 | | // Returns true if there is at least one memtable that is pending flush or |
292 | | // flushing. |
293 | | bool IsFlushPendingOrRunning() const; |
294 | | |
295 | | // Returns the earliest memtables that needs to be flushed. The returned |
296 | | // memtables are guaranteed to be in the ascending order of created time. |
297 | | void PickMemtablesToFlush(uint64_t max_memtable_id, |
298 | | autovector<ReadOnlyMemTable*>* mems, |
299 | | uint64_t* max_next_log_number = nullptr); |
300 | | |
301 | | // Reset status of the given memtable list back to pending state so that |
302 | | // they can get picked up again on the next round of flush. |
303 | | // |
304 | | // @param rollback_succeeding_memtables If true, will rollback adjacent |
305 | | // younger memtables whose flush is completed. Specifically, suppose the |
306 | | // current immutable memtables are M_0,M_1...M_N ordered from youngest to |
307 | | // oldest. Suppose that the youngest memtable in `mems` is M_K. We will try to |
308 | | // rollback M_K-1, M_K-2... until the first memtable whose flush is |
309 | | // not completed. These are the memtables that would have been installed |
310 | | // by this flush job if it were to succeed. This flag is currently used |
311 | | // by non atomic_flush rollback. |
312 | | // Note that we also do rollback in `write_manifest_cb` by calling |
313 | | // `RemoveMemTablesOrRestoreFlags()`. There we rollback the entire batch so |
314 | | // it is similar to what we do here with rollback_succeeding_memtables=true. |
315 | | void RollbackMemtableFlush(const autovector<ReadOnlyMemTable*>& mems, |
316 | | bool rollback_succeeding_memtables); |
317 | | |
318 | | // Try to commit a successful flush in the manifest file. It might just return |
319 | | // Status::OK, letting a concurrent flush do the actual recording. |
320 | | Status TryInstallMemtableFlushResults( |
321 | | ColumnFamilyData* cfd, const autovector<ReadOnlyMemTable*>& m, |
322 | | LogsWithPrepTracker* prep_tracker, VersionSet* vset, |
323 | | InstrumentedMutex* mu, uint64_t file_number, |
324 | | autovector<ReadOnlyMemTable*>* to_delete, FSDirectory* db_directory, |
325 | | LogBuffer* log_buffer, |
326 | | std::list<std::unique_ptr<FlushJobInfo>>* committed_flush_jobs_info, |
327 | | bool write_edits = true); |
328 | | |
329 | | // New memtables are inserted at the front of the list. |
330 | | // Takes ownership of the reference held on *m by the caller of Add(). |
331 | | // By default, adding memtables will flag that the memtable list needs to be |
332 | | // flushed, but in certain situations, like after a mempurge, we may want to |
333 | | // avoid flushing the memtable list upon addition of a memtable. |
334 | | void Add(ReadOnlyMemTable* m, autovector<ReadOnlyMemTable*>* to_delete); |
335 | | |
336 | | // Returns an estimate of the number of bytes of data in use. |
337 | | size_t ApproximateMemoryUsage(); |
338 | | |
339 | | // Returns the cached current_memory_allocted_bytes_excluding_last_ value. |
340 | | size_t MemoryAllocatedBytesExcludingLast() const; |
341 | | |
342 | | // Returns the cached current_has_history_ value. |
343 | | bool HasHistory() const; |
344 | | |
345 | | // Updates current_memory_allocted_bytes_excluding_last_ and |
346 | | // current_has_history_ from MemTableListVersion. Must be called whenever |
347 | | // InstallNewVersion is called. |
348 | | void UpdateCachedValuesFromMemTableListVersion(); |
349 | | |
350 | | // `usage` is the current size of the mutable Memtable. When |
351 | | // max_write_buffer_size_to_maintain is used, total size of mutable and |
352 | | // immutable memtables is checked against it to decide whether to trim |
353 | | // memtable list. |
354 | | // |
355 | | // Return true if memtable is trimmed |
356 | | bool TrimHistory(autovector<ReadOnlyMemTable*>* to_delete, size_t usage); |
357 | | |
358 | | // Returns an estimate of the number of bytes of data used by |
359 | | // the unflushed mem-tables. |
360 | | size_t ApproximateUnflushedMemTablesMemoryUsage(); |
361 | | |
362 | | // Returns an estimate of the timestamp of the earliest key. |
363 | | uint64_t ApproximateOldestKeyTime() const; |
364 | | |
365 | | // Request a flush of all existing memtables to storage. This will |
366 | | // cause future calls to IsFlushPending() to return true if this list is |
367 | | // non-empty (regardless of the min_write_buffer_number_to_merge |
368 | | // parameter). This flush request will persist until the next time |
369 | | // PickMemtablesToFlush() is called. |
370 | 2.09k | void FlushRequested() { |
371 | 2.09k | flush_requested_ = true; |
372 | | // If imm() holds memtables whose addition did not trigger a flush |
373 | | // (e.g. a mempurge output memtable), raise imm_flush_needed now. |
374 | | // Note: if a race leaves imm_flush_needed set to true while |
375 | | // num_flush_not_started_ == 0, there is no impact whatsoever: |
376 | | // imm_flush_needed is only used in an assert in |
377 | | // IsFlushPending(). |
378 | 2.09k | if (num_flush_not_started_ > 0) { |
379 | 2.09k | imm_flush_needed.store(true, std::memory_order_release); |
380 | 2.09k | } |
381 | 2.09k | } |
382 | | |
383 | 0 | bool HasFlushRequested() const { return flush_requested_; }  // Returns flush_requested_, which FlushRequested() sets to true. |
384 | | |
385 | | // Atomically flips imm_trim_needed from false to true. Returns true if a trim |
386 | | // history should be scheduled and the caller should be the one to schedule it. |
387 | 0 | bool MarkTrimHistoryNeeded() { |
388 | 0 | auto expected = false;  // The exchange only succeeds if the flag is currently false. |
389 | 0 | return imm_trim_needed.compare_exchange_strong( |
390 | 0 | expected, true, std::memory_order_relaxed, std::memory_order_relaxed); |
391 | 0 | } |
392 | | |
393 | 4.18k | void ResetTrimHistoryNeeded() {  // Clears imm_trim_needed if it is currently set. |
394 | 4.18k | auto expected = true;  // The exchange leaves the flag untouched when it is already false. |
395 | 4.18k | imm_trim_needed.compare_exchange_strong( |
396 | 4.18k | expected, false, std::memory_order_relaxed, std::memory_order_relaxed); |
397 | 4.18k | } |
398 | | |
399 | | // Copying allowed |
400 | | // MemTableList(const MemTableList&); |
401 | | // void operator=(const MemTableList&); |
402 | | |
403 | 0 | size_t* current_memory_usage() { return &current_memory_usage_; }  // Returns the address of the current_memory_usage_ member. |
404 | | |
405 | | // Returns the WAL number of the oldest WAL that contains a prepared |
406 | | // transaction that corresponds to the content in this MemTableList, |
407 | | // after memtables listed in `memtables_to_flush` are flushed and their |
408 | | // status is persisted in manifest. |
409 | | uint64_t PrecomputeMinLogContainingPrepSection( |
410 | | const std::unordered_set<ReadOnlyMemTable*>* memtables_to_flush = |
411 | | nullptr) const; |
412 | | |
413 | 4.49k | uint64_t GetEarliestMemTableID() const {  // ID of the memtable at the back of current_->memlist_. |
414 | 4.49k | auto& memlist = current_->memlist_; |
415 | 4.49k | if (memlist.empty()) { |
416 | 2.09k | return std::numeric_limits<uint64_t>::max();  // Empty list: report the maximum uint64_t value. |
417 | 2.09k | } |
418 | 2.40k | return memlist.back()->GetID();  // New memtables are inserted at the front, so back() is the oldest. |
419 | 4.49k | } |
420 | | |
421 | 2.09k | uint64_t GetLatestMemTableID(bool for_atomic_flush) const {  // ID of the newest qualifying memtable in current_->memlist_, or 0 if none. |
422 | 2.09k | auto& memlist = current_->memlist_; |
423 | 2.09k | if (memlist.empty()) { |
424 | 0 | return 0;  // Empty list. |
425 | 0 | } |
426 | 2.09k | if (for_atomic_flush) { |
427 | | // Scan the memtable list from new to old and return the first memtable whose atomic_flush_seqno_ is not kMaxSequenceNumber. |
428 | 0 | for (auto it = memlist.begin(); it != memlist.end(); ++it) { |
429 | 0 | ReadOnlyMemTable* m = *it; |
430 | 0 | if (m->atomic_flush_seqno_ != kMaxSequenceNumber) { |
431 | 0 | return m->GetID(); |
432 | 0 | } |
433 | 0 | } |
434 | 0 | return 0;  // Every memtable still has atomic_flush_seqno_ == kMaxSequenceNumber. |
435 | 0 | } |
436 | 2.09k | return memlist.front()->GetID();  // New memtables are inserted at the front, so front() is the newest. |
437 | 2.09k | } |
438 | | |
439 | | // REQUIRES: DB mutex held. |
440 | | // Gets the newest user-defined timestamp of each Memtable in ascending ID |
441 | | // order, up to and including `max_memtable_id`. Used by background flush job |
442 | | // to check Memtables' eligibility for flush w.r.t. retaining UDTs. |
443 | 0 | std::vector<Slice> GetTablesNewestUDT(uint64_t max_memtable_id) { |
444 | 0 | std::vector<Slice> newest_udts; |
445 | 0 | auto& memlist = current_->memlist_; |
446 | | // Walk memlist_ from its back (oldest) to its front (newest) so that |
447 | | // newest_udts is filled in increasing MemTable ID order. |
448 | 0 | for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { |
449 | 0 | ReadOnlyMemTable* m = *it; |
450 | 0 | if (m->GetID() > max_memtable_id) { |
451 | 0 | break; |
452 | 0 | } |
453 | 0 | newest_udts.push_back(m->GetNewestUDT()); |
454 | 0 | } |
455 | 0 | return newest_udts; |
456 | 0 | } |
457 | | |
458 | 0 | void AssignAtomicFlushSeq(const SequenceNumber& seq) { |
459 | 0 | const auto& memlist = current_->memlist_; |
460 | | // Scan the memtable list from new to old, stamping `seq` on each memtable whose atomic_flush_seqno_ is still kMaxSequenceNumber. |
461 | 0 | for (auto it = memlist.begin(); it != memlist.end(); ++it) { |
462 | 0 | ReadOnlyMemTable* mem = *it; |
463 | 0 | if (mem->atomic_flush_seqno_ == kMaxSequenceNumber) { |
464 | 0 | mem->atomic_flush_seqno_ = seq; |
465 | 0 | } else { |
466 | | // Earlier memtables must have been assigned an atomic flush seq, no |
467 | | // need to continue the scan. |
468 | 0 | break; |
469 | 0 | } |
470 | 0 | } |
471 | 0 | } |
472 | | |
473 | | // Used only by DBImplSecondary during log replay. |
474 | | // Remove memtables whose data were written before the WAL with log_number |
475 | | // was created, i.e. mem->GetNextLogNumber() <= log_number. The memtables are |
476 | | // not freed, but put into a vector for future deref and reclamation. |
477 | | void RemoveOldMemTables(uint64_t log_number, |
478 | | autovector<ReadOnlyMemTable*>* to_delete); |
479 | | |
480 | | // This API is only used by atomic data replacement. To get an edit for |
481 | | // dropping the current `MemTableListVersion`. |
482 | | VersionEdit GetEditForDroppingCurrentVersion( |
483 | | const ColumnFamilyData* cfd, VersionSet* vset, |
484 | | LogsWithPrepTracker* prep_tracker) const; |
485 | | |
486 | | private: |
487 | | friend Status InstallMemtableAtomicFlushResults( |
488 | | const autovector<MemTableList*>* imm_lists, |
489 | | const autovector<ColumnFamilyData*>& cfds, |
490 | | const autovector<const autovector<ReadOnlyMemTable*>*>& mems_list, |
491 | | VersionSet* vset, LogsWithPrepTracker* prep_tracker, |
492 | | InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta, |
493 | | const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>& |
494 | | committed_flush_jobs_info, |
495 | | autovector<ReadOnlyMemTable*>* to_delete, FSDirectory* db_directory, |
496 | | LogBuffer* log_buffer); |
497 | | |
498 | | // DB mutex held |
499 | | void InstallNewVersion(); |
500 | | |
501 | | // DB mutex held |
502 | | // Called after writing to MANIFEST |
503 | | void RemoveMemTablesOrRestoreFlags(const Status& s, ColumnFamilyData* cfd, |
504 | | size_t batch_count, LogBuffer* log_buffer, |
505 | | autovector<ReadOnlyMemTable*>* to_delete, |
506 | | InstrumentedMutex* mu); |
507 | | |
508 | | const int min_write_buffer_number_to_merge_; |
509 | | |
510 | | MemTableListVersion* current_; |
511 | | |
512 | | // the number of elements that still need flushing |
513 | | int num_flush_not_started_; |
514 | | |
515 | | // committing in progress |
516 | | bool commit_in_progress_; |
517 | | |
518 | | // Requested a flush of memtables to storage. It's possible to request that |
519 | | // a subset of memtables be flushed. |
520 | | bool flush_requested_; |
521 | | |
522 | | // The current memory usage. |
523 | | size_t current_memory_usage_; |
524 | | |
525 | | // Cached value of current_->MemoryAllocatedBytesExcludingLast(). |
526 | | std::atomic<size_t> current_memory_allocted_bytes_excluding_last_; |
527 | | |
528 | | // Cached value of current_->HasHistory(). |
529 | | std::atomic<bool> current_has_history_; |
530 | | |
531 | | // Last memtable list version id, increased by 1 each time a new |
532 | | // MemtableListVersion is installed. |
533 | | uint64_t last_memtable_list_version_id_; |
534 | | }; |
535 | | |
536 | | // Installs memtable atomic flush results. |
537 | | // In most cases, imm_lists is nullptr, and the function simply uses the |
538 | | // immutable memtable lists associated with the cfds. There are unit tests that |
539 | | // install flush results for external immutable memtable lists other than the |
540 | | // cfds' own immutable memtable lists, e.g. MemTableListTest. In this case, |
541 | | // imm_lists parameter is not nullptr. |
542 | | Status InstallMemtableAtomicFlushResults( |
543 | | const autovector<MemTableList*>* imm_lists, |
544 | | const autovector<ColumnFamilyData*>& cfds, |
545 | | const autovector<const autovector<ReadOnlyMemTable*>*>& mems_list, |
546 | | VersionSet* vset, LogsWithPrepTracker* prep_tracker, InstrumentedMutex* mu, |
547 | | const autovector<FileMetaData*>& file_meta, |
548 | | const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>& |
549 | | committed_flush_jobs_info, |
550 | | autovector<ReadOnlyMemTable*>* to_delete, FSDirectory* db_directory, |
551 | | LogBuffer* log_buffer); |
552 | | } // namespace ROCKSDB_NAMESPACE |