/src/rocksdb/table/get_context.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | |
6 | | #pragma once |
7 | | #include <string> |
8 | | |
9 | | #include "db/read_callback.h" |
10 | | #include "rocksdb/types.h" |
11 | | |
12 | | namespace ROCKSDB_NAMESPACE { |
13 | | class BlobFetcher; |
14 | | class Comparator; |
15 | | class Logger; |
16 | | class MergeContext; |
17 | | class MergeOperator; |
18 | | class PinnableWideColumns; |
19 | | class PinnedIteratorsManager; |
20 | | class Statistics; |
21 | | class SystemClock; |
22 | | struct ParsedInternalKey; |
23 | | |
24 | | // Data structure for accumulating statistics during a point lookup. At the |
25 | | // end of the point lookup, the corresponding ticker stats are updated. This |
26 | | // avoids the overhead of frequent ticker stats updates. Every counter below is zero-initialized. |
27 | | struct GetContextStats { |
28 | | uint64_t num_cache_hit = 0; |
29 | | uint64_t num_cache_index_hit = 0; |
30 | | uint64_t num_cache_data_hit = 0; |
31 | | uint64_t num_cache_filter_hit = 0; |
32 | | uint64_t num_cache_compression_dict_hit = 0; |
33 | | uint64_t num_cache_index_miss = 0; |
34 | | uint64_t num_cache_filter_miss = 0; |
35 | | uint64_t num_cache_data_miss = 0; |
36 | | uint64_t num_cache_compression_dict_miss = 0; |
37 | | uint64_t num_cache_bytes_read = 0; |
38 | | uint64_t num_cache_miss = 0; |
39 | | uint64_t num_cache_add = 0; |
40 | | uint64_t num_cache_add_redundant = 0; |
41 | | uint64_t num_cache_bytes_write = 0; |
42 | | uint64_t num_cache_index_add = 0; |
43 | | uint64_t num_cache_index_add_redundant = 0; |
44 | | uint64_t num_cache_index_bytes_insert = 0; |
45 | | uint64_t num_cache_data_add = 0; |
46 | | uint64_t num_cache_data_add_redundant = 0; |
47 | | uint64_t num_cache_data_bytes_insert = 0; |
48 | | uint64_t num_cache_filter_add = 0; |
49 | | uint64_t num_cache_filter_add_redundant = 0; |
50 | | uint64_t num_cache_filter_bytes_insert = 0; |
51 | | uint64_t num_cache_compression_dict_add = 0; |
52 | | uint64_t num_cache_compression_dict_add_redundant = 0; |
53 | | uint64_t num_cache_compression_dict_bytes_insert = 0; |
54 | | // MultiGet stats. |
55 | | uint64_t num_filter_read = 0; |
56 | | uint64_t num_index_read = 0; |
57 | | uint64_t num_sst_read = 0; |
58 | | }; |
59 | | |
60 | | // A class to hold context about a point lookup, such as pointer to value |
61 | | // slice, key, merge context etc, as well as the current state of the |
62 | | // lookup. Any user using GetContext to track the lookup result must call |
63 | | // SaveValue() whenever the internal key is found. This can happen |
64 | | // repeatedly in case of merge operands. In case the key may exist with |
65 | | // high probability, but IO is required to confirm and the user doesn't allow |
66 | | // it, MarkKeyMayExist() must be called instead of SaveValue(). |
67 | | class GetContext { |
68 | | public: |
69 | | // Current state of the point lookup. All except kNotFound and kMerge are |
70 | | // terminal states. |
71 | | enum GetState { |
72 | | kNotFound, |
73 | | kFound, |
74 | | kDeleted, |
75 | | kCorrupt, |
76 | | kMerge, // saver contains the current merge result (the operands) |
77 | | kUnexpectedBlobIndex, |
78 | | kMergeOperatorFailed, |
79 | | }; |
80 | | GetContextStats get_context_stats_; |
81 | | |
82 | | // Constructors. The second overload additionally takes a std::string* timestamp. |
83 | | // @param value Holds the value corresponding to user_key. If it is nullptr, |
84 | | // then return all merge operands corresponding to user_key |
85 | | // via merge_context. |
86 | | // @param value_found If non-nullptr, set to false if key may be present |
87 | | // but we can't be certain because we cannot do IO. |
88 | | // @param max_covering_tombstone_seq Pointer to highest sequence number of |
89 | | // range deletion covering the key. When an internal key |
90 | | // is found with smaller sequence number, the lookup |
91 | | // terminates. |
92 | | // @param seq If non-nullptr, the sequence number of the found key will be |
93 | | // saved here. |
94 | | // @param callback Pointer to ReadCallback to perform additional checks |
95 | | // for visibility of a key. |
96 | | // @param is_blob_index If non-nullptr, will be used to indicate if a found |
97 | | // key is of type blob index. |
98 | | // @param do_merge True if value associated with user_key has to be returned |
99 | | // and false if all the merge operands associated with user_key have to be |
100 | | // returned. If do_merge=false then all the merge operands are stored in |
101 | | // merge_context and they are never merged. The value pointer is untouched. |
102 | | GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, |
103 | | Logger* logger, Statistics* statistics, GetState init_state, |
104 | | const Slice& user_key, PinnableSlice* value, |
105 | | PinnableWideColumns* columns, bool* value_found, |
106 | | MergeContext* merge_context, bool do_merge, |
107 | | SequenceNumber* max_covering_tombstone_seq, SystemClock* clock, |
108 | | SequenceNumber* seq = nullptr, |
109 | | PinnedIteratorsManager* _pinned_iters_mgr = nullptr, |
110 | | ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, |
111 | | uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr); |
112 | | GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, |
113 | | Logger* logger, Statistics* statistics, GetState init_state, |
114 | | const Slice& user_key, PinnableSlice* value, |
115 | | PinnableWideColumns* columns, std::string* timestamp, |
116 | | bool* value_found, MergeContext* merge_context, bool do_merge, |
117 | | SequenceNumber* max_covering_tombstone_seq, SystemClock* clock, |
118 | | SequenceNumber* seq = nullptr, |
119 | | PinnedIteratorsManager* _pinned_iters_mgr = nullptr, |
120 | | ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, |
121 | | uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr); |
122 | | |
123 | | GetContext() = delete; |
124 | | |
125 | | // This can be called to indicate that a key may be present, but cannot be |
126 | | // confirmed because IO is not allowed. |
127 | | void MarkKeyMayExist(); |
128 | | |
129 | | // Records this key, value, and any meta-data (such as sequence number and |
130 | | // state) into this GetContext. |
131 | | // |
132 | | // If the parsed_key matches the user key that we are looking for, sets |
133 | | // matched to true. |
134 | | // |
135 | | // Returns True if more keys need to be read (due to merges) or |
136 | | // False if the complete value has been found. |
137 | | bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value, |
138 | | bool* matched, Status* read_status, |
139 | | Cleanable* value_pinner = nullptr); |
140 | | |
141 | | // Simplified version of the previous function. Should only be used when we |
142 | | // know that the operation is a Put. |
143 | | void SaveValue(const Slice& value, SequenceNumber seq); |
144 | | |
145 | 479 | GetState State() const { return state_; } |
146 | | |
147 | 98 | SequenceNumber* max_covering_tombstone_seq() { |
148 | 98 | return max_covering_tombstone_seq_; |
149 | 98 | } |
150 | | |
151 | 0 | bool NeedTimestamp() { return timestamp_ != nullptr; } |
152 | | |
153 | 0 | inline size_t TimestampSize() { return ucmp_->timestamp_size(); } |
154 | | |
155 | 0 | void SetTimestampFromRangeTombstone(const Slice& timestamp) { |
156 | 0 | assert(timestamp_); |
157 | 0 | timestamp_->assign(timestamp.data(), timestamp.size()); |
158 | 0 | ts_from_rangetombstone_ = true; |
159 | 0 | } |
160 | | |
161 | 0 | PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; } |
162 | | |
163 | | // If a non-null string is passed, all the SaveValue calls will be |
164 | | // logged into the string. The operations can then be replayed on |
165 | | // another GetContext with replayGetContextLog(). |
166 | 196 | void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; } |
167 | | |
168 | | // Do we need to fetch the SequenceNumber for this key? |
169 | 0 | bool NeedToReadSequence() const { return (seq_ != nullptr); } |
170 | | |
171 | 98 | bool sample() const { return sample_; } |
172 | | |
173 | 94 | bool CheckCallback(SequenceNumber seq) { |
174 | 94 | if (callback_) { |
175 | 0 | return callback_->IsVisible(seq); |
176 | 0 | } |
177 | 94 | return true; |
178 | 94 | } |
179 | | |
180 | | void ReportCounters(); |
181 | | |
182 | 0 | bool has_callback() const { return callback_ != nullptr; } |
183 | | |
184 | 0 | const Slice& ukey_to_get_blob_value() const { |
185 | 0 | if (!ukey_with_ts_found_.empty()) { |
186 | 0 | return ukey_with_ts_found_; |
187 | 0 | } else { |
188 | 0 | return user_key_; |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | 98 | uint64_t get_tracing_get_id() const { return tracing_get_id_; } |
193 | | |
194 | | void push_operand(const Slice& value, Cleanable* value_pinner); |
195 | | |
196 | | private: |
197 | | // Helper method that postprocesses the results of merge operations, e.g. it |
198 | | // sets the state correctly upon merge errors. |
199 | | void PostprocessMerge(const Status& merge_status); |
200 | | |
201 | | // The following methods perform the actual merge operation for the |
202 | | // no base value/plain base value/wide-column base value cases. |
203 | | void MergeWithNoBaseValue(); |
204 | | void MergeWithPlainBaseValue(const Slice& value); |
205 | | void MergeWithWideColumnBaseValue(const Slice& entity); |
206 | | |
207 | | bool GetBlobValue(const Slice& user_key, const Slice& blob_index, |
208 | | PinnableSlice* blob_value, Status* read_status); |
209 | | |
210 | | void appendToReplayLog(ValueType type, Slice value, Slice ts); |
211 | | |
212 | | const Comparator* ucmp_; |
213 | | const MergeOperator* merge_operator_; |
214 | | // NOTE(review): the old comment here ("the merge operations encountered;") looks stale; it does not describe logger_ -- confirm and remove. |
215 | | Logger* logger_; |
216 | | Statistics* statistics_; |
217 | | |
218 | | GetState state_; |
219 | | Slice user_key_; |
220 | | // When a blob index is found with the user key containing timestamp, |
221 | | // this copies the corresponding user key on record in the sst file |
222 | | // and is later used for blob verification. |
223 | | PinnableSlice ukey_with_ts_found_; |
224 | | PinnableSlice* pinnable_val_; |
225 | | PinnableWideColumns* columns_; |
226 | | std::string* timestamp_; |
227 | | bool ts_from_rangetombstone_{false}; |
228 | | bool* value_found_; // Is value set correctly? Used by KeyMayExist |
229 | | MergeContext* merge_context_; |
230 | | SequenceNumber* max_covering_tombstone_seq_; |
231 | | SystemClock* clock_; |
232 | | // If a key is found, seq_ will be set to the SequenceNumber of most recent |
233 | | // write to the key or kMaxSequenceNumber if unknown. |
234 | | SequenceNumber* seq_; |
235 | | std::string* replay_log_; |
236 | | // Used to temporarily pin blocks when state_ == GetContext::kMerge |
237 | | PinnedIteratorsManager* pinned_iters_mgr_; |
238 | | ReadCallback* callback_; |
239 | | bool sample_; |
240 | | // Value is true if it's called as part of DB Get API and false if it's |
241 | | // called as part of DB GetMergeOperands API. When it's false, merge operands |
242 | | // are never merged. |
243 | | bool do_merge_; |
244 | | bool* is_blob_index_; |
245 | | // Used for block cache tracing only. A tracing get id uniquely identifies a |
246 | | // Get or a MultiGet. |
247 | | const uint64_t tracing_get_id_; |
248 | | BlobFetcher* blob_fetcher_; |
249 | | }; |
250 | | |
251 | | // Call this to replay a log and bring the get_context up to date. The replay |
252 | | // log must have been created by another GetContext object, whose replay log |
253 | | // must have been set by calling GetContext::SetReplayLog(). value_pinner defaults to nullptr and seq_no to kMaxSequenceNumber. |
254 | | Status replayGetContextLog(const Slice& replay_log, const Slice& user_key, |
255 | | GetContext* get_context, |
256 | | Cleanable* value_pinner = nullptr, |
257 | | SequenceNumber seq_no = kMaxSequenceNumber); |
258 | | |
259 | | } // namespace ROCKSDB_NAMESPACE |