Coverage Report

Created: 2024-09-08 07:17

/src/rocksdb/table/get_context.h
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
6
#pragma once
7
#include <string>
8
9
#include "db/read_callback.h"
10
#include "rocksdb/types.h"
11
12
namespace ROCKSDB_NAMESPACE {
13
class BlobFetcher;
14
class Comparator;
15
class Logger;
16
class MergeContext;
17
class MergeOperator;
18
class PinnableWideColumns;
19
class PinnedIteratorsManager;
20
class Statistics;
21
class SystemClock;
22
struct ParsedInternalKey;
23
24
// Data structure for accumulating statistics during a point lookup. At the
25
// end of the point lookup, the corresponding ticker stats are updated. This
26
// avoids the overhead of frequent ticker stats updates.
27
struct GetContextStats {
28
  uint64_t num_cache_hit = 0;
29
  uint64_t num_cache_index_hit = 0;
30
  uint64_t num_cache_data_hit = 0;
31
  uint64_t num_cache_filter_hit = 0;
32
  uint64_t num_cache_compression_dict_hit = 0;
33
  uint64_t num_cache_index_miss = 0;
34
  uint64_t num_cache_filter_miss = 0;
35
  uint64_t num_cache_data_miss = 0;
36
  uint64_t num_cache_compression_dict_miss = 0;
37
  uint64_t num_cache_bytes_read = 0;
38
  uint64_t num_cache_miss = 0;
39
  uint64_t num_cache_add = 0;
40
  uint64_t num_cache_add_redundant = 0;
41
  uint64_t num_cache_bytes_write = 0;
42
  uint64_t num_cache_index_add = 0;
43
  uint64_t num_cache_index_add_redundant = 0;
44
  uint64_t num_cache_index_bytes_insert = 0;
45
  uint64_t num_cache_data_add = 0;
46
  uint64_t num_cache_data_add_redundant = 0;
47
  uint64_t num_cache_data_bytes_insert = 0;
48
  uint64_t num_cache_filter_add = 0;
49
  uint64_t num_cache_filter_add_redundant = 0;
50
  uint64_t num_cache_filter_bytes_insert = 0;
51
  uint64_t num_cache_compression_dict_add = 0;
52
  uint64_t num_cache_compression_dict_add_redundant = 0;
53
  uint64_t num_cache_compression_dict_bytes_insert = 0;
54
  // MultiGet stats.
55
  uint64_t num_filter_read = 0;
56
  uint64_t num_index_read = 0;
57
  uint64_t num_sst_read = 0;
58
};
59
60
// A class to hold context about a point lookup, such as pointer to value
61
// slice, key, merge context etc, as well as the current state of the
62
// lookup. Any user using GetContext to track the lookup result must call
63
// SaveValue() whenever the internal key is found. This can happen
64
// repeatedly in case of merge operands. In case the key may exist with
65
// high probability, but IO is required to confirm and the user doesn't allow
66
// it, MarkKeyMayExist() must be called instead of SaveValue().
67
class GetContext {
68
 public:
69
  // Current state of the point lookup. All except kNotFound and kMerge are
70
  // terminal states.
71
  enum GetState {
72
    kNotFound,
73
    kFound,
74
    kDeleted,
75
    kCorrupt,
76
    kMerge,  // saver contains the current merge result (the operands)
77
    kUnexpectedBlobIndex,
78
    kMergeOperatorFailed,
79
  };
80
  GetContextStats get_context_stats_;
81
82
  // Constructor
83
  // @param value Holds the value corresponding to user_key. If it's nullptr
84
  //              then return all merge operands corresponding to user_key
85
  //              via merge_context
86
  // @param value_found If non-nullptr, set to false if key may be present
87
  //                    but we can't be certain because we cannot do IO
88
  // @param max_covering_tombstone_seq Pointer to highest sequence number of
89
  //                    range deletion covering the key. When an internal key
90
  //                    is found with smaller sequence number, the lookup
91
  //                    terminates
92
  // @param seq If non-nullptr, the sequence number of the found key will be
93
  //            saved here
94
  // @param callback Pointer to ReadCallback to perform additional checks
95
  //                 for visibility of a key
96
  // @param is_blob_index If non-nullptr, will be used to indicate if a found
97
  //                      key is of type blob index
98
  // @param do_merge True if value associated with user_key has to be returned
99
  // and false if all the merge operands associated with user_key have to be
100
  // returned. If do_merge=false then all the merge operands are stored in
101
  // merge_context and they are never merged. The value pointer is untouched.
102
  GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
103
             Logger* logger, Statistics* statistics, GetState init_state,
104
             const Slice& user_key, PinnableSlice* value,
105
             PinnableWideColumns* columns, bool* value_found,
106
             MergeContext* merge_context, bool do_merge,
107
             SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
108
             SequenceNumber* seq = nullptr,
109
             PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
110
             ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
111
             uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
112
  GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
113
             Logger* logger, Statistics* statistics, GetState init_state,
114
             const Slice& user_key, PinnableSlice* value,
115
             PinnableWideColumns* columns, std::string* timestamp,
116
             bool* value_found, MergeContext* merge_context, bool do_merge,
117
             SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
118
             SequenceNumber* seq = nullptr,
119
             PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
120
             ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
121
             uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
122
123
  GetContext() = delete;
124
125
  // This can be called to indicate that a key may be present, but cannot be
126
  // confirmed due to IO not allowed
127
  void MarkKeyMayExist();
128
129
  // Records this key, value, and any meta-data (such as sequence number and
130
  // state) into this GetContext.
131
  //
132
  // If the parsed_key matches the user key that we are looking for, sets
133
  // matched to true.
134
  //
135
  // Returns True if more keys need to be read (due to merges) or
136
  //         False if the complete value has been found.
137
  bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
138
                 bool* matched, Status* read_status,
139
                 Cleanable* value_pinner = nullptr);
140
141
  // Simplified version of the previous function. Should only be used when we
142
  // know that the operation is a Put.
143
  void SaveValue(const Slice& value, SequenceNumber seq);
144
145
479
  GetState State() const { return state_; }
146
147
98
  SequenceNumber* max_covering_tombstone_seq() {
148
98
    return max_covering_tombstone_seq_;
149
98
  }
150
151
0
  bool NeedTimestamp() { return timestamp_ != nullptr; }
152
153
0
  inline size_t TimestampSize() { return ucmp_->timestamp_size(); }
154
155
0
  void SetTimestampFromRangeTombstone(const Slice& timestamp) {
156
0
    assert(timestamp_);
157
0
    timestamp_->assign(timestamp.data(), timestamp.size());
158
0
    ts_from_rangetombstone_ = true;
159
0
  }
160
161
0
  PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
162
163
  // If a non-null string is passed, all the SaveValue calls will be
164
  // logged into the string. The operations can then be replayed on
165
  // another GetContext with replayGetContextLog.
166
196
  void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
167
168
  // Do we need to fetch the SequenceNumber for this key?
169
0
  bool NeedToReadSequence() const { return (seq_ != nullptr); }
170
171
98
  bool sample() const { return sample_; }
172
173
94
  bool CheckCallback(SequenceNumber seq) {
174
94
    if (callback_) {
175
0
      return callback_->IsVisible(seq);
176
0
    }
177
94
    return true;
178
94
  }
179
180
  void ReportCounters();
181
182
0
  bool has_callback() const { return callback_ != nullptr; }
183
184
0
  const Slice& ukey_to_get_blob_value() const {
185
0
    if (!ukey_with_ts_found_.empty()) {
186
0
      return ukey_with_ts_found_;
187
0
    } else {
188
0
      return user_key_;
189
0
    }
190
0
  }
191
192
98
  uint64_t get_tracing_get_id() const { return tracing_get_id_; }
193
194
  void push_operand(const Slice& value, Cleanable* value_pinner);
195
196
 private:
197
  // Helper method that postprocesses the results of merge operations, e.g. it
198
  // sets the state correctly upon merge errors.
199
  void PostprocessMerge(const Status& merge_status);
200
201
  // The following methods perform the actual merge operation for the
202
  // no base value/plain base value/wide-column base value cases.
203
  void MergeWithNoBaseValue();
204
  void MergeWithPlainBaseValue(const Slice& value);
205
  void MergeWithWideColumnBaseValue(const Slice& entity);
206
207
  bool GetBlobValue(const Slice& user_key, const Slice& blob_index,
208
                    PinnableSlice* blob_value, Status* read_status);
209
210
  void appendToReplayLog(ValueType type, Slice value, Slice ts);
211
212
  const Comparator* ucmp_;
213
  const MergeOperator* merge_operator_;
214
  // the merge operations encountered (NOTE: likely stale; see merge_context_)
215
  Logger* logger_;
216
  Statistics* statistics_;
217
218
  GetState state_;
219
  Slice user_key_;
220
  // When a blob index is found with the user key containing timestamp,
221
  // this copies the corresponding user key on record in the sst file
222
  // and is later used for blob verification.
223
  PinnableSlice ukey_with_ts_found_;
224
  PinnableSlice* pinnable_val_;
225
  PinnableWideColumns* columns_;
226
  std::string* timestamp_;
227
  bool ts_from_rangetombstone_{false};
228
  bool* value_found_;  // Is value set correctly? Used by KeyMayExist
229
  MergeContext* merge_context_;
230
  SequenceNumber* max_covering_tombstone_seq_;
231
  SystemClock* clock_;
232
  // If a key is found, seq_ will be set to the SequenceNumber of most recent
233
  // write to the key or kMaxSequenceNumber if unknown
234
  SequenceNumber* seq_;
235
  std::string* replay_log_;
236
  // Used to temporarily pin blocks when state_ == GetContext::kMerge
237
  PinnedIteratorsManager* pinned_iters_mgr_;
238
  ReadCallback* callback_;
239
  bool sample_;
240
  // Value is true if it's called as part of DB Get API and false if it's
241
  // called as part of DB GetMergeOperands API. When it's false, merge operands
242
  // are never merged.
243
  bool do_merge_;
244
  bool* is_blob_index_;
245
  // Used for block cache tracing only. A tracing get id uniquely identifies a
246
  // Get or a MultiGet.
247
  const uint64_t tracing_get_id_;
248
  BlobFetcher* blob_fetcher_;
249
};
250
251
// Call this to replay a log and bring the get_context up to date. The replay
252
// log must have been created by another GetContext object, whose replay log
253
// must have been set by calling GetContext::SetReplayLog().
254
Status replayGetContextLog(const Slice& replay_log, const Slice& user_key,
255
                           GetContext* get_context,
256
                           Cleanable* value_pinner = nullptr,
257
                           SequenceNumber seq_no = kMaxSequenceNumber);
258
259
}  // namespace ROCKSDB_NAMESPACE