Coverage Report

Created: 2026-05-16 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rocksdb/include/rocksdb/merge_operator.h
Line
Count
Source
1
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
6
#pragma once
7
8
#include <deque>
9
#include <memory>
10
#include <string>
11
#include <utility>
12
#include <variant>
13
#include <vector>
14
15
#include "rocksdb/customizable.h"
16
#include "rocksdb/slice.h"
17
#include "rocksdb/wide_columns.h"
18
19
namespace ROCKSDB_NAMESPACE {
20
21
class Slice;
22
class Logger;
23
24
// The Merge Operator
25
//
26
// Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only
27
// client knows. It could be numeric addition, list append, string
28
// concatenation, edit data structure, ... , anything.
29
// The library, on the other hand, is concerned with the exercise of this
30
// interface, at the right time (during get, iteration, compaction...)
31
//
32
// To use merge, the client needs to provide an object implementing one of
33
// the following interfaces:
34
//  a) AssociativeMergeOperator - for most simple semantics (always take
35
//    two values, and merge them into one value, which is then put back
36
//    into rocksdb); numeric addition and string concatenation are examples;
37
//
38
//  b) MergeOperator - the generic class for all the more abstract / complex
39
//    operations; one method (FullMergeV3) to merge a Put/Delete value with a
40
//    merge operand; and another method (PartialMerge) that merges multiple
41
//    operands together. this is especially useful if your key values have
42
//    complex structures but you would still like to support client-specific
43
//    incremental updates.
44
//
45
// AssociativeMergeOperator is simpler to implement. MergeOperator is simply
46
// more powerful.
47
//
48
// Refer to rocksdb-merge wiki for more details and example implementations.
49
//
50
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
51
// because RocksDB is not exception-safe. This could cause undefined behavior
52
// including data loss, unreported corruption, deadlocks, and more.
53
class MergeOperator : public Customizable {
54
 public:
55
0
  virtual ~MergeOperator() {}
56
16
  static const char* Type() { return "MergeOperator"; }
57
  static Status CreateFromString(const ConfigOptions& opts,
58
                                 const std::string& id,
59
                                 std::shared_ptr<MergeOperator>* result);
60
61
  // Gives the client a way to express the read -> modify -> write semantics
62
  // key:      (IN)    The key that's associated with this merge operation.
63
  //                   Client could multiplex the merge operator based on it
64
  //                   if the key space is partitioned and different subspaces
65
  //                   refer to different types of data which have different
66
  //                   merge operation semantics
67
  // existing: (IN)    null indicates that the key does not exist before this op
68
  // operand_list:(IN) the sequence of merge operations to apply, front() first.
69
  // new_value:(OUT)   Client is responsible for filling the merge result here.
70
  // The string that new_value is pointing to will be empty.
71
  // logger:   (IN)    Client could use this to log errors during merge.
72
  //
73
  // Return true on success.
74
  // All values passed in will be client-specific values. So if this method
75
  // returns false, it is because client specified bad data or there was
76
  // internal corruption. This will be treated as an error by the library.
77
  //
78
  // Also make use of the *logger for error messages.
79
  virtual bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/,
80
                         const std::deque<std::string>& /*operand_list*/,
81
0
                         std::string* /*new_value*/, Logger* /*logger*/) const {
82
    // deprecated, please use FullMergeV2()
83
0
    assert(false);
84
0
    return false;
85
0
  }
86
87
  struct MergeOperationInput {
88
    // If user-defined timestamp is enabled, `_key` includes timestamp.
89
    explicit MergeOperationInput(const Slice& _key,
90
                                 const Slice* _existing_value,
91
                                 const std::vector<Slice>& _operand_list,
92
                                 Logger* _logger)
93
0
        : key(_key),
94
0
          existing_value(_existing_value),
95
0
          operand_list(_operand_list),
96
0
          logger(_logger) {}
97
98
    // The key associated with the merge operation.
99
    const Slice& key;
100
    // The existing value of the current key, nullptr means that the
101
    // value doesn't exist.
102
    const Slice* existing_value;
103
    // A list of operands to apply.
104
    const std::vector<Slice>& operand_list;
105
    // Logger could be used by client to log any errors that happen during
106
    // the merge operation.
107
    Logger* logger;
108
  };
109
110
  enum class OpFailureScope {
111
    kDefault,
112
    kTryMerge,
113
    kMustMerge,
114
    kOpFailureScopeMax,
115
  };
116
117
  struct MergeOperationOutput {
118
    explicit MergeOperationOutput(std::string& _new_value,
119
                                  Slice& _existing_operand)
120
0
        : new_value(_new_value), existing_operand(_existing_operand) {}
121
122
    // Client is responsible for filling the merge result here.
123
    std::string& new_value;
124
    // If the merge result is one of the existing operands (or existing_value),
125
    // client can set this field to the operand (or existing_value) instead of
126
    // using new_value.
127
    Slice& existing_operand;
128
    // Indicates the blast radius of the failure. It is only meaningful to
129
    // provide a failure scope when returning `false` from the API populating
130
    // the `MergeOperationOutput`. Currently RocksDB operations handle these
131
    // values as follows:
132
    //
133
    // - `OpFailureScope::kDefault`: fallback to default
134
    //   (`OpFailureScope::kTryMerge`)
135
    // - `OpFailureScope::kTryMerge`: operations that try to merge that key will
136
    //   fail. This includes flush and compaction, which puts the DB in
137
    //   read-only mode.
138
    // - `OpFailureScope::kMustMerge`: operations that must merge that key will
139
    //   fail (e.g., `Get()`, `MultiGet()`, iteration). Flushes/compactions can
140
    //   still proceed by copying the original input operands to the output.
141
    OpFailureScope op_failure_scope = OpFailureScope::kDefault;
142
  };
143
144
  // This function applies a stack of merge operands in chronological order
145
  // on top of an existing value. There are two ways in which this method is
146
  // being used:
147
  // a) During Get() operation, it used to calculate the final value of a key
148
  // b) During compaction, in order to collapse some operands with the based
149
  //    value.
150
  //
151
  // Note: The name of the method is somewhat misleading, as both in the cases
152
  // of Get() or compaction it may be called on a subset of operands:
153
  // K:    0    +1    +2    +7    +4     +5      2     +1     +2
154
  //                              ^
155
  //                              |
156
  //                          snapshot
157
  // In the example above, Get(K) operation will call FullMerge with a base
158
  // value of 2 and operands [+1, +2]. Compaction process might decide to
159
  // collapse the beginning of the history up to the snapshot by performing
160
  // full Merge with base value of 0 and operands [+1, +2, +7, +4].
161
  virtual bool FullMergeV2(const MergeOperationInput& merge_in,
162
                           MergeOperationOutput* merge_out) const;
163
164
  struct MergeOperationInputV3 {
165
    using ExistingValue = std::variant<std::monostate, Slice, WideColumns>;
166
    using OperandList = std::vector<Slice>;
167
168
    explicit MergeOperationInputV3(const Slice& _key,
169
                                   ExistingValue&& _existing_value,
170
                                   const OperandList& _operand_list,
171
                                   Logger* _logger)
172
0
        : key(_key),
173
0
          existing_value(std::move(_existing_value)),
174
0
          operand_list(_operand_list),
175
0
          logger(_logger) {}
176
177
    // The user key, including the user-defined timestamp if applicable.
178
    const Slice& key;
179
    // The base value of the merge operation. Can be one of three things (see
180
    // the ExistingValue variant above): no existing value, plain existing
181
    // value, or wide-column existing value.
182
    ExistingValue existing_value;
183
    // The list of operands to apply.
184
    const OperandList& operand_list;
185
    // The logger to use in case a failure happens during the merge operation.
186
    Logger* logger;
187
  };
188
189
  struct MergeOperationOutputV3 {
190
    using NewColumns = std::vector<std::pair<std::string, std::string>>;
191
    using NewValue = std::variant<std::string, NewColumns, Slice>;
192
193
    // The result of the merge operation. Can be one of three things (see the
194
    // NewValue variant above): a new plain value, a new wide-column value, or
195
    // an existing merge operand.
196
    NewValue new_value;
197
    // The scope of the failure if applicable. See above for more details.
198
    OpFailureScope op_failure_scope = OpFailureScope::kDefault;
199
  };
200
201
  // An extended version of FullMergeV2() that supports wide columns on both the
202
  // input and the output side, enabling the application to perform general
203
  // transformations during merges. For backward compatibility, the default
204
  // implementation calls FullMergeV2(). Specifically, if there is no base value
205
  // or the base value is a plain key-value, the default implementation falls
206
  // back to FullMergeV2(). If the base value is a wide-column entity, the
207
  // default implementation invokes FullMergeV2() to perform the merge on the
208
  // default column, and leaves any other columns unchanged.
209
  virtual bool FullMergeV3(const MergeOperationInputV3& merge_in,
210
                           MergeOperationOutputV3* merge_out) const;
211
212
  // This function performs merge(left_op, right_op)
213
  // when both the operands are themselves merge operation types
214
  // that you would have passed to a DB::Merge() call in the same order
215
  // (i.e.: DB::Merge(key,left_op), followed by DB::Merge(key,right_op)).
216
  //
217
  // PartialMerge should combine them into a single merge operation that is
218
  // saved into *new_value, and then it should return true.
219
  // *new_value should be constructed such that a call to
220
  // DB::Merge(key, *new_value) would yield the same result as a call
221
  // to DB::Merge(key, left_op) followed by DB::Merge(key, right_op).
222
  //
223
  // The string that new_value is pointing to will be empty.
224
  //
225
  // The default implementation of PartialMergeMulti will use this function
226
  // as a helper, for backward compatibility.  Any successor class of
227
  // MergeOperator should either implement PartialMerge or PartialMergeMulti,
228
  // although implementing PartialMergeMulti is suggested as it is in general
229
  // more effective to merge multiple operands at a time instead of two
230
  // operands at a time.
231
  //
232
  // If it is impossible or infeasible to combine the two operations,
233
  // leave new_value unchanged and return false. The library will
234
  // internally keep track of the operations, and apply them in the
235
  // correct order once a base-value (a Put/Delete/End-of-Database) is seen.
236
  //
237
  // TODO: Presently there is no way to differentiate between error/corruption
238
  // and simply "return false". For now, the client should simply return
239
  // false in any case it cannot perform partial-merge, regardless of reason.
240
  // If there is corruption in the data, handle it in the FullMergeV3() function
241
  // and return false there.  The default implementation of PartialMerge will
242
  // always return false.
243
  virtual bool PartialMerge(const Slice& /*key*/, const Slice& /*left_operand*/,
244
                            const Slice& /*right_operand*/,
245
                            std::string* /*new_value*/,
246
0
                            Logger* /*logger*/) const {
247
0
    return false;
248
0
  }
249
250
  // This function performs merge when all the operands are themselves merge
251
  // operation types that you would have passed to a DB::Merge() call in the
252
  // same order (front() first)
253
  // (i.e. DB::Merge(key, operand_list[0]), followed by
254
  //  DB::Merge(key, operand_list[1]), ...)
255
  //
256
  // PartialMergeMulti should combine them into a single merge operation that is
257
  // saved into *new_value, and then it should return true.  *new_value should
258
  // be constructed such that a call to DB::Merge(key, *new_value) would yield
259
  // the same result as sequential individual calls to DB::Merge(key, operand)
260
  // for each operand in operand_list from front() to back().
261
  //
262
  // The string that new_value is pointing to will be empty.
263
  //
264
  // The PartialMergeMulti function will be called when there are at least two
265
  // operands.
266
  //
267
  // In the default implementation, PartialMergeMulti will invoke PartialMerge
268
  // multiple times, where each time it only merges two operands.  Developers
269
  // should either implement PartialMergeMulti, or implement PartialMerge which
270
  // is served as the helper function of the default PartialMergeMulti.
271
  virtual bool PartialMergeMulti(const Slice& key,
272
                                 const std::deque<Slice>& operand_list,
273
                                 std::string* new_value, Logger* logger) const;
274
275
  // The name of the MergeOperator. Used to check for MergeOperator
276
  // mismatches (i.e., a DB created with one MergeOperator is
277
  // accessed using a different MergeOperator)
278
  // TODO: the name is currently not stored persistently and thus
279
  //       no checking is enforced. Client is responsible for providing
280
  //       consistent MergeOperator between DB opens.
281
  const char* Name() const override = 0;
282
283
  // Determines whether the PartialMerge can be called with just a single
284
  // merge operand.
285
  // Override and return true for allowing a single operand. PartialMerge
286
  // and PartialMergeMulti should be overridden and implemented
287
  // correctly to properly handle a single operand.
288
0
  virtual bool AllowSingleOperand() const { return false; }
289
290
  // Allows to control when to invoke a full merge during Get.
291
  // This could be used to limit the number of merge operands that are looked at
292
  // during a point lookup, thereby helping in limiting the number of levels to
293
  // read from.
294
  // Doesn't help with iterators.
295
  //
296
  // Note: the merge operands are passed to this function in the reversed order
297
  // relative to how they were merged (passed to
298
  // FullMerge/FullMergeV2/FullMergeV3) for performance reasons, see also:
299
  // https://github.com/facebook/rocksdb/issues/3865
300
0
  virtual bool ShouldMerge(const std::vector<Slice>& /*operands*/) const {
301
0
    return false;
302
0
  }
303
};
304
305
// The simpler, associative merge operator.
306
class AssociativeMergeOperator : public MergeOperator {
307
 public:
308
0
  ~AssociativeMergeOperator() override {}
309
310
  // Gives the client a way to express the read -> modify -> write semantics
311
  // key:           (IN) The key that's associated with this merge operation.
312
  // existing_value:(IN) null indicates the key does not exist before this op
313
  // value:         (IN) the value to update/merge the existing_value with
314
  // new_value:    (OUT) Client is responsible for filling the merge result
315
  // here. The string that new_value is pointing to will be empty.
316
  // logger:        (IN) Client could use this to log errors during merge.
317
  //
318
  // Return true on success.
319
  // All values passed in will be client-specific values. So if this method
320
  // returns false, it is because client specified bad data or there was
321
  // internal corruption. The client should assume that this will be treated
322
  // as an error by the library.
323
  virtual bool Merge(const Slice& key, const Slice* existing_value,
324
                     const Slice& value, std::string* new_value,
325
                     Logger* logger) const = 0;
326
327
 private:
328
  // Default implementations of the MergeOperator functions
329
  bool FullMergeV2(const MergeOperationInput& merge_in,
330
                   MergeOperationOutput* merge_out) const override;
331
332
  bool PartialMerge(const Slice& key, const Slice& left_operand,
333
                    const Slice& right_operand, std::string* new_value,
334
                    Logger* logger) const override;
335
};
336
337
}  // namespace ROCKSDB_NAMESPACE