Coverage Report

Created: 2026-05-16 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rocksdb/table/table_reader.h
Line
Count
Source
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
// Use of this source code is governed by a BSD-style license that can be
8
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10
#pragma once
11
#include <memory>
12
13
#include "db/range_tombstone_fragmenter.h"
14
#if USE_COROUTINES
15
#include "folly/coro/Coroutine.h"
16
#include "folly/coro/Task.h"
17
#endif
18
#include "rocksdb/slice_transform.h"
19
#include "rocksdb/table_reader_caller.h"
20
#include "table/get_context.h"
21
#include "table/internal_iterator.h"
22
#include "table/multiget_context.h"
23
24
namespace ROCKSDB_NAMESPACE {
25
26
class Iterator;
27
struct ParsedInternalKey;
28
class Slice;
29
class Arena;
30
struct ReadOptions;
31
struct TableProperties;
32
class GetContext;
33
class MultiGetContext;
34
35
// A Table (also referred to as SST) is a sorted map from strings to strings.
36
// Tables are immutable and persistent.  A Table may be safely accessed from
37
// multiple threads without external synchronization. Table readers are used
38
// for reading various types of table formats supported by rocksdb including
39
// BlockBasedTable, PlainTable and CuckooTable format.
40
class TableReader {
41
 public:
42
109k
  // Virtual destructor with an empty body, so that a concrete reader can be
  // destroyed correctly through a TableReader pointer.
  virtual ~TableReader() {}
43
44
  // Returns a new iterator over the table contents.
45
  // The result of NewIterator() is initially invalid (caller must
46
  // call one of the Seek methods on the iterator before using it).
47
  //
48
  // read_options: Must outlive the returned iterator.
49
  // arena: If not null, the arena needs to be used to allocate the Iterator.
50
  //        When destroying the iterator, the caller will not call "delete"
51
  //        but Iterator::~Iterator() directly. The destructor needs to destroy
52
  //        all the states but those allocated in arena.
53
  // skip_filters: disables checking the bloom filters even if they exist. This
54
  //               option is effective only for block-based table format.
55
  // compaction_readahead_size: its value will only be used if caller =
56
  // kCompaction
57
  // Pure virtual: this class provides no default implementation.
  // compaction_readahead_size defaults to 0 and allow_unprepared_value
  // defaults to false when the caller omits them.
  virtual InternalIterator* NewIterator(
58
      const ReadOptions& read_options, const SliceTransform* prefix_extractor,
59
      Arena* arena, bool skip_filters, TableReaderCaller caller,
60
      size_t compaction_readahead_size = 0,
61
      bool allow_unprepared_value = false) = 0;
62
63
  // read_options.snapshot needs to outlive this call.
64
  // Default implementation: ignores read_options and returns nullptr.
  // Callers therefore have to handle a null result.
  virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
65
0
      const ReadOptions& /*read_options*/) {
66
0
    return nullptr;
67
0
  }
68
69
  // Overload that takes a sequence number and a timestamp pointer (named
  // read_seqno and timestamp in the commented-out parameter names) instead of
  // a ReadOptions. Default implementation: ignores both arguments and returns
  // nullptr.
  virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
70
0
      SequenceNumber /* read_seqno */, const Slice* /* timestamp */) {
71
0
    return nullptr;
72
0
  }
73
74
  // Given a key, return an approximate byte offset in the file where
75
  // the data for that key begins (or would begin if the key were
76
  // present in the file).  The returned value is in terms of file
77
  // bytes, and so includes effects like compression of the underlying data.
78
  // E.g., the approximate offset of the last key in the table will
79
  // be close to the file length.
80
  // TODO(peterd): Since this function is only used for approximate size
81
  // from beginning of file, reduce code duplication by removing this
82
  // function and letting ApproximateSize take optional start and end, so
83
  // that absolute start and end can be specified and optimized without
84
  // key / index work.
85
  // Pure virtual: this class provides no default implementation. The offset
  // is returned as a uint64_t.
  virtual uint64_t ApproximateOffsetOf(const ReadOptions& read_options,
86
                                       const Slice& key,
87
                                       TableReaderCaller caller) = 0;
88
89
  // Given start and end keys, return the approximate data size in the file
90
  // between the keys. The returned value is in terms of file bytes, and so
91
  // includes effects like compression of the underlying data and applicable
92
  // portions of metadata including filters and indexes. Nullptr for start or
93
  // end (or both) indicates absolute start or end of the table.
94
  // Pure virtual: this class provides no default implementation.
  // NOTE(review): the comment preceding this declaration mentions "Nullptr for
  // start or end", but both are taken by const reference here, so a null
  // pointer cannot be passed through this signature -- confirm how the
  // absolute start/end of the table is meant to be expressed.
  virtual uint64_t ApproximateSize(const ReadOptions& read_options,
95
                                   const Slice& start, const Slice& end,
96
                                   TableReaderCaller caller) = 0;
97
98
  // A user key paired with a size value. This is the element type of the
  // vector that ApproximateKeyAnchors() takes as its output argument.
  struct Anchor {
99
    // Builds an anchor from a Slice and a size. The key is stored in an owned
    // std::string constructed from _user_key.ToStringView().
    Anchor(const Slice& _user_key, size_t _range_size)
100
0
        : user_key(_user_key.ToStringView()), range_size(_range_size) {}
101
    // The anchor's user key, held as an owned string.
    std::string user_key;
102
    // NOTE(review): presumably the approximate size of the key range this
    // anchor represents; the unit is not visible here -- confirm.
    size_t range_size;
103
  };
104
105
  // Currently tries to return approximately 128 anchor keys.
106
  // The last one tends to be the largest key.
107
  // Default implementation: ignores both arguments, leaves the anchors vector
  // untouched and returns Status::NotSupported.
  virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/,
108
0
                                       std::vector<Anchor>& /*anchors*/) {
109
0
    return Status::NotSupported("ApproximateKeyAnchors() not supported.");
110
0
  }
111
112
  // Set up the table for Compaction. Might change some parameters with
113
  // posix_fadvise
114
  // Pure virtual: takes no arguments, returns nothing, and has no default
  // implementation in this class.
  virtual void SetupForCompaction() = 0;
115
116
  // Returns a shared pointer to a const TableProperties object.
  // Pure virtual and const-qualified; this class provides no default.
  virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0;
117
118
  // Prepare work that can be done before the real Get()
119
2.21k
  // Default implementation is a no-op that ignores target.
  virtual void Prepare(const Slice& /*target*/) {}
120
121
  // Report an approximation of how much memory has been used.
122
  // Pure virtual and const-qualified; this class provides no default.
  // NOTE(review): the unit of the returned size_t is presumably bytes --
  // confirm against the implementations.
  virtual size_t ApproximateMemoryUsage() const = 0;
123
124
  // Calls get_context->SaveValue() repeatedly, starting with
125
  // the entry found after a call to Seek(key), until it returns false.
126
  // May not make such a call if filter policy says that key is not present.
127
  //
128
  // get_context->MarkKeyMayExist needs to be called when it is configured to be
129
  // memory only and the key is not found in the block cache.
130
  //
131
  // readOptions is the options for the read
132
  // key is the key to search for
133
  // skip_filters: disables checking the bloom filters even if they exist. This
134
  //               option is effective only for block-based table format.
135
  // Pure virtual: this class provides no default implementation.
  // skip_filters defaults to false when the caller omits it. The outcome is
  // reported through the returned Status.
  virtual Status Get(const ReadOptions& readOptions, const Slice& key,
136
                     GetContext* get_context,
137
                     const SliceTransform* prefix_extractor,
138
                     bool skip_filters = false) = 0;
139
140
  // Use bloom filters in the table file, if present, to filter out keys. The
141
  // mget_range will be updated to skip keys that get a negative result from
142
  // the filter lookup.
143
  // Default implementation: ignores all arguments, leaves mget_range
  // unmodified and returns Status::NotSupported() with no message.
  virtual Status MultiGetFilter(const ReadOptions& /*readOptions*/,
144
                                const SliceTransform* /*prefix_extractor*/,
145
0
                                MultiGetContext::Range* /*mget_range*/) {
146
0
    return Status::NotSupported();
147
0
  }
148
149
  // Looks up every entry of mget_range.
  // Default implementation: walks the range from begin() to end() and calls
  // Get() once per entry, passing that entry's ikey and get_context together
  // with the shared readOptions, prefix_extractor and skip_filters. The Status
  // returned by each Get() is written through the entry's s pointer; nothing
  // is returned to the caller directly.
  // skip_filters defaults to false when the caller omits it.
  virtual void MultiGet(const ReadOptions& readOptions,
150
                        const MultiGetContext::Range* mget_range,
151
                        const SliceTransform* prefix_extractor,
152
0
                        bool skip_filters = false) {
153
0
    for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) {
154
0
      *iter->s = Get(readOptions, iter->ikey, iter->get_context,
155
0
                     prefix_extractor, skip_filters);
156
0
    }
157
0
  }
158
159
#if USE_COROUTINES
160
  // Coroutine counterpart of MultiGet(); only compiled when USE_COROUTINES is
  // set. Default implementation: forwards all four arguments to MultiGet() and
  // then co_returns. The body contains no co_await.
  virtual folly::coro::Task<void> MultiGetCoroutine(
161
      const ReadOptions& readOptions, const MultiGetContext::Range* mget_range,
162
      const SliceTransform* prefix_extractor, bool skip_filters = false) {
163
    MultiGet(readOptions, mget_range, prefix_extractor, skip_filters);
164
    co_return;
165
  }
166
#endif  // USE_COROUTINES
167
168
  // Prefetch data corresponding to a given range of keys.
169
  // Typically this functionality is required for table implementations that
170
  // persist the data on a non-volatile storage medium like disk/SSD.
171
  // begin and end both default to nullptr when the caller omits them.
  // Returns Status::OK() unconditionally in this default implementation.
  virtual Status Prefetch(const ReadOptions& /* read_options */,
172
                          const Slice* begin = nullptr,
173
0
                          const Slice* end = nullptr) {
174
0
    // The casts to void mark begin and end as intentionally unused.
    (void)begin;
175
0
    (void)end;
176
    // Default implementation is a no-op.
177
    // A child class should implement this functionality when applicable.
178
0
    return Status::OK();
179
0
  }
180
181
  // Convert the db file to a human-readable form.
182
  // Default implementation: ignores both arguments, writes nothing to
  // out_file and returns Status::NotSupported. The second parameter
  // (show_sequence_number_type) defaults to false.
  virtual Status DumpTable(WritableFile* /*out_file*/,
183
0
                           bool /*show_sequence_number_type*/ = false) {
184
0
    return Status::NotSupported("DumpTable() not supported");
185
0
  }
186
187
  // check whether there is corruption in this db file
188
  // Default implementation: ignores all arguments, performs no verification
  // and returns Status::NotSupported. The third parameter (meta_blocks_only)
  // defaults to false.
  virtual Status VerifyChecksum(const ReadOptions& /*read_options*/,
189
                                TableReaderCaller /*caller*/,
190
0
                                bool /*meta_blocks_only*/ = false) {
191
0
    return Status::NotSupported("VerifyChecksum() not supported");
192
0
  }
193
194
  // Tell the reader that the file should now be obsolete, e.g. as a hint
195
  // to delete relevant cache entries on destruction. (It might not be safe
196
  // to "unpin" cache entries until destruction time.) NOTE: must be thread
197
  // safe because multiple table cache references might all mark this file as
198
  // obsolete when they are released (the last of which destroys this reader).
199
0
  // Default implementation ignores the uncache_aggressiveness argument and
  // does nothing.
  virtual void MarkObsolete(uint32_t /*uncache_aggressiveness*/) {
200
    // No-op by default.
201
0
  }
202
};
203
204
}  // namespace ROCKSDB_NAMESPACE