Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "context"
9 :
10 : "github.com/cockroachdb/pebble/internal/base"
11 : "github.com/cockroachdb/pebble/internal/keyspan"
12 : "github.com/cockroachdb/pebble/internal/rangekey"
13 : "github.com/cockroachdb/pebble/sstable/block"
14 : )
15 :
// VirtualReader wraps Reader. Its purpose is to restrict functionality of the
// Reader which should be inaccessible to virtual sstables, and enforce bounds
// invariants associated with virtual sstables. All reads on virtual sstables
// should go through a VirtualReader.
//
// INVARIANT: Any iterators created through a virtual reader will guarantee that
// they don't expose keys outside the virtual sstable bounds.
type VirtualReader struct {
	// vState holds the virtual sstable's bounds and identity. A pointer to it
	// is handed to the point and compaction iterators created by this reader.
	vState virtualState

	// reader is the Reader of the backing sstable that every method delegates
	// to.
	reader *Reader

	// Properties holds the subset of the backing table's properties that
	// MakeVirtualReader fills in, each scaled by the Size / BackingSize ratio.
	Properties CommonProperties
}

// Compile-time check that *VirtualReader satisfies CommonReader.
var _ CommonReader = (*VirtualReader)(nil)
30 :
31 : // Lightweight virtual sstable state which can be passed to sstable iterators.
32 : type virtualState struct {
33 : lower InternalKey
34 : upper InternalKey
35 : fileNum base.FileNum
36 : Compare Compare
37 : isSharedIngested bool
38 : }
39 :
40 : // VirtualReaderParams are the parameters necessary to create a VirtualReader.
41 : type VirtualReaderParams struct {
42 : Lower InternalKey
43 : Upper InternalKey
44 : FileNum base.FileNum
45 : IsSharedIngested bool
46 : // Size is an estimate of the size of the [Lower, Upper) section of the table.
47 : Size uint64
48 : // BackingSize is the total size of the backing table. The ratio between Size
49 : // and BackingSize is used to estimate statistics.
50 : BackingSize uint64
51 : }
52 :
53 : // MakeVirtualReader is used to contruct a reader which can read from virtual
54 : // sstables.
55 2 : func MakeVirtualReader(reader *Reader, p VirtualReaderParams) VirtualReader {
56 2 : vState := virtualState{
57 2 : lower: p.Lower,
58 2 : upper: p.Upper,
59 2 : fileNum: p.FileNum,
60 2 : Compare: reader.Compare,
61 2 : isSharedIngested: p.IsSharedIngested,
62 2 : }
63 2 : v := VirtualReader{
64 2 : vState: vState,
65 2 : reader: reader,
66 2 : }
67 2 :
68 2 : // Scales the given value by the (Size / BackingSize) ratio, rounding up.
69 2 : scale := func(a uint64) uint64 {
70 2 : return (a*p.Size + p.BackingSize - 1) / p.BackingSize
71 2 : }
72 :
73 2 : v.Properties.RawKeySize = scale(reader.Properties.RawKeySize)
74 2 : v.Properties.RawValueSize = scale(reader.Properties.RawValueSize)
75 2 : v.Properties.NumEntries = scale(reader.Properties.NumEntries)
76 2 : v.Properties.NumDeletions = scale(reader.Properties.NumDeletions)
77 2 : v.Properties.NumRangeDeletions = scale(reader.Properties.NumRangeDeletions)
78 2 : v.Properties.NumRangeKeyDels = scale(reader.Properties.NumRangeKeyDels)
79 2 : v.Properties.NumDataBlocks = scale(reader.Properties.NumDataBlocks)
80 2 : v.Properties.NumTombstoneDenseBlocks = scale(reader.Properties.NumTombstoneDenseBlocks)
81 2 :
82 2 : // Note that we rely on NumRangeKeySets for correctness. If the sstable may
83 2 : // contain range keys, then NumRangeKeySets must be > 0. ceilDiv works because
84 2 : // meta.Size will not be 0 for virtual sstables.
85 2 : v.Properties.NumRangeKeySets = scale(reader.Properties.NumRangeKeySets)
86 2 : v.Properties.ValueBlocksSize = scale(reader.Properties.ValueBlocksSize)
87 2 : v.Properties.NumSizedDeletions = scale(reader.Properties.NumSizedDeletions)
88 2 : v.Properties.RawPointTombstoneKeySize = scale(reader.Properties.RawPointTombstoneKeySize)
89 2 : v.Properties.RawPointTombstoneValueSize = scale(reader.Properties.RawPointTombstoneValueSize)
90 2 :
91 2 : return v
92 : }
93 :
94 : // NewCompactionIter is the compaction iterator function for virtual readers.
95 : func (v *VirtualReader) NewCompactionIter(
96 : transforms IterTransforms,
97 : categoryAndQoS CategoryAndQoS,
98 : statsCollector *CategoryStatsCollector,
99 : rp ReaderProvider,
100 : bufferPool *block.BufferPool,
101 2 : ) (Iterator, error) {
102 2 : return v.reader.newCompactionIter(
103 2 : transforms, categoryAndQoS, statsCollector, rp, &v.vState, bufferPool)
104 2 : }
105 :
106 : // NewPointIter returns an iterator for the point keys in the table.
107 : //
108 : // If transform.HideObsoletePoints is set, the callee assumes that filterer
109 : // already includes obsoleteKeyBlockPropertyFilter. The caller can satisfy this
110 : // contract by first calling TryAddBlockPropertyFilterForHideObsoletePoints.
111 : //
112 : // We assume that the [lower, upper) bounds (if specified) will have at least
113 : // some overlap with the virtual sstable bounds. No overlap is not currently
114 : // supported in the iterator.
115 : func (v *VirtualReader) NewPointIter(
116 : ctx context.Context,
117 : transforms IterTransforms,
118 : lower, upper []byte,
119 : filterer *BlockPropertiesFilterer,
120 : filterBlockSizeLimit FilterBlockSizeLimit,
121 : stats *base.InternalIteratorStats,
122 : categoryAndQoS CategoryAndQoS,
123 : statsCollector *CategoryStatsCollector,
124 : rp ReaderProvider,
125 2 : ) (Iterator, error) {
126 2 : return v.reader.newPointIter(
127 2 : ctx, transforms, lower, upper, filterer, filterBlockSizeLimit,
128 2 : stats, categoryAndQoS, statsCollector, rp, &v.vState)
129 2 : }
130 :
131 : // ValidateBlockChecksumsOnBacking will call ValidateBlockChecksumsOnBacking on the underlying reader.
132 : // Note that block checksum validation is NOT restricted to virtual sstable bounds.
133 2 : func (v *VirtualReader) ValidateBlockChecksumsOnBacking() error {
134 2 : return v.reader.ValidateBlockChecksums()
135 2 : }
136 :
137 : // NewRawRangeDelIter wraps Reader.NewRawRangeDelIter.
138 : func (v *VirtualReader) NewRawRangeDelIter(
139 : ctx context.Context, transforms FragmentIterTransforms,
140 2 : ) (keyspan.FragmentIterator, error) {
141 2 : iter, err := v.reader.NewRawRangeDelIter(ctx, transforms)
142 2 : if err != nil {
143 0 : return nil, err
144 0 : }
145 2 : if iter == nil {
146 2 : return nil, nil
147 2 : }
148 :
149 : // Note that if upper is not an exclusive sentinel, Truncate will assert that
150 : // there is no span that contains that key.
151 : //
152 : // As an example, if an sstable contains a rangedel a-c and point keys at
153 : // a.SET.2 and b.SET.3, the file bounds [a#2,SET-b#RANGEDELSENTINEL] are
154 : // allowed (as they exclude b.SET.3), or [a#2,SET-c#RANGEDELSENTINEL] (as it
155 : // includes both point keys), but not [a#2,SET-b#3,SET] (as it would truncate
156 : // the rangedel at b and lead to the point being uncovered).
157 2 : return keyspan.Truncate(
158 2 : v.reader.Compare, iter,
159 2 : base.UserKeyBoundsFromInternal(v.vState.lower, v.vState.upper),
160 2 : ), nil
161 : }
162 :
163 : // NewRawRangeKeyIter wraps Reader.NewRawRangeKeyIter.
164 : func (v *VirtualReader) NewRawRangeKeyIter(
165 : ctx context.Context, transforms FragmentIterTransforms,
166 2 : ) (keyspan.FragmentIterator, error) {
167 2 : syntheticSeqNum := transforms.SyntheticSeqNum
168 2 : if v.vState.isSharedIngested {
169 2 : // Don't pass a synthetic sequence number for shared ingested sstables. We
170 2 : // need to know the materialized sequence numbers, and we will set up the
171 2 : // appropriate sequence number substitution below.
172 2 : transforms.SyntheticSeqNum = 0
173 2 : }
174 2 : iter, err := v.reader.NewRawRangeKeyIter(ctx, transforms)
175 2 : if err != nil {
176 0 : return nil, err
177 0 : }
178 2 : if iter == nil {
179 2 : return nil, nil
180 2 : }
181 :
182 2 : if v.vState.isSharedIngested {
183 2 : // We need to coalesce range keys within each sstable, and then apply the
184 2 : // synthetic sequence number. For this, we use ForeignSSTTransformer.
185 2 : //
186 2 : // TODO(bilal): Avoid these allocations by hoisting the transformer and
187 2 : // transform iter into VirtualReader.
188 2 : transform := &rangekey.ForeignSSTTransformer{
189 2 : Equal: v.reader.Equal,
190 2 : SeqNum: base.SeqNum(syntheticSeqNum),
191 2 : }
192 2 : transformIter := &keyspan.TransformerIter{
193 2 : FragmentIterator: iter,
194 2 : Transformer: transform,
195 2 : SuffixCmp: v.reader.Comparer.CompareRangeSuffixes,
196 2 : }
197 2 : iter = transformIter
198 2 : }
199 :
200 : // Note that if upper is not an exclusive sentinel, Truncate will assert that
201 : // there is no span that contains that key.
202 : //
203 : // As an example, if an sstable contains a range key a-c and point keys at
204 : // a.SET.2 and b.SET.3, the file bounds [a#2,SET-b#RANGEKEYSENTINEL] are
205 : // allowed (as they exclude b.SET.3), or [a#2,SET-c#RANGEKEYSENTINEL] (as it
206 : // includes both point keys), but not [a#2,SET-b#3,SET] (as it would truncate
207 : // the range key at b and lead to the point being uncovered).
208 2 : return keyspan.Truncate(
209 2 : v.reader.Compare, iter,
210 2 : base.UserKeyBoundsFromInternal(v.vState.lower, v.vState.upper),
211 2 : ), nil
212 : }
213 :
214 : // UnsafeReader returns the underlying *sstable.Reader behind a VirtualReader.
215 2 : func (v *VirtualReader) UnsafeReader() *Reader {
216 2 : return v.reader
217 2 : }
218 :
219 : // Constrain bounds will narrow the start, end bounds if they do not fit within
220 : // the virtual sstable. The function will return if the new end key is
221 : // inclusive.
222 : func (v *virtualState) constrainBounds(
223 : start, end []byte, endInclusive bool,
224 2 : ) (lastKeyInclusive bool, first []byte, last []byte) {
225 2 : first = start
226 2 : if start == nil || v.Compare(start, v.lower.UserKey) < 0 {
227 2 : first = v.lower.UserKey
228 2 : }
229 :
230 : // Note that we assume that start, end has some overlap with the virtual
231 : // sstable bounds.
232 2 : last = v.upper.UserKey
233 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel()
234 2 : if end != nil {
235 2 : cmp := v.Compare(end, v.upper.UserKey)
236 2 : switch {
237 2 : case cmp == 0:
238 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel() && endInclusive
239 2 : last = v.upper.UserKey
240 2 : case cmp > 0:
241 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel()
242 2 : last = v.upper.UserKey
243 2 : default:
244 2 : lastKeyInclusive = endInclusive
245 2 : last = end
246 : }
247 : }
248 : // TODO(bananabrick): What if someone passes in bounds completely outside of
249 : // virtual sstable bounds?
250 2 : return lastKeyInclusive, first, last
251 : }
252 :
253 : // EstimateDiskUsage just calls VirtualReader.reader.EstimateDiskUsage after
254 : // enforcing the virtual sstable bounds.
255 2 : func (v *VirtualReader) EstimateDiskUsage(start, end []byte) (uint64, error) {
256 2 : _, f, l := v.vState.constrainBounds(start, end, true /* endInclusive */)
257 2 : return v.reader.EstimateDiskUsage(f, l)
258 2 : }
259 :
260 : // CommonProperties implements the CommonReader interface.
261 2 : func (v *VirtualReader) CommonProperties() *CommonProperties {
262 2 : return &v.Properties
263 2 : }
|