Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "context"
9 :
10 : "github.com/cockroachdb/pebble/internal/base"
11 : "github.com/cockroachdb/pebble/internal/keyspan"
12 : "github.com/cockroachdb/pebble/internal/rangekey"
13 : "github.com/cockroachdb/pebble/sstable/block"
14 : )
15 :
16 : // VirtualReader wraps Reader. Its purpose is to restrict functionality of the
17 : // Reader which should be inaccessible to virtual sstables, and enforce bounds
18 : // invariants associated with virtual sstables. All reads on virtual sstables
19 : // should go through a VirtualReader.
20 : //
21 : // INVARIANT: Any iterators created through a virtual reader will guarantee that
22 : // they don't expose keys outside the virtual sstable bounds.
23 : type VirtualReader struct {
24 : vState virtualState // virtual bounds and per-file state; a pointer to it is passed to the iterators created by this reader
25 : reader *Reader // backing Reader that the iterator and estimation methods delegate to
26 : Properties CommonProperties // estimates: backing-table properties scaled by Size/BackingSize in MakeVirtualReader
27 : }
28 :
29 : var _ CommonReader = (*VirtualReader)(nil) // compile-time assertion that *VirtualReader implements CommonReader
30 :
31 : // Lightweight virtual sstable state which can be passed to sstable iterators.
32 : type virtualState struct {
33 : lower InternalKey
34 : upper InternalKey
35 : fileNum base.FileNum
36 : Compare Compare
37 : isSharedIngested bool
38 : }
39 :
40 : // VirtualReaderParams are the parameters necessary to create a VirtualReader.
41 : type VirtualReaderParams struct {
42 : Lower InternalKey
43 : Upper InternalKey
44 : FileNum base.FileNum
45 : IsSharedIngested bool
46 : // Size is an estimate of the size of the [Lower, Upper) section of the table.
47 : Size uint64
48 : // BackingSize is the total size of the backing table. The ratio between Size
49 : // and BackingSize is used to estimate statistics.
50 : BackingSize uint64
51 : }
52 :
53 : // MakeVirtualReader is used to contruct a reader which can read from virtual
54 : // sstables.
55 2 : func MakeVirtualReader(reader *Reader, p VirtualReaderParams) VirtualReader {
56 2 : vState := virtualState{
57 2 : lower: p.Lower,
58 2 : upper: p.Upper,
59 2 : fileNum: p.FileNum,
60 2 : Compare: reader.Compare,
61 2 : isSharedIngested: p.IsSharedIngested,
62 2 : }
63 2 : v := VirtualReader{
64 2 : vState: vState,
65 2 : reader: reader,
66 2 : }
67 2 :
68 2 : // Scales the given value by the (Size / BackingSize) ratio, rounding up.
69 2 : scale := func(a uint64) uint64 {
70 2 : return (a*p.Size + p.BackingSize - 1) / p.BackingSize
71 2 : }
72 :
73 2 : v.Properties.RawKeySize = scale(reader.Properties.RawKeySize)
74 2 : v.Properties.RawValueSize = scale(reader.Properties.RawValueSize)
75 2 : v.Properties.NumEntries = scale(reader.Properties.NumEntries)
76 2 : v.Properties.NumDeletions = scale(reader.Properties.NumDeletions)
77 2 : v.Properties.NumRangeDeletions = scale(reader.Properties.NumRangeDeletions)
78 2 : v.Properties.NumRangeKeyDels = scale(reader.Properties.NumRangeKeyDels)
79 2 :
80 2 : // Note that we rely on NumRangeKeySets for correctness. If the sstable may
81 2 : // contain range keys, then NumRangeKeySets must be > 0. ceilDiv works because
82 2 : // meta.Size will not be 0 for virtual sstables.
83 2 : v.Properties.NumRangeKeySets = scale(reader.Properties.NumRangeKeySets)
84 2 : v.Properties.ValueBlocksSize = scale(reader.Properties.ValueBlocksSize)
85 2 : v.Properties.NumSizedDeletions = scale(reader.Properties.NumSizedDeletions)
86 2 : v.Properties.RawPointTombstoneKeySize = scale(reader.Properties.RawPointTombstoneKeySize)
87 2 : v.Properties.RawPointTombstoneValueSize = scale(reader.Properties.RawPointTombstoneValueSize)
88 2 :
89 2 : return v
90 : }
91 :
92 : // NewCompactionIter is the compaction iterator function for virtual readers.
93 : func (v *VirtualReader) NewCompactionIter(
94 : transforms IterTransforms,
95 : categoryAndQoS CategoryAndQoS,
96 : statsCollector *CategoryStatsCollector,
97 : rp ReaderProvider,
98 : bufferPool *block.BufferPool,
99 2 : ) (Iterator, error) {
100 2 : return v.reader.newCompactionIter(
101 2 : transforms, categoryAndQoS, statsCollector, rp, &v.vState, bufferPool)
102 2 : }
103 :
104 : // NewIterWithBlockPropertyFiltersAndContextEtc wraps
105 : // Reader.NewIterWithBlockPropertyFiltersAndContext. We assume that the passed
106 : // in [lower, upper) bounds will have at least some overlap with the virtual
107 : // sstable bounds. No overlap is not currently supported in the iterator.
108 : func (v *VirtualReader) NewIterWithBlockPropertyFiltersAndContextEtc(
109 : ctx context.Context,
110 : transforms IterTransforms,
111 : lower, upper []byte,
112 : filterer *BlockPropertiesFilterer,
113 : useFilterBlock bool,
114 : stats *base.InternalIteratorStats,
115 : categoryAndQoS CategoryAndQoS,
116 : statsCollector *CategoryStatsCollector,
117 : rp ReaderProvider,
118 2 : ) (Iterator, error) {
119 2 : return v.reader.newIterWithBlockPropertyFiltersAndContext(
120 2 : ctx, transforms, lower, upper, filterer, useFilterBlock,
121 2 : stats, categoryAndQoS, statsCollector, rp, &v.vState)
122 2 : }
123 :
124 : // ValidateBlockChecksumsOnBacking calls ValidateBlockChecksums on the underlying (backing) reader and returns its error.
125 : // Note that block checksum validation is NOT restricted to virtual sstable bounds.
126 1 : func (v *VirtualReader) ValidateBlockChecksumsOnBacking() error {
127 1 : return v.reader.ValidateBlockChecksums()
128 1 : }
129 :
130 : // NewRawRangeDelIter wraps Reader.NewRawRangeDelIter.
131 : func (v *VirtualReader) NewRawRangeDelIter(
132 : transforms FragmentIterTransforms,
133 2 : ) (keyspan.FragmentIterator, error) {
134 2 : iter, err := v.reader.NewRawRangeDelIter(transforms)
135 2 : if err != nil {
136 0 : return nil, err
137 0 : }
138 2 : if iter == nil {
139 2 : return nil, nil
140 2 : }
141 :
142 : // Note that if upper is not an exclusive sentinel, Truncate will assert that
143 : // there is no span that contains that key.
144 : //
145 : // As an example, if an sstable contains a rangedel a-c and point keys at
146 : // a.SET.2 and b.SET.3, the file bounds [a#2,SET-b#RANGEDELSENTINEL] are
147 : // allowed (as they exclude b.SET.3), or [a#2,SET-c#RANGEDELSENTINEL] (as it
148 : // includes both point keys), but not [a#2,SET-b#3,SET] (as it would truncate
149 : // the rangedel at b and lead to the point being uncovered).
150 2 : return keyspan.Truncate(
151 2 : v.reader.Compare, iter,
152 2 : base.UserKeyBoundsFromInternal(v.vState.lower, v.vState.upper),
153 2 : ), nil
154 : }
155 :
156 : // NewRawRangeKeyIter wraps Reader.NewRawRangeKeyIter.
157 : func (v *VirtualReader) NewRawRangeKeyIter(
158 : transforms FragmentIterTransforms,
159 2 : ) (keyspan.FragmentIterator, error) {
160 2 : syntheticSeqNum := transforms.SyntheticSeqNum
161 2 : if v.vState.isSharedIngested {
162 2 : // Don't pass a synthetic sequence number for shared ingested sstables. We
163 2 : // need to know the materialized sequence numbers, and we will set up the
164 2 : // appropriate sequence number substitution below.
165 2 : transforms.SyntheticSeqNum = 0
166 2 : }
167 2 : iter, err := v.reader.NewRawRangeKeyIter(transforms)
168 2 : if err != nil {
169 0 : return nil, err
170 0 : }
171 2 : if iter == nil {
172 2 : return nil, nil
173 2 : }
174 :
175 2 : if v.vState.isSharedIngested {
176 2 : // We need to coalesce range keys within each sstable, and then apply the
177 2 : // synthetic sequence number. For this, we use ForeignSSTTransformer.
178 2 : //
179 2 : // TODO(bilal): Avoid these allocations by hoisting the transformer and
180 2 : // transform iter into VirtualReader.
181 2 : transform := &rangekey.ForeignSSTTransformer{
182 2 : Equal: v.reader.Equal,
183 2 : SeqNum: base.SeqNum(syntheticSeqNum),
184 2 : }
185 2 : transformIter := &keyspan.TransformerIter{
186 2 : FragmentIterator: iter,
187 2 : Transformer: transform,
188 2 : Compare: v.reader.Compare,
189 2 : }
190 2 : iter = transformIter
191 2 : }
192 :
193 : // Note that if upper is not an exclusive sentinel, Truncate will assert that
194 : // there is no span that contains that key.
195 : //
196 : // As an example, if an sstable contains a range key a-c and point keys at
197 : // a.SET.2 and b.SET.3, the file bounds [a#2,SET-b#RANGEKEYSENTINEL] are
198 : // allowed (as they exclude b.SET.3), or [a#2,SET-c#RANGEKEYSENTINEL] (as it
199 : // includes both point keys), but not [a#2,SET-b#3,SET] (as it would truncate
200 : // the range key at b and lead to the point being uncovered).
201 2 : return keyspan.Truncate(
202 2 : v.reader.Compare, iter,
203 2 : base.UserKeyBoundsFromInternal(v.vState.lower, v.vState.upper),
204 2 : ), nil
205 : }
206 :
207 : // Constrain bounds will narrow the start, end bounds if they do not fit within
208 : // the virtual sstable. The function will return if the new end key is
209 : // inclusive.
210 : func (v *virtualState) constrainBounds(
211 : start, end []byte, endInclusive bool,
212 2 : ) (lastKeyInclusive bool, first []byte, last []byte) {
213 2 : first = start
214 2 : if start == nil || v.Compare(start, v.lower.UserKey) < 0 {
215 2 : first = v.lower.UserKey
216 2 : }
217 :
218 : // Note that we assume that start, end has some overlap with the virtual
219 : // sstable bounds.
220 2 : last = v.upper.UserKey
221 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel()
222 2 : if end != nil {
223 2 : cmp := v.Compare(end, v.upper.UserKey)
224 2 : switch {
225 2 : case cmp == 0:
226 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel() && endInclusive
227 2 : last = v.upper.UserKey
228 2 : case cmp > 0:
229 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel()
230 2 : last = v.upper.UserKey
231 2 : default:
232 2 : lastKeyInclusive = endInclusive
233 2 : last = end
234 : }
235 : }
236 : // TODO(bananabrick): What if someone passes in bounds completely outside of
237 : // virtual sstable bounds?
238 2 : return lastKeyInclusive, first, last
239 : }
240 :
241 : // EstimateDiskUsage just calls VirtualReader.reader.EstimateDiskUsage after
242 : // enforcing the virtual sstable bounds.
243 2 : func (v *VirtualReader) EstimateDiskUsage(start, end []byte) (uint64, error) {
244 2 : _, f, l := v.vState.constrainBounds(start, end, true /* endInclusive */)
245 2 : return v.reader.EstimateDiskUsage(f, l)
246 2 : }
247 :
248 : // CommonProperties implements the CommonReader interface. The returned properties are the scaled estimates computed in MakeVirtualReader.
249 2 : func (v *VirtualReader) CommonProperties() *CommonProperties {
250 2 : return &v.Properties // pointer to the field inside v, not a copy
251 2 : }
|