Line data Source code
1 : // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package sstable
6 :
7 : import (
8 : "context"
9 :
10 : "github.com/cockroachdb/pebble/internal/base"
11 : "github.com/cockroachdb/pebble/internal/keyspan"
12 : "github.com/cockroachdb/pebble/internal/rangekey"
13 : )
14 :
15 : // VirtualReader wraps Reader. Its purpose is to restrict functionality of the
16 : // Reader which should be inaccessible to virtual sstables, and enforce bounds
17 : // invariants associated with virtual sstables. All reads on virtual sstables
18 : // should go through a VirtualReader.
19 : //
20 : // INVARIANT: Any iterators created through a virtual reader will guarantee that
21 : // they don't expose keys outside the virtual sstable bounds.
22 : type VirtualReader struct {
23 : vState virtualState // bounds and metadata of the virtual sstable; handed by pointer to the iterators
24 : reader *Reader // the wrapped Reader that every read is delegated to
25 : Properties CommonProperties // properties of the backing table as scaled by MakeVirtualReader
26 : }
27 :
28 : var _ CommonReader = (*VirtualReader)(nil) // compile-time check that *VirtualReader implements CommonReader
29 :
30 : // virtualState is the lightweight virtual sstable state which can be passed to sstable iterators.
31 : type virtualState struct {
32 : lower InternalKey // lower bound of the virtual sstable
33 : upper InternalKey // upper bound; constrainBounds treats it as inclusive unless it is an exclusive sentinel
34 : fileNum base.FileNum // copied from VirtualReaderParams.FileNum
35 : Compare Compare // comparator copied from the backing Reader; applied to user keys in constrainBounds
36 : isSharedIngested bool // copied from VirtualReaderParams.IsSharedIngested; changes how NewRawRangeKeyIter applies sequence numbers
37 : }
38 :
39 : // VirtualReaderParams are the parameters necessary to create a VirtualReader.
40 : type VirtualReaderParams struct {
41 : Lower InternalKey // lower bound of the virtual sstable
42 : Upper InternalKey // upper bound of the virtual sstable
43 : FileNum base.FileNum // stored in the virtual state as fileNum
44 : IsSharedIngested bool // stored in the virtual state as isSharedIngested
45 : // Size is an estimate of the size of the [Lower, Upper) section of the table.
46 : Size uint64
47 : // BackingSize is the total size of the backing table. The ratio between Size
48 : // and BackingSize is used to estimate statistics.
49 : BackingSize uint64
50 : }
51 :
52 : // MakeVirtualReader is used to contruct a reader which can read from virtual
53 : // sstables.
54 2 : func MakeVirtualReader(reader *Reader, p VirtualReaderParams) VirtualReader {
55 2 : vState := virtualState{
56 2 : lower: p.Lower,
57 2 : upper: p.Upper,
58 2 : fileNum: p.FileNum,
59 2 : Compare: reader.Compare,
60 2 : isSharedIngested: p.IsSharedIngested,
61 2 : }
62 2 : v := VirtualReader{
63 2 : vState: vState,
64 2 : reader: reader,
65 2 : }
66 2 :
67 2 : // Scales the given value by the (Size / BackingSize) ratio, rounding up.
68 2 : scale := func(a uint64) uint64 {
69 2 : return (a*p.Size + p.BackingSize - 1) / p.BackingSize
70 2 : }
71 :
72 2 : v.Properties.RawKeySize = scale(reader.Properties.RawKeySize)
73 2 : v.Properties.RawValueSize = scale(reader.Properties.RawValueSize)
74 2 : v.Properties.NumEntries = scale(reader.Properties.NumEntries)
75 2 : v.Properties.NumDeletions = scale(reader.Properties.NumDeletions)
76 2 : v.Properties.NumRangeDeletions = scale(reader.Properties.NumRangeDeletions)
77 2 : v.Properties.NumRangeKeyDels = scale(reader.Properties.NumRangeKeyDels)
78 2 :
79 2 : // Note that we rely on NumRangeKeySets for correctness. If the sstable may
80 2 : // contain range keys, then NumRangeKeySets must be > 0. ceilDiv works because
81 2 : // meta.Size will not be 0 for virtual sstables.
82 2 : v.Properties.NumRangeKeySets = scale(reader.Properties.NumRangeKeySets)
83 2 : v.Properties.ValueBlocksSize = scale(reader.Properties.ValueBlocksSize)
84 2 : v.Properties.NumSizedDeletions = scale(reader.Properties.NumSizedDeletions)
85 2 : v.Properties.RawPointTombstoneKeySize = scale(reader.Properties.RawPointTombstoneKeySize)
86 2 : v.Properties.RawPointTombstoneValueSize = scale(reader.Properties.RawPointTombstoneValueSize)
87 2 :
88 2 : return v
89 : }
90 :
91 : // NewCompactionIter is the compaction iterator function for virtual readers.
92 : func (v *VirtualReader) NewCompactionIter(
93 : transforms IterTransforms,
94 : bytesIterated *uint64,
95 : categoryAndQoS CategoryAndQoS,
96 : statsCollector *CategoryStatsCollector,
97 : rp ReaderProvider,
98 : bufferPool *BufferPool,
99 2 : ) (Iterator, error) {
100 2 : return v.reader.newCompactionIter(
101 2 : transforms, bytesIterated, categoryAndQoS, statsCollector, rp, &v.vState, bufferPool)
102 2 : }
103 :
104 : // NewIterWithBlockPropertyFiltersAndContextEtc wraps
105 : // Reader.NewIterWithBlockPropertyFiltersAndContext. We assume that the passed
106 : // in [lower, upper) bounds will have at least some overlap with the virtual
107 : // sstable bounds. No overlap is not currently supported in the iterator.
108 : func (v *VirtualReader) NewIterWithBlockPropertyFiltersAndContextEtc(
109 : ctx context.Context,
110 : transforms IterTransforms,
111 : lower, upper []byte,
112 : filterer *BlockPropertiesFilterer,
113 : useFilterBlock bool,
114 : stats *base.InternalIteratorStats,
115 : categoryAndQoS CategoryAndQoS,
116 : statsCollector *CategoryStatsCollector,
117 : rp ReaderProvider,
118 2 : ) (Iterator, error) {
119 2 : return v.reader.newIterWithBlockPropertyFiltersAndContext(
120 2 : ctx, transforms, lower, upper, filterer, useFilterBlock,
121 2 : stats, categoryAndQoS, statsCollector, rp, &v.vState)
122 2 : }
123 :
124 : // ValidateBlockChecksumsOnBacking will call ValidateBlockChecksumsOnBacking on the underlying reader.
125 : // Note that block checksum validation is NOT restricted to virtual sstable bounds.
126 2 : func (v *VirtualReader) ValidateBlockChecksumsOnBacking() error {
127 2 : return v.reader.ValidateBlockChecksums()
128 2 : }
129 :
130 : // NewRawRangeDelIter wraps Reader.NewRawRangeDelIter.
131 : func (v *VirtualReader) NewRawRangeDelIter(
132 : transforms IterTransforms,
133 2 : ) (keyspan.FragmentIterator, error) {
134 2 : iter, err := v.reader.NewRawRangeDelIter(transforms)
135 2 : if err != nil {
136 0 : return nil, err
137 0 : }
138 2 : if iter == nil {
139 2 : return nil, nil
140 2 : }
141 2 : lower := &v.vState.lower
142 2 : upper := &v.vState.upper
143 2 :
144 2 : // Truncation of spans isn't allowed at a user key that also contains points
145 2 : // in the same virtual sstable, as it would lead to covered points getting
146 2 : // uncovered. Set panicOnUpperTruncate to true if the file's upper bound
147 2 : // is not an exclusive sentinel.
148 2 : //
149 2 : // As an example, if an sstable contains a rangedel a-c and point keys at
150 2 : // a.SET.2 and b.SET.3, the file bounds [a#2,SET-b#RANGEDELSENTINEL] are
151 2 : // allowed (as they exclude b.SET.3), or [a#2,SET-c#RANGEDELSENTINEL] (as it
152 2 : // includes both point keys), but not [a#2,SET-b#3,SET] (as it would truncate
153 2 : // the rangedel at b and lead to the point being uncovered).
154 2 : return keyspan.Truncate(
155 2 : v.reader.Compare, iter, lower.UserKey, upper.UserKey,
156 2 : lower, upper, !v.vState.upper.IsExclusiveSentinel(), /* panicOnUpperTruncate */
157 2 : ), nil
158 : }
159 :
160 : // NewRawRangeKeyIter wraps Reader.NewRawRangeKeyIter.
161 : func (v *VirtualReader) NewRawRangeKeyIter(
162 : transforms IterTransforms,
163 2 : ) (keyspan.FragmentIterator, error) {
164 2 : syntheticSeqNum := transforms.SyntheticSeqNum
165 2 : if v.vState.isSharedIngested {
166 2 : // Don't pass a synthetic sequence number for shared ingested sstables. We
167 2 : // need to know the materialized sequence numbers, and we will set up the
168 2 : // appropriate sequence number substitution below.
169 2 : transforms.SyntheticSeqNum = 0
170 2 : }
171 2 : iter, err := v.reader.NewRawRangeKeyIter(transforms)
172 2 : if err != nil {
173 0 : return nil, err
174 0 : }
175 2 : if iter == nil {
176 2 : return nil, nil
177 2 : }
178 2 : lower := &v.vState.lower
179 2 : upper := &v.vState.upper
180 2 :
181 2 : if v.vState.isSharedIngested {
182 2 : // We need to coalesce range keys within each sstable, and then apply the
183 2 : // synthetic sequence number. For this, we use ForeignSSTTransformer.
184 2 : //
185 2 : // TODO(bilal): Avoid these allocations by hoisting the transformer and
186 2 : // transform iter into VirtualReader.
187 2 : transform := &rangekey.ForeignSSTTransformer{
188 2 : Equal: v.reader.Equal,
189 2 : SeqNum: uint64(syntheticSeqNum),
190 2 : }
191 2 : transformIter := &keyspan.TransformerIter{
192 2 : FragmentIterator: iter,
193 2 : Transformer: transform,
194 2 : Compare: v.reader.Compare,
195 2 : }
196 2 : iter = transformIter
197 2 : }
198 :
199 : // Truncation of spans isn't allowed at a user key that also contains points
200 : // in the same virtual sstable, as it would lead to covered points getting
201 : // uncovered. Set panicOnUpperTruncate to true if the file's upper bound
202 : // is not an exclusive sentinel.
203 : //
204 : // As an example, if an sstable contains a range key a-c and point keys at
205 : // a.SET.2 and b.SET.3, the file bounds [a#2,SET-b#RANGEKEYSENTINEL] are
206 : // allowed (as they exclude b.SET.3), or [a#2,SET-c#RANGEKEYSENTINEL] (as it
207 : // includes both point keys), but not [a#2,SET-b#3,SET] (as it would truncate
208 : // the range key at b and lead to the point being uncovered).
209 2 : return keyspan.Truncate(
210 2 : v.reader.Compare, iter, lower.UserKey, upper.UserKey,
211 2 : lower, upper, !v.vState.upper.IsExclusiveSentinel(), /* panicOnUpperTruncate */
212 2 : ), nil
213 : }
214 :
215 : // Constrain bounds will narrow the start, end bounds if they do not fit within
216 : // the virtual sstable. The function will return if the new end key is
217 : // inclusive.
218 : func (v *virtualState) constrainBounds(
219 : start, end []byte, endInclusive bool,
220 2 : ) (lastKeyInclusive bool, first []byte, last []byte) {
221 2 : first = start
222 2 : if start == nil || v.Compare(start, v.lower.UserKey) < 0 {
223 2 : first = v.lower.UserKey
224 2 : }
225 :
226 : // Note that we assume that start, end has some overlap with the virtual
227 : // sstable bounds.
228 2 : last = v.upper.UserKey
229 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel()
230 2 : if end != nil {
231 2 : cmp := v.Compare(end, v.upper.UserKey)
232 2 : switch {
233 2 : case cmp == 0:
234 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel() && endInclusive
235 2 : last = v.upper.UserKey
236 2 : case cmp > 0:
237 2 : lastKeyInclusive = !v.upper.IsExclusiveSentinel()
238 2 : last = v.upper.UserKey
239 2 : default:
240 2 : lastKeyInclusive = endInclusive
241 2 : last = end
242 : }
243 : }
244 : // TODO(bananabrick): What if someone passes in bounds completely outside of
245 : // virtual sstable bounds?
246 2 : return lastKeyInclusive, first, last
247 : }
248 :
249 : // EstimateDiskUsage just calls VirtualReader.reader.EstimateDiskUsage after
250 : // enforcing the virtual sstable bounds.
251 2 : func (v *VirtualReader) EstimateDiskUsage(start, end []byte) (uint64, error) {
252 2 : _, f, l := v.vState.constrainBounds(start, end, true /* endInclusive */)
253 2 : return v.reader.EstimateDiskUsage(f, l)
254 2 : }
255 :
256 : // CommonProperties implements the CommonReader interface.
257 2 : func (v *VirtualReader) CommonProperties() *CommonProperties {
258 2 : return &v.Properties
259 2 : }
|