Line data Source code
1 : // Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package manifest
6 :
7 : import (
8 : "bytes"
9 : stdcmp "cmp"
10 : "fmt"
11 : "slices"
12 :
13 : "github.com/cockroachdb/errors"
14 : "github.com/cockroachdb/pebble/internal/base"
15 : )
16 :
17 : // VirtualBackings maintains information about the set of backings that support
18 : // virtual tables in the latest version.
19 : //
20 : // The VirtualBackings set internally maintains for each backing the number of
21 : // virtual tables that use that backing and the sum of their virtual sizes. When
22 : // a backing is added to the set, it initially is not associated with any
23 : // tables. AddTable/RemoveTable are used to maintain the set of tables that are
24 : // associated with a backing. Finally, a backing can only be removed from the
25 : // set when it is no longer in use.
26 : //
27 : // -- Protection API --
28 : //
29 : // VirtualBackings exposes a Protect/Unprotect API. This is used to allow
30 : // external file ingestions to reuse existing virtual backings. Because
31 : // ingestions can run in parallel with other operations like compactions, it is
32 : // possible for a backing to "go away" in-between the time the ingestion decides
33 : // to use it and the time the ingestion installs a new version. The protection
34 : // API solves this problem by keeping backings alive, even if they become
35 : // otherwise unused by any tables.
36 : //
37 : // Backing protection achieves two goals:
38 : // - it must prevent the removal of the backing from the latest version, where
39 : // removal means becoming part of a VersionEdit.RemovedBackingTables. This
40 : // is achieved by treating the backing as "in use", preventing Unused() from
41 : // reporting it.
42 : // - it must prevent the backing from becoming obsolete (i.e. reaching a ref
43 : // count of 0). To achieve this, VirtualBackings takes a ref on each backing
44 : // when it is added; this ref must be released after the backing is removed
45 : // (when it is ok for the backing to be reported as obsolete).
46 : //
47 : // For example, say we have virtual table T1 with backing B1 and an ingestion tries
48 : // to reuse the file. This is what will usually happen (the happy case):
49 : // - latest version is V1 and it contains T1(B1).
50 : // - ingestion request comes for another virtual portion of B1. Ingestion process
51 : // finds B1 and calls Protect(B1).
52 : // - ingestion completes, installs version V2 which has T1(B1) and a new
53 : // T2(B1), and calls Unprotect(B1).
54 : //
55 : // In this path, the Protect/Unprotect calls do nothing. But here is what could
56 : // happen (the corner case):
57 : // - latest version is V1 and it contains T1(B1).
58 : // - ingestion request comes for another virtual portion of B1. Ingestion process
59 : // finds B1 and calls Protect(B1).
60 : // - compaction completes and installs version V2 which no longer has T1.
61 : // But because B1 is protected, V2 still has B1.
62 : // - ingestion completes, installs version V3 which has a new T2(B1) and calls
63 : // Unprotect(B1).
64 : //
65 : // If instead the ingestion fails to complete, the last step becomes:
66 : // - ingestion fails, calls Unprotect(B1). B1 is now Unused() and the next
67 : // version (applied by whatever next operation is) will remove B1.
68 : type VirtualBackings struct {
69 : m map[base.DiskFileNum]backingWithMetadata
70 :
71 : // unused are all the backings in m that are not inUse(). Used for
72 : // implementing Unused() efficiently.
73 : unused map[*FileBacking]struct{}
74 :
75 : totalSize uint64
76 : }
77 :
78 : // MakeVirtualBackings returns empty initialized VirtualBackings.
79 1 : func MakeVirtualBackings() VirtualBackings {
80 1 : return VirtualBackings{
81 1 : m: make(map[base.DiskFileNum]backingWithMetadata),
82 1 : unused: make(map[*FileBacking]struct{}),
83 1 : }
84 1 : }
85 :
86 : type backingWithMetadata struct {
87 : backing *FileBacking
88 :
89 : // A backing initially has a useCount of 0. The useCount is increased by
90 : // AddTable and decreased by RemoveTable. Backings that have useCount=0 are
91 :
92 : useCount int32
93 : // protectionCount is used by Protect to temporarily prevent a backing from
94 : // being reported as unused.
95 : protectionCount int32
96 : // virtualizedSize is the sum of the sizes of the useCount virtual tables
97 : // associated with this backing.
98 : virtualizedSize uint64
99 : }
100 :
101 : // AddAndRef adds a new backing to the set and takes a reference on it. Another
102 : // backing for the same DiskFilNum must not exist.
103 : //
104 : // The added backing is unused until it is associated with a table via AddTable
105 : // or protected via Protect.
106 1 : func (bv *VirtualBackings) AddAndRef(backing *FileBacking) {
107 1 : // We take a reference on the backing because in case of protected backings
108 1 : // (see Protect), we might be the only ones holding on to a backing.
109 1 : backing.Ref()
110 1 : bv.mustAdd(backingWithMetadata{
111 1 : backing: backing,
112 1 : })
113 1 : bv.unused[backing] = struct{}{}
114 1 : bv.totalSize += backing.Size
115 1 : }
116 :
117 : // Remove removes a backing. The backing must not be in use; normally backings
118 : // are removed once they are reported by Unused().
119 : //
120 : // It is up to the caller to release the reference took by AddAndRef.
121 1 : func (bv *VirtualBackings) Remove(n base.DiskFileNum) {
122 1 : v := bv.mustGet(n)
123 1 : if v.inUse() {
124 0 : panic(errors.AssertionFailedf(
125 0 : "backing %s still in use (useCount=%d protectionCount=%d)",
126 0 : v.backing.DiskFileNum, v.useCount, v.protectionCount,
127 0 : ))
128 : }
129 1 : delete(bv.m, n)
130 1 : delete(bv.unused, v.backing)
131 1 : bv.totalSize -= v.backing.Size
132 : }
133 :
134 : // AddTable is used when a new table is using an exiting backing. The backing
135 : // must be in the set already.
136 1 : func (bv *VirtualBackings) AddTable(m *FileMetadata) {
137 1 : if !m.Virtual {
138 0 : panic(errors.AssertionFailedf("table %s not virtual", m.FileNum))
139 : }
140 1 : v := bv.mustGet(m.FileBacking.DiskFileNum)
141 1 : if !v.inUse() {
142 1 : delete(bv.unused, v.backing)
143 1 : }
144 1 : v.useCount++
145 1 : v.virtualizedSize += m.Size
146 1 : bv.m[m.FileBacking.DiskFileNum] = v
147 : }
148 :
149 : // RemoveTable is used when a table using a backing is removed. The backing is
150 : // not removed from the set, even if it becomes unused.
151 1 : func (bv *VirtualBackings) RemoveTable(m *FileMetadata) {
152 1 : if !m.Virtual {
153 0 : panic(errors.AssertionFailedf("table %s not virtual", m.FileNum))
154 : }
155 1 : v := bv.mustGet(m.FileBacking.DiskFileNum)
156 1 :
157 1 : if v.useCount <= 0 {
158 0 : panic(errors.AssertionFailedf("invalid useCount"))
159 : }
160 1 : v.useCount--
161 1 : v.virtualizedSize -= m.Size
162 1 : bv.m[m.FileBacking.DiskFileNum] = v
163 1 : if !v.inUse() {
164 1 : bv.unused[v.backing] = struct{}{}
165 1 : }
166 : }
167 :
168 : // Protect prevents a backing from being reported as unused until a
169 : // corresponding Unprotect call is made. The backing must be in the set.
170 : //
171 : // Multiple Protect calls can be made for the same backing; each must have a
172 : // corresponding Unprotect call before the backing can become unused.
173 1 : func (bv *VirtualBackings) Protect(n base.DiskFileNum) {
174 1 : v := bv.mustGet(n)
175 1 : if !v.inUse() {
176 1 : delete(bv.unused, v.backing)
177 1 : }
178 1 : v.protectionCount++
179 1 : bv.m[n] = v
180 : }
181 :
182 : // Unprotect reverses a Protect call.
183 1 : func (bv *VirtualBackings) Unprotect(n base.DiskFileNum) {
184 1 : v := bv.mustGet(n)
185 1 :
186 1 : if v.protectionCount <= 0 {
187 0 : panic(errors.AssertionFailedf("invalid protectionCount"))
188 : }
189 1 : v.protectionCount--
190 1 : bv.m[n] = v
191 1 : if !v.inUse() {
192 1 : bv.unused[v.backing] = struct{}{}
193 1 : }
194 : }
195 :
196 : // Stats returns the number and total size of all the virtual backings.
197 1 : func (bv *VirtualBackings) Stats() (count int, totalSize uint64) {
198 1 : return len(bv.m), bv.totalSize
199 1 : }
200 :
201 : // Usage returns information about the usage of a backing, specifically:
202 : // - useCount: the number of virtual tables that use this backing;
203 : // - virtualizedSize: the sum of sizes of virtual tables that use the
204 : // backing.
205 : //
206 : // During compaction picking, we compensate a virtual sstable file size by
207 : // (FileBacking.Size - virtualizedSize) / useCount.
208 : // The intuition is that if FileBacking.Size - virtualizedSize is high, then the
209 : // space amplification due to virtual sstables is high, and we should pick the
210 : // virtual sstable with a higher priority.
211 1 : func (bv *VirtualBackings) Usage(n base.DiskFileNum) (useCount int, virtualizedSize uint64) {
212 1 : v := bv.mustGet(n)
213 1 : return int(v.useCount), v.virtualizedSize
214 1 : }
215 :
216 : // Unused returns all backings that are and no longer used by the latest version
217 : // and are not protected, in DiskFileNum order.
218 1 : func (bv *VirtualBackings) Unused() []*FileBacking {
219 1 : res := make([]*FileBacking, 0, len(bv.unused))
220 1 : for b := range bv.unused {
221 1 : res = append(res, b)
222 1 : }
223 1 : slices.SortFunc(res, func(a, b *FileBacking) int {
224 1 : return stdcmp.Compare(a.DiskFileNum, b.DiskFileNum)
225 1 : })
226 1 : return res
227 : }
228 :
229 : // Get returns the backing with the given DiskFileNum, if it is in the set.
230 1 : func (bv *VirtualBackings) Get(n base.DiskFileNum) (_ *FileBacking, ok bool) {
231 1 : v, ok := bv.m[n]
232 1 : if ok {
233 1 : return v.backing, true
234 1 : }
235 1 : return nil, false
236 : }
237 :
238 : // ForEach calls fn on each backing, in unspecified order.
239 1 : func (bv *VirtualBackings) ForEach(fn func(backing *FileBacking)) {
240 1 : for _, v := range bv.m {
241 1 : fn(v.backing)
242 1 : }
243 : }
244 :
245 : // DiskFileNums returns disk file nums of all the backing in the set, in sorted
246 : // order.
247 0 : func (bv *VirtualBackings) DiskFileNums() []base.DiskFileNum {
248 0 : res := make([]base.DiskFileNum, 0, len(bv.m))
249 0 : for n := range bv.m {
250 0 : res = append(res, n)
251 0 : }
252 0 : slices.Sort(res)
253 0 : return res
254 : }
255 :
256 : // Backings returns all backings in the set, in unspecified order.
257 1 : func (bv *VirtualBackings) Backings() []*FileBacking {
258 1 : res := make([]*FileBacking, 0, len(bv.m))
259 1 : for _, v := range bv.m {
260 1 : res = append(res, v.backing)
261 1 : }
262 1 : return res
263 : }
264 :
265 0 : func (bv *VirtualBackings) String() string {
266 0 : nums := bv.DiskFileNums()
267 0 :
268 0 : var buf bytes.Buffer
269 0 : count, totalSize := bv.Stats()
270 0 : if count == 0 {
271 0 : fmt.Fprintf(&buf, "no virtual backings\n")
272 0 : } else {
273 0 : fmt.Fprintf(&buf, "%d virtual backings, total size %d:\n", count, totalSize)
274 0 : for _, n := range nums {
275 0 : v := bv.m[n]
276 0 : fmt.Fprintf(&buf, " %s: size=%d useCount=%d protectionCount=%d virtualizedSize=%d\n",
277 0 : n, v.backing.Size, v.useCount, v.protectionCount, v.virtualizedSize)
278 0 : }
279 : }
280 0 : unused := bv.Unused()
281 0 : if len(unused) > 0 {
282 0 : fmt.Fprintf(&buf, "unused virtual backings:")
283 0 : for _, b := range unused {
284 0 : fmt.Fprintf(&buf, " %s", b.DiskFileNum)
285 0 : }
286 0 : fmt.Fprintf(&buf, "\n")
287 : }
288 0 : return buf.String()
289 : }
290 :
291 1 : func (bv *VirtualBackings) mustAdd(v backingWithMetadata) {
292 1 : _, ok := bv.m[v.backing.DiskFileNum]
293 1 : if ok {
294 0 : panic("pebble: trying to add an existing file backing")
295 : }
296 1 : bv.m[v.backing.DiskFileNum] = v
297 : }
298 :
299 1 : func (bv *VirtualBackings) mustGet(n base.DiskFileNum) backingWithMetadata {
300 1 : v, ok := bv.m[n]
301 1 : if !ok {
302 0 : panic(fmt.Sprintf("unknown backing %s", n))
303 : }
304 1 : return v
305 : }
306 :
307 : // inUse returns true if b is used to back at least one virtual table.
308 1 : func (v *backingWithMetadata) inUse() bool {
309 1 : return v.useCount > 0 || v.protectionCount > 0
310 1 : }
|