Line data Source code
1 : // Copyright 2025 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "github.com/cockroachdb/errors"
9 : "github.com/cockroachdb/pebble/internal/base"
10 : "github.com/cockroachdb/pebble/internal/manifest"
11 : )
12 :
13 : // EstimateDiskUsage returns the estimated filesystem space used in bytes for
14 : // storing the range `[start, end]`. The estimation is computed as follows:
15 : //
16 : // - For sstables fully contained in the range the whole file size is included.
17 : // - For sstables partially contained in the range the overlapping data block sizes
18 : // are included. Even if a data block partially overlaps, or we cannot determine
19 : // overlap due to abbreviated index keys, the full data block size is included in
20 : // the estimation. Note that unlike fully contained sstables, none of the
21 : // meta-block space is counted for partially overlapped files.
22 : // - For virtual sstables, we use the overlap between start, end and the virtual
23 : // sstable bounds to determine disk usage.
24 : // - There may also exist WAL entries for unflushed keys in this range. This
25 : // estimation currently excludes space used for the range in the WAL.
26 1 : func (d *DB) EstimateDiskUsage(start, end []byte) (uint64, error) {
27 1 : bytes, _, _, err := d.EstimateDiskUsageByBackingType(start, end)
28 1 : return bytes, err
29 1 : }
30 :
31 : // EstimateDiskUsageByBackingType is like EstimateDiskUsage but additionally
32 : // returns the subsets of that size in remote and external files.
33 : func (d *DB) EstimateDiskUsageByBackingType(
34 : start, end []byte,
35 1 : ) (totalSize, remoteSize, externalSize uint64, _ error) {
36 1 : if err := d.closed.Load(); err != nil {
37 0 : panic(err)
38 : }
39 :
40 1 : bounds := base.UserKeyBoundsInclusive(start, end)
41 1 : if !bounds.Valid(d.cmp) {
42 0 : return 0, 0, 0, errors.New("invalid key-range specified (start > end)")
43 0 : }
44 :
45 : // Grab and reference the current readState. This prevents the underlying
46 : // files in the associated version from being deleted if there is a concurrent
47 : // compaction.
48 1 : readState := d.loadReadState()
49 1 : defer readState.unref()
50 1 :
51 1 : sizes := d.fileSizeAnnotator.VersionRangeAnnotation(readState.current, bounds)
52 1 : return sizes.totalSize, sizes.remoteSize, sizes.externalSize, nil
53 : }
54 :
// fileSizeByBacking holds the estimated on-disk size of the LSM data that
// falls within some bounds, split out by the kind of object backing the data.
// A "file size" here covers the sstable size together with an estimate of the
// size of the blob data the sstable references.
type fileSizeByBacking struct {
	// totalSize is the estimated size of every file within the bounds,
	// regardless of backing type.
	totalSize uint64
	// remoteSize is the part of the estimate that belongs to remote files.
	remoteSize uint64
	// externalSize is the part of the estimate that belongs to external
	// files.
	externalSize uint64
}
66 :
67 : func (d *DB) singleFileSizeByBacking(
68 : fileSize uint64, t *manifest.TableMetadata,
69 1 : ) (_ fileSizeByBacking, ok bool) {
70 1 : res := fileSizeByBacking{
71 1 : totalSize: fileSize,
72 1 : }
73 1 :
74 1 : objMeta, err := d.objProvider.Lookup(base.FileTypeTable, t.TableBacking.DiskFileNum)
75 1 : if err != nil {
76 0 : return res, false
77 0 : }
78 1 : if objMeta.IsRemote() {
79 1 : res.remoteSize += fileSize
80 1 : if objMeta.IsExternal() {
81 1 : res.externalSize += fileSize
82 1 : }
83 : }
84 1 : return res, true
85 : }
86 :
87 : var fileSizeAnnotatorIdx = manifest.NewTableAnnotationIdx()
88 :
89 : // makeFileSizeAnnotator returns an annotator that computes the storage size of
90 : // files. When applicable, this includes both the sstable size and the size of
91 : // any referenced blob files.
92 1 : func (d *DB) makeFileSizeAnnotator() manifest.TableAnnotator[fileSizeByBacking] {
93 1 : return manifest.MakeTableAnnotator[fileSizeByBacking](
94 1 : fileSizeAnnotatorIdx,
95 1 : manifest.TableAnnotatorFuncs[fileSizeByBacking]{
96 1 : Merge: func(dst *fileSizeByBacking, src fileSizeByBacking) {
97 1 : dst.totalSize += src.totalSize
98 1 : dst.remoteSize += src.remoteSize
99 1 : dst.externalSize += src.externalSize
100 1 : },
101 1 : Table: func(f *manifest.TableMetadata) (v fileSizeByBacking, cacheOK bool) {
102 1 : return d.singleFileSizeByBacking(f.Size+f.EstimatedReferenceSize(), f)
103 1 : },
104 1 : PartialOverlap: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) fileSizeByBacking {
105 1 : overlappingFileSize, err := d.fileCache.estimateSize(f, bounds.Start, bounds.End.Key)
106 1 : if err != nil {
107 0 : return fileSizeByBacking{}
108 0 : }
109 1 : overlapFraction := float64(overlappingFileSize) / float64(f.Size)
110 1 : // Scale the blob reference size proportionally to the file
111 1 : // overlap from the bounds to approximate only the blob
112 1 : // references that overlap with the requested bounds.
113 1 : size := overlappingFileSize + uint64(float64(f.EstimatedReferenceSize())*overlapFraction)
114 1 : res, _ := d.singleFileSizeByBacking(size, f)
115 1 : return res
116 : },
117 : })
118 : }
|