Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "fmt"
9 : "math"
10 : "time"
11 : "unsafe"
12 :
13 : "github.com/cockroachdb/pebble/internal/base"
14 : "github.com/cockroachdb/pebble/internal/cache"
15 : "github.com/cockroachdb/pebble/internal/humanize"
16 : "github.com/cockroachdb/pebble/internal/manifest"
17 : "github.com/cockroachdb/pebble/internal/manual"
18 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/sharedcache"
19 : "github.com/cockroachdb/pebble/record"
20 : "github.com/cockroachdb/pebble/sstable"
21 : "github.com/cockroachdb/pebble/sstable/blob"
22 : "github.com/cockroachdb/pebble/sstable/block"
23 : "github.com/cockroachdb/pebble/wal"
24 : "github.com/cockroachdb/redact"
25 : "github.com/prometheus/client_golang/prometheus"
26 : )
27 :
28 : // CacheMetrics holds metrics for the block and file cache.
29 : type CacheMetrics = cache.Metrics
30 :
31 : // FilterMetrics holds metrics for the filter policy
32 : type FilterMetrics = sstable.FilterMetrics
33 :
34 : // ThroughputMetric is a cumulative throughput metric. See the detailed
35 : // comment in base.
36 : type ThroughputMetric = base.ThroughputMetric
37 :
38 : // SecondaryCacheMetrics holds metrics for the persistent secondary cache
39 : // that caches commonly accessed blocks from blob storage on a local
40 : // file system.
41 : type SecondaryCacheMetrics = sharedcache.Metrics
42 :
43 : // LevelMetrics holds per-level metrics such as the number of files and total
44 : // size of the files, and compaction-related metrics.
45 : type LevelMetrics struct {
46 : // The number of sublevels within the level. The sublevel count corresponds
47 : // to the read amplification for the level. An empty level will have a
48 : // sublevel count of 0, implying no read amplification. Only L0 will have
49 : // a sublevel count other than 0 or 1.
50 : Sublevels int32
51 : // The total count of sstables in the level.
52 : TablesCount int64
53 : // The total size in bytes of the sstables in the level. Note that if tables
54 : // contain references to blob files, this quantity does not include the
55 : // size of the blob files or the referenced values.
56 : TablesSize int64
57 : // The total number of virtual sstables in the level.
58 : VirtualTablesCount uint64
59 : // The total size of the virtual sstables in the level.
60 : VirtualTablesSize uint64
61 : // The estimated total physical size of all blob references across all
62 : // sstables in the level. The physical size is estimated from the size of
63 : // the referenced values and the compression ratios of their blob files.
64 : EstimatedReferencesSize uint64
65 : // The level's compaction score, used to rank levels (0 if the level doesn't
66 : // need compaction). See candidateLevelInfo.
67 : Score float64
68 : // The level's fill factor (the ratio between the size of the level and the
69 : // ideal size). See candidateLevelInfo.
70 : FillFactor float64
71 : // The level's compensated fill factor. See candidateLevelInfo.
72 : CompensatedFillFactor float64
73 : // The number of incoming bytes from other levels' sstables read during
74 : // compactions. This excludes bytes moved and bytes ingested. For L0 this is
75 : // the bytes written to the WAL.
76 : TableBytesIn uint64
77 : // The number of sstable bytes ingested. The sibling metric for tables is
78 : // TablesIngested.
79 : TableBytesIngested uint64
80 : // The number of sstable bytes moved into the level by a "move" compaction.
81 : // The sibling metric for tables is TablesMoved.
82 : TableBytesMoved uint64
83 : // The number of bytes read for compactions at the level. This includes bytes
84 : // read from other levels (TableBytesIn), as well as bytes read for the level.
85 : TableBytesRead uint64
86 : // The number of bytes written to sstables during compactions. The sibling
87 : // metric for tables is TablesCompacted. This metric may be summed with
88 : // TableBytesFlushed to compute the total bytes written for the level.
89 : TableBytesCompacted uint64
90 : // The number of bytes written to sstables during flushes. The sibling
91 : // metric for tables is TablesFlushed. This metric is always zero for all
92 : // levels other than L0.
93 : TableBytesFlushed uint64
94 : // The number of sstables compacted to this level.
95 : TablesCompacted uint64
96 : // The number of sstables flushed to this level.
97 : TablesFlushed uint64
98 : // The number of sstables ingested into the level.
99 : TablesIngested uint64
100 : // The number of sstables moved to this level by a "move" compaction.
101 : TablesMoved uint64
102 : // The number of sstables deleted in a level by a delete-only compaction.
103 : TablesDeleted uint64
104 : // The number of sstables excised in a level by a delete-only compaction.
105 : TablesExcised uint64
106 : // BlobBytesReadEstimate is an estimate of the physical bytes corresponding
107 : // to values referenced by sstables that were inputs into compactions
108 : // outputting into this level.
109 : BlobBytesReadEstimate uint64
110 : // BlobBytesCompacted is the number of bytes written to blob files while
111 : // compacting sstables in this level.
112 : BlobBytesCompacted uint64
113 : // BlobBytesFlushed is the number of bytes written to blob files while
114 : // flushing sstables. This metric is always zero for all levels other than
115 : // L0.
116 : BlobBytesFlushed uint64
117 :
118 : MultiLevel struct {
119 : // TableBytesInTop are the total bytes in a multilevel compaction coming
120 : // from the top level.
121 : TableBytesInTop uint64
122 :
123 : // TableBytesIn, exclusively for multilevel compactions.
124 : TableBytesIn uint64
125 :
126 : // TableBytesRead, exclusively for multilevel compactions.
127 : TableBytesRead uint64
128 : }
129 :
130 : // Additional contains miscellaneous metrics that are not always printed.
131 : Additional struct {
132 : // The sum of Properties.ValueBlocksSize for all the sstables in this
133 : // level. Printed by LevelMetrics.format iff there is at least one level
134 : // with a non-zero value.
135 : ValueBlocksSize uint64
136 : // Cumulative metrics about bytes written to data blocks and value blocks,
137 : // via compactions (except move compactions) or flushes. Not printed by
138 : // LevelMetrics.format, but are available to sophisticated clients.
139 : BytesWrittenDataBlocks uint64
140 : BytesWrittenValueBlocks uint64
141 : }
142 : }
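// A minimal sketch (not part of this package) of consuming these per-level
// metrics: (*DB).Metrics returns a *Metrics whose Levels array holds one
// LevelMetrics per LSM level. The log format below is illustrative.
//
//	m := db.Metrics()
//	for level, lm := range m.Levels {
//		if lm.TablesCount == 0 {
//			continue
//		}
//		fmt.Printf("L%d: %d tables, ~%d bytes, w-amp %.2f\n",
//			level, lm.TablesCount, lm.AggregateSize(), lm.WriteAmp())
//	}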
143 :
144 : // AggregateSize returns an estimated physical size of the level's sstables and
145 : // their referenced values stored in blob files. The size of physical sstables
146 : // is exactly known. Virtual sstables' sizes are estimated, and the size of
147 : // values stored in blob files is estimated based on the volume of referenced
148 : // data and the blob file's compression ratio.
149 1 : func (m *LevelMetrics) AggregateSize() int64 {
150 1 : return m.TablesSize + int64(m.EstimatedReferencesSize)
151 1 : }
152 :
153 : // Add updates the counter metrics for the level.
154 1 : func (m *LevelMetrics) Add(u *LevelMetrics) {
155 1 : m.TablesCount += u.TablesCount
156 1 : m.TablesSize += u.TablesSize
157 1 : m.VirtualTablesCount += u.VirtualTablesCount
158 1 : m.VirtualTablesSize += u.VirtualTablesSize
159 1 : m.EstimatedReferencesSize += u.EstimatedReferencesSize
160 1 : m.TableBytesIn += u.TableBytesIn
161 1 : m.TableBytesIngested += u.TableBytesIngested
162 1 : m.TableBytesMoved += u.TableBytesMoved
163 1 : m.TableBytesRead += u.TableBytesRead
164 1 : m.TableBytesCompacted += u.TableBytesCompacted
165 1 : m.TableBytesFlushed += u.TableBytesFlushed
166 1 : m.TablesCompacted += u.TablesCompacted
167 1 : m.TablesFlushed += u.TablesFlushed
168 1 : m.TablesIngested += u.TablesIngested
169 1 : m.TablesMoved += u.TablesMoved
170 1 : m.BlobBytesCompacted += u.BlobBytesCompacted
171 1 : m.BlobBytesFlushed += u.BlobBytesFlushed
172 1 : m.BlobBytesReadEstimate += u.BlobBytesReadEstimate
173 1 : m.MultiLevel.TableBytesInTop += u.MultiLevel.TableBytesInTop
174 1 : m.MultiLevel.TableBytesRead += u.MultiLevel.TableBytesRead
175 1 : m.MultiLevel.TableBytesIn += u.MultiLevel.TableBytesIn
176 1 : m.Additional.BytesWrittenDataBlocks += u.Additional.BytesWrittenDataBlocks
177 1 : m.Additional.BytesWrittenValueBlocks += u.Additional.BytesWrittenValueBlocks
178 1 : m.Additional.ValueBlocksSize += u.Additional.ValueBlocksSize
179 1 : }
180 :
181 : // WriteAmp computes the write amplification for compactions at this
182 : // level.
183 : //
184 : // The write amplification is computed as the quantity of physical bytes written
185 : // divided by the quantity of logical bytes written.
186 : //
187 : // Concretely, it's computed as:
188 : //
189 : // TableBytesFlushed + TableBytesCompacted + BlobBytesFlushed + BlobBytesCompacted
190 : // -------------------------------------------------------------------------------
191 : // TableBytesIn
192 0 : func (m *LevelMetrics) WriteAmp() float64 {
193 0 : if m.TableBytesIn == 0 {
194 0 : return 0
195 0 : }
196 0 : return float64(m.TableBytesFlushed+m.TableBytesCompacted+m.BlobBytesFlushed+m.BlobBytesCompacted) /
197 0 : float64(m.TableBytesIn)
198 : }
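// For example, a level whose compactions consumed 10 GB of input bytes
// (TableBytesIn) while writing 12 GB to sstables and 3 GB to blob files has
// a write amplification of (12+3)/10 = 1.5.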
199 :
200 : var categoryCompaction = block.RegisterCategory("pebble-compaction", block.NonLatencySensitiveQoSLevel)
201 : var categoryIngest = block.RegisterCategory("pebble-ingest", block.LatencySensitiveQoSLevel)
202 : var categoryGet = block.RegisterCategory("pebble-get", block.LatencySensitiveQoSLevel)
203 :
204 : // Metrics holds metrics for various subsystems of the DB such as the Cache,
205 : // Compactions, WAL, and per-Level metrics.
206 : //
207 : // TODO(peter): The testing of these metrics is relatively weak. There should
208 : // be testing that performs various operations on a DB and verifies that the
209 : // metrics reflect those operations.
210 : type Metrics struct {
211 : BlockCache CacheMetrics
212 :
213 : Compact struct {
214 : // The total number of compactions, and per-compaction type counts.
215 : Count int64
216 : DefaultCount int64
217 : DeleteOnlyCount int64
218 : ElisionOnlyCount int64
219 : CopyCount int64
220 : MoveCount int64
221 : ReadCount int64
222 : TombstoneDensityCount int64
223 : RewriteCount int64
224 : MultiLevelCount int64
225 : BlobFileRewriteCount int64
226 : CounterLevelCount int64
227 : // An estimate of the number of bytes that need to be compacted for the LSM
228 : // to reach a stable state.
229 : EstimatedDebt uint64
230 : // Number of bytes present in sstables being written by in-progress
231 : // compactions. This value will be zero if there are no in-progress
232 : // compactions.
233 : InProgressBytes int64
234 : // Number of compactions that are in-progress.
235 : NumInProgress int64
236 : // Number of compactions that were cancelled.
237 : CancelledCount int64
238 : // CancelledBytes is the number of bytes written by compactions that were
239 : // cancelled.
240 : CancelledBytes int64
241 : // Total number of compactions that hit an error.
242 : FailedCount int64
243 : // NumProblemSpans is the current (instantaneous) count of "problem spans"
244 : // which temporarily block compactions.
245 : NumProblemSpans int
246 : // MarkedFiles is a count of files that are marked for
247 : // compaction. Such files are compacted in a rewrite compaction
248 : // when no other compactions are picked.
249 : MarkedFiles int
250 : // Duration records the cumulative duration of all compactions since the
251 : // database was opened.
252 : Duration time.Duration
253 : }
254 :
255 : Ingest struct {
256 : // The total number of ingestions
257 : Count uint64
258 : }
259 :
260 : Flush struct {
261 : // The total number of flushes.
262 : Count int64
263 : // TODO(sumeer): the IdleDuration in this metric is flawed. It only
264 : // measures idle duration when a flush finishes, representing the idleness
265 : // before the start of a flush. So computing deltas over this metric over
266 : // some time interval D may observe the sum of IdleDuration+WorkDuration
267 : // to be either much smaller or much larger than D.
268 : WriteThroughput ThroughputMetric
269 : // Number of flushes that are in-progress. In the current implementation
270 : // this will always be zero or one.
271 : NumInProgress int64
272 : // AsIngestCount is a monotonically increasing counter of flush operations
273 : // handling ingested tables.
274 : AsIngestCount uint64
275 : // AsIngestTableCount is a monotonically increasing counter of tables
276 : // ingested as flushables.
277 : AsIngestTableCount uint64
278 : // AsIngestBytes is a monotonically increasing counter of the bytes flushed
279 : // for flushables that originated as ingestion operations.
280 : AsIngestBytes uint64
281 : }
282 :
283 : Filter FilterMetrics
284 :
285 : Levels [numLevels]LevelMetrics
286 :
287 : MemTable struct {
288 : // The number of bytes allocated by memtables and large (flushable)
289 : // batches.
290 : Size uint64
291 : // The count of memtables.
292 : Count int64
293 : // The number of bytes present in zombie memtables which are no longer
294 : // referenced by the current DB state. An unbounded number of memtables
295 : // may be zombie if they're still in use by an iterator. One additional
296 : // memtable may be zombie if it's no longer in use and waiting to be
297 : // recycled.
298 : ZombieSize uint64
299 : // The count of zombie memtables.
300 : ZombieCount int64
301 : }
302 :
303 : Keys struct {
304 : // The approximate count of internal range key set keys in the database.
305 : RangeKeySetsCount uint64
306 : // The approximate count of internal tombstones (DEL, SINGLEDEL and
307 : // RANGEDEL key kinds) within the database.
308 : TombstoneCount uint64
309 : // A cumulative total number of missized DELSIZED keys encountered by
310 : // compactions since the database was opened.
311 : MissizedTombstonesCount uint64
312 : }
313 :
314 : Snapshots struct {
315 : // The number of currently open snapshots.
316 : Count int
317 : // The sequence number of the earliest, currently open snapshot.
318 : EarliestSeqNum base.SeqNum
319 : // A running tally of keys written to sstables during flushes or
320 : // compactions that would've been elided if it weren't for open
321 : // snapshots.
322 : PinnedKeys uint64
323 : // A running cumulative sum of the size of keys and values written to
324 : // sstables during flushes or compactions that would've been elided if
325 : // it weren't for open snapshots.
326 : PinnedSize uint64
327 : }
328 :
329 : Table struct {
330 : // The number of bytes present in obsolete tables which are no longer
331 : // referenced by the current DB state or any open iterators.
332 : ObsoleteSize uint64
333 : // The count of obsolete tables.
334 : ObsoleteCount int64
335 : // The number of bytes present in zombie tables which are no longer
336 : // referenced by the current DB state but are still in use by an iterator.
337 : ZombieSize uint64
338 : // The count of zombie tables.
339 : ZombieCount int64
340 : // The count of sstables backing virtual tables.
341 : BackingTableCount uint64
342 : // The sum of the sizes of the BackingTableCount sstables that are backing virtual tables.
343 : BackingTableSize uint64
344 : // The number of sstables that are compressed with an unknown compression
345 : // algorithm.
346 : CompressedCountUnknown int64
347 : // The number of sstables that are compressed with the default compression
348 : // algorithm, snappy.
349 : CompressedCountSnappy int64
350 : // The number of sstables that are compressed with zstd.
351 : CompressedCountZstd int64
352 : // The number of sstables that are compressed with minlz.
353 : CompressedCountMinLZ int64
354 : // The number of sstables that are uncompressed.
355 : CompressedCountNone int64
356 :
357 : // Local file sizes.
358 : Local struct {
359 : // LiveSize is the number of bytes in live tables.
360 : LiveSize uint64
361 : // LiveCount is the number of live tables.
362 : LiveCount uint64
363 : // ObsoleteSize is the number of bytes in obsolete tables.
364 : ObsoleteSize uint64
365 : // ObsoleteCount is the number of obsolete tables.
366 : ObsoleteCount uint64
367 : // ZombieSize is the number of bytes in zombie tables.
368 : ZombieSize uint64
369 : // ZombieCount is the number of zombie tables.
370 : ZombieCount uint64
371 : }
372 :
373 : // Garbage bytes.
374 : Garbage struct {
375 : // PointDeletionsBytesEstimate is the estimated file bytes that will be
376 : // saved by compacting all point deletions. This is dependent on table
377 : // stats collection, so can be very incomplete until
378 : // InitialStatsCollectionComplete becomes true.
379 : PointDeletionsBytesEstimate uint64
380 : // RangeDeletionsBytesEstimate is the estimated file bytes that will be
381 : // saved by compacting all range deletions. This is dependent on table
382 : // stats collection, so can be very incomplete until
383 : // InitialStatsCollectionComplete becomes true.
384 : RangeDeletionsBytesEstimate uint64
385 : }
386 :
387 : // Whether the initial stats collection (for existing tables on Open) is
388 : // complete.
389 : InitialStatsCollectionComplete bool
390 : // The count of recently created sstables that need stats collection. This
391 : // does not include sstables that existed when the DB was opened, so the
392 : // value is only useful when InitialStatsCollectionComplete is true.
393 : PendingStatsCollectionCount int64
394 : }
395 :
396 : BlobFiles struct {
397 : // The count of all live blob files.
398 : LiveCount uint64
399 : // The physical file size of all live blob files.
400 : LiveSize uint64
401 : // ValueSize is the sum of the length of the uncompressed values in all
402 : // live (referenced by some sstable(s) within the current version) blob
403 : // files. ValueSize may be greater than LiveSize when compression is
404 : // effective. ValueSize includes bytes in live blob files that are not
405 : // actually reachable by any sstable key. If any value within the blob
406 : // file is reachable by a key in a live sstable, then the entirety of
407 : // the blob file's values is included within ValueSize.
408 : ValueSize uint64
409 : // ReferencedValueSize is the sum of the length of the uncompressed
410 : // values (in all live blob files) that are still referenced by keys
411 : // within live tables. Over the lifetime of a blob file, its references
412 : // are removed, either as compactions rewrite the same values into new
413 : // blob files or as the keys referencing the file's values are deleted.
414 : // ReferencedValueSize accounts for the volume of bytes
415 : // that are actually reachable by some key in a live table.
416 : //
417 : // The difference between ValueSize and ReferencedValueSize is
418 : // (uncompressed) space amplification that could be reclaimed if all
419 : // blob files were rewritten, discarding values that are no longer
420 : // referenced by any keys in any sstables within the current version.
421 : ReferencedValueSize uint64
422 : // The count of all obsolete blob files.
423 : ObsoleteCount uint64
424 : // The physical size of all obsolete blob files.
425 : ObsoleteSize uint64
426 : // The count of all zombie blob files.
427 : ZombieCount uint64
428 : // The physical size of all zombie blob files.
429 : ZombieSize uint64
430 : // Local file sizes.
431 : Local struct {
432 : // LiveSize is the physical size of local live blob files.
433 : LiveSize uint64
434 : // LiveCount is the number of local live blob files.
435 : LiveCount uint64
436 : // ObsoleteSize is the physical size of local obsolete blob files.
437 : ObsoleteSize uint64
438 : // ObsoleteCount is the number of local obsolete blob files.
439 : ObsoleteCount uint64
440 : // ZombieSize is the physical size of local zombie blob files.
441 : ZombieSize uint64
442 : // ZombieCount is the number of local zombie blob files.
443 : ZombieCount uint64
444 : }
445 : }
446 :
447 : FileCache FileCacheMetrics
448 :
449 : // The number of open sstable iterators.
450 : TableIters int64
451 : // Uptime is the total time since this DB was opened.
452 : Uptime time.Duration
453 :
454 : WAL struct {
455 : // Number of live WAL files.
456 : Files int64
457 : // Number of obsolete WAL files.
458 : ObsoleteFiles int64
459 : // Physical size of the obsolete WAL files.
460 : ObsoletePhysicalSize uint64
461 : // Size of the live data in the WAL files. Note that with WAL file
462 : // recycling this is less than the actual on-disk size of the WAL files.
463 : Size uint64
464 : // Physical size of the WAL files on-disk. With WAL file recycling,
465 : // this is greater than the live data in WAL files.
466 : //
467 : // TODO(sumeer): it seems this does not include ObsoletePhysicalSize.
468 : // Should the comment be updated?
469 : PhysicalSize uint64
470 : // Number of logical bytes written to the WAL.
471 : BytesIn uint64
472 : // Number of bytes written to the WAL.
473 : BytesWritten uint64
474 : // Failover contains failover stats. Empty if failover is not enabled.
475 : Failover wal.FailoverStats
476 : }
477 :
478 : LogWriter struct {
479 : FsyncLatency prometheus.Histogram
480 : record.LogWriterMetrics
481 : }
482 :
483 : CategoryStats []block.CategoryStatsAggregate
484 :
485 : SecondaryCacheMetrics SecondaryCacheMetrics
486 :
487 : private struct {
488 : optionsFileSize uint64
489 : manifestFileSize uint64
490 : }
491 :
492 : manualMemory manual.Metrics
493 : }
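// Most fields above are cumulative counters, so rates are typically derived
// by sampling (*DB).Metrics periodically and differencing. A minimal sketch
// (the 10-second interval and the fields sampled are illustrative):
//
//	prev := db.Metrics()
//	for range time.Tick(10 * time.Second) {
//		cur := db.Metrics()
//		fmt.Printf("last 10s: %d compactions, %d flushes\n",
//			cur.Compact.Count-prev.Compact.Count,
//			cur.Flush.Count-prev.Flush.Count)
//		prev = cur
//	}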
494 :
495 : var (
496 : // FsyncLatencyBuckets are prometheus histogram buckets suitable for a histogram
497 : // that records latencies for fsyncs.
498 : FsyncLatencyBuckets = append(
499 : prometheus.LinearBuckets(0.0, float64(time.Microsecond*100), 50),
500 : prometheus.ExponentialBucketsRange(float64(time.Millisecond*5), float64(10*time.Second), 50)...,
501 : )
502 :
503 : // SecondaryCacheIOBuckets is exported so that users of package pebble
504 : // (such as CRDB) can export metrics with these buckets.
505 : SecondaryCacheIOBuckets = sharedcache.IOBuckets
506 : // SecondaryCacheChannelWriteBuckets is exported so that users of package
507 : // pebble (such as CRDB) can export metrics with these buckets.
508 : SecondaryCacheChannelWriteBuckets = sharedcache.ChannelWriteBuckets
509 : )
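// A minimal sketch of putting FsyncLatencyBuckets to use; the metric name is
// hypothetical. Note that the buckets are expressed in nanoseconds (they are
// float64 conversions of time.Duration values), so an exported histogram
// should be labeled accordingly.
//
//	fsyncHist := prometheus.NewHistogram(prometheus.HistogramOpts{
//		Name:    "pebble_wal_fsync_latency_nanos", // hypothetical name
//		Buckets: FsyncLatencyBuckets,
//	})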
510 :
511 : // DiskSpaceUsage returns the total disk space used by the database in bytes,
512 : // including live and obsolete files. This only includes local files, i.e.,
513 : // remote files (as known to objstorage.Provider) are not included.
514 0 : func (m *Metrics) DiskSpaceUsage() uint64 {
515 0 : var usageBytes uint64
516 0 : usageBytes += m.WAL.PhysicalSize
517 0 : usageBytes += m.WAL.ObsoletePhysicalSize
518 0 : usageBytes += m.Table.Local.LiveSize
519 0 : usageBytes += m.Table.Local.ObsoleteSize
520 0 : usageBytes += m.Table.Local.ZombieSize
521 0 : usageBytes += m.BlobFiles.Local.LiveSize
522 0 : usageBytes += m.BlobFiles.Local.ObsoleteSize
523 0 : usageBytes += m.BlobFiles.Local.ZombieSize
524 0 : usageBytes += m.private.optionsFileSize
525 0 : usageBytes += m.private.manifestFileSize
526 0 : // TODO(sumeer): InProgressBytes does not distinguish between local and
527 0 : // remote files. This causes a small error. Fix.
528 0 : usageBytes += uint64(m.Compact.InProgressBytes)
529 0 : return usageBytes
530 0 : }
531 :
532 : // NumVirtual is the number of virtual sstables in the latest version
533 : // summed over every level in the LSM.
534 0 : func (m *Metrics) NumVirtual() uint64 {
535 0 : var n uint64
536 0 : for _, level := range m.Levels {
537 0 : n += level.VirtualTablesCount
538 0 : }
539 0 : return n
540 : }
541 :
542 : // VirtualSize is the sum of the sizes of the virtual sstables in the
543 : // latest version. BackingTableSize - VirtualSize gives an estimate for
544 : // the space amplification caused by not compacting virtual sstables.
545 0 : func (m *Metrics) VirtualSize() uint64 {
546 0 : var size uint64
547 0 : for _, level := range m.Levels {
548 0 : size += level.VirtualTablesSize
549 0 : }
550 0 : return size
551 : }
552 :
553 : // ReadAmp returns the current read amplification of the database.
554 : // It's computed as the number of sublevels in L0 + the number of non-empty
555 : // levels below L0.
556 0 : func (m *Metrics) ReadAmp() int {
557 0 : var ramp int32
558 0 : for _, l := range m.Levels {
559 0 : ramp += l.Sublevels
560 0 : }
561 0 : return int(ramp)
562 : }
563 :
564 : // Total returns the sum of the per-level metrics and WAL metrics.
565 1 : func (m *Metrics) Total() LevelMetrics {
566 1 : var total LevelMetrics
567 1 : for level := 0; level < numLevels; level++ {
568 1 : l := &m.Levels[level]
569 1 : total.Add(l)
570 1 : total.Sublevels += l.Sublevels
571 1 : }
572 : // Compute total bytes-in as the bytes written to the WAL + bytes ingested.
573 1 : total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
574 1 : // Add the total bytes-in to the total bytes-flushed. This is to account for
575 1 : // the bytes written to the log and bytes written externally and then
576 1 : // ingested.
577 1 : total.TableBytesFlushed += total.TableBytesIn
578 1 : return total
579 : }
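// The aggregate returned by Total is what the "total" row of the String
// output summarizes. For example, a database-wide write amplification can be
// computed as:
//
//	total := m.Total()
//	wAmp := total.WriteAmp() // physical bytes written / logical bytes in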
580 :
581 : // RemoteTablesTotal returns the total number of remote tables and their total
582 : // size. Remote tables are computed as the difference between total tables
583 : // (live + obsolete + zombie) and local tables.
584 0 : func (m *Metrics) RemoteTablesTotal() (count uint64, size uint64) {
585 0 : var liveTables, liveTableBytes int64
586 0 : for level := 0; level < numLevels; level++ {
587 0 : liveTables += m.Levels[level].TablesCount
588 0 : liveTableBytes += m.Levels[level].TablesSize
589 0 : }
590 0 : totalCount := liveTables + m.Table.ObsoleteCount + m.Table.ZombieCount
591 0 : localCount := m.Table.Local.LiveCount + m.Table.Local.ObsoleteCount + m.Table.Local.ZombieCount
592 0 : remoteCount := uint64(totalCount) - localCount
593 0 :
594 0 : totalSize := uint64(liveTableBytes) + m.Table.ObsoleteSize + m.Table.ZombieSize
595 0 : localSize := m.Table.Local.LiveSize + m.Table.Local.ObsoleteSize + m.Table.Local.ZombieSize
596 0 : remoteSize := totalSize - localSize
597 0 :
598 0 : return remoteCount, remoteSize
599 : }
600 :
601 : // String pretty-prints the metrics as below:
602 : //
603 : // | | | | ingested | moved | written | | amp | multilevel
604 : // level | tables size val-bl vtables | score ff cff | in | tables size | tables size | tables size | read | r w | top in read
605 : // ------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+----------+------------------
606 : // 0 | 101 102B 0B 101 | 1.10 2.10 0.30 | 104B | 112 104B | 113 106B | 221 217B | 107B | 1 2.09 | 104B 104B 104B
607 : // 1 | 201 202B 0B 201 | 1.20 2.20 0.60 | 204B | 212 204B | 213 206B | 421 417B | 207B | 2 2.04 | 204B 204B 204B
608 : // 2 | 301 302B 0B 301 | 1.30 2.30 0.90 | 304B | 312 304B | 313 306B | 621 617B | 307B | 3 2.03 | 304B 304B 304B
609 : // 3 | 401 402B 0B 401 | 1.40 2.40 1.20 | 404B | 412 404B | 413 406B | 821 817B | 407B | 4 2.02 | 404B 404B 404B
610 : // 4 | 501 502B 0B 501 | 1.50 2.50 1.50 | 504B | 512 504B | 513 506B | 1.0K 1017B | 507B | 5 2.02 | 504B 504B 504B
611 : // 5 | 601 602B 0B 601 | 1.60 2.60 1.80 | 604B | 612 604B | 613 606B | 1.2K 1.2KB | 607B | 6 2.01 | 604B 604B 604B
612 : // 6 | 701 702B 0B 701 | - 2.70 2.10 | 704B | 712 704B | 713 706B | 1.4K 1.4KB | 707B | 7 2.01 | 704B 704B 704B
613 : // total | 2.8K 2.7KB 0B 2.8K | - - - | 2.8KB | 2.9K 2.8KB | 2.9K 2.8KB | 5.7K 8.4KB | 2.8KB | 28 3.00 | 2.8KB 2.8KB 2.8KB
614 : // ------------------------------------------------------------------------------------------------------------------------------------------------
615 : // WAL: 22 files (24B) in: 25B written: 26B (4% overhead)
616 : // Flushes: 8
617 : // Compactions: 5 estimated debt: 6B in progress: 2 (7B)
618 : // default: 27 delete: 28 elision: 29 move: 30 read: 31 tombstone-density: 16 rewrite: 32 copy: 33 multi-level: 34
619 : // MemTables: 12 (11B) zombie: 14 (13B)
620 : // Zombie tables: 16 (15B, local: 30B)
621 : // Backing tables: 1 (2.0MB)
622 : // Virtual tables: 2807 (2.8KB)
623 : // Local tables size: 28B
624 : // Compression types:
625 : // Table stats: 31
626 : // Block cache: 2 entries (1B) hit rate: 42.9%
627 : // File cache: 18 entries (17B) hit rate: 48.7%
628 : // Range key sets: 123 Tombstones: 456 Total missized tombstones encountered: 789
629 : // Snapshots: 4 earliest seq num: 1024
630 : // Table iters: 21
631 : // Filter utility: 47.4%
632 : // Ingestions: 27 as flushable: 36 (34B in 35 tables)
633 : // Cgo memory usage: 15KB block cache: 9.0KB (data: 4.0KB, maps: 2.0KB, entries: 3.0KB) memtables: 5.0KB
634 0 : func (m *Metrics) String() string {
635 0 : return redact.StringWithoutMarkers(m)
636 0 : }
637 :
638 : var _ redact.SafeFormatter = &Metrics{}
639 :
640 : // SafeFormat implements redact.SafeFormatter.
641 0 : func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
642 0 : // NB: Pebble does not make any assumptions as to which Go primitive types
643 0 : // have been registered as safe with redact.RegisterSafeType and does not
644 0 : // register any types itself. Some of the calls to `redact.Safe`, etc are
645 0 : // superfluous in the context of CockroachDB, which registers all the Go
646 0 : // numeric types as safe.
647 0 :
648 0 : multiExists := m.Compact.MultiLevelCount > 0
649 0 : appendIfMulti := func(line redact.SafeString) {
650 0 : if multiExists {
651 0 : w.SafeString(line)
652 0 : }
653 : }
654 0 : newline := func() {
655 0 : w.SafeString("\n")
656 0 : }
657 :
658 0 : w.SafeString(" | | | | ingested | moved | written | | amp")
659 0 : appendIfMulti(" | multilevel")
660 0 : newline()
661 0 : w.SafeString("level | tables size val-bl vtables | score ff cff | in | tables size | tables size | tables size | read | r w")
662 0 : appendIfMulti(" | top in read")
663 0 : newline()
664 0 : w.SafeString("------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+---------")
665 0 : appendIfMulti("-+------------------")
666 0 : newline()
667 0 :
668 0 : // formatRow prints out a row of the table.
669 0 : formatRow := func(m *LevelMetrics) {
670 0 : score := m.Score
671 0 : if score == 0 {
672 0 : // Format a zero level score as a dash.
673 0 : score = math.NaN()
674 0 : }
675 0 : w.Printf("| %5s %6s %6s %7s | %4s %4s %4s | %5s | %5s %6s | %5s %6s | %5s %6s | %5s | %3d %4s",
676 0 : humanize.Count.Int64(m.TablesCount),
677 0 : humanize.Bytes.Int64(m.TablesSize),
678 0 : humanize.Bytes.Uint64(m.Additional.ValueBlocksSize),
679 0 : humanize.Count.Uint64(m.VirtualTablesCount),
680 0 : humanizeFloat(score, 4),
681 0 : humanizeFloat(m.FillFactor, 4),
682 0 : humanizeFloat(m.CompensatedFillFactor, 4),
683 0 : humanize.Bytes.Uint64(m.TableBytesIn),
684 0 : humanize.Count.Uint64(m.TablesIngested),
685 0 : humanize.Bytes.Uint64(m.TableBytesIngested),
686 0 : humanize.Count.Uint64(m.TablesMoved),
687 0 : humanize.Bytes.Uint64(m.TableBytesMoved),
688 0 : humanize.Count.Uint64(m.TablesFlushed+m.TablesCompacted),
689 0 : humanize.Bytes.Uint64(m.TableBytesFlushed+m.TableBytesCompacted),
690 0 : humanize.Bytes.Uint64(m.TableBytesRead),
691 0 : redact.Safe(m.Sublevels),
692 0 : humanizeFloat(m.WriteAmp(), 4),
693 0 : )
694 0 :
695 0 : if multiExists {
696 0 : w.Printf(" | %5s %5s %5s",
697 0 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesInTop),
698 0 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesIn),
699 0 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesRead))
700 0 : }
701 0 : newline()
702 : }
703 :
704 0 : var total LevelMetrics
705 0 : for level := 0; level < numLevels; level++ {
706 0 : l := &m.Levels[level]
707 0 : w.Printf("%5d ", redact.Safe(level))
708 0 : formatRow(l)
709 0 : total.Add(l)
710 0 : total.Sublevels += l.Sublevels
711 0 : }
712 : // Compute total bytes-in as the bytes written to the WAL + bytes ingested.
713 0 : total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
714 0 : // Add the total bytes-in to the total bytes-flushed. This is to account for
715 0 : // the bytes written to the log and bytes written externally and then
716 0 : // ingested.
717 0 : total.TableBytesFlushed += total.TableBytesIn
718 0 : total.Score = math.NaN()
719 0 : total.FillFactor = math.NaN()
720 0 : total.CompensatedFillFactor = math.NaN()
721 0 : w.SafeString("total ")
722 0 : formatRow(&total)
723 0 :
724 0 : w.SafeString("----------------------------------------------------------------------------------------------------------------------------")
725 0 : appendIfMulti("--------------------")
726 0 : newline()
727 0 : w.Printf("WAL: %d files (%s) in: %s written: %s (%.0f%% overhead)",
728 0 : redact.Safe(m.WAL.Files),
729 0 : humanize.Bytes.Uint64(m.WAL.Size),
730 0 : humanize.Bytes.Uint64(m.WAL.BytesIn),
731 0 : humanize.Bytes.Uint64(m.WAL.BytesWritten),
732 0 : redact.Safe(percent(int64(m.WAL.BytesWritten)-int64(m.WAL.BytesIn), int64(m.WAL.BytesIn))))
733 0 : failoverStats := m.WAL.Failover
734 0 : failoverStats.FailoverWriteAndSyncLatency = nil
735 0 : if failoverStats == (wal.FailoverStats{}) {
736 0 : w.Printf("\n")
737 0 : } else {
738 0 : w.Printf(" failover: (switches: %d, primary: %s, secondary: %s)\n", m.WAL.Failover.DirSwitchCount,
739 0 : m.WAL.Failover.PrimaryWriteDuration.String(), m.WAL.Failover.SecondaryWriteDuration.String())
740 0 : }
741 :
742 0 : w.Printf("Flushes: %d\n", redact.Safe(m.Flush.Count))
743 0 :
744 0 : w.Printf("Compactions: %d estimated debt: %s in progress: %d (%s) canceled: %d (%s) failed: %d problem spans: %d\n",
745 0 : redact.Safe(m.Compact.Count),
746 0 : humanize.Bytes.Uint64(m.Compact.EstimatedDebt),
747 0 : redact.Safe(m.Compact.NumInProgress),
748 0 : humanize.Bytes.Int64(m.Compact.InProgressBytes),
749 0 : redact.Safe(m.Compact.CancelledCount),
750 0 : humanize.Bytes.Int64(m.Compact.CancelledBytes),
751 0 : redact.Safe(m.Compact.FailedCount),
752 0 : redact.Safe(m.Compact.NumProblemSpans),
753 0 : )
754 0 :
755 0 : w.Printf(" default: %d delete: %d elision: %d move: %d read: %d tombstone-density: %d rewrite: %d copy: %d multi-level: %d blob-file-rewrite: %d\n",
756 0 : redact.Safe(m.Compact.DefaultCount),
757 0 : redact.Safe(m.Compact.DeleteOnlyCount),
758 0 : redact.Safe(m.Compact.ElisionOnlyCount),
759 0 : redact.Safe(m.Compact.MoveCount),
760 0 : redact.Safe(m.Compact.ReadCount),
761 0 : redact.Safe(m.Compact.TombstoneDensityCount),
762 0 : redact.Safe(m.Compact.RewriteCount),
763 0 : redact.Safe(m.Compact.CopyCount),
764 0 : redact.Safe(m.Compact.MultiLevelCount),
765 0 : redact.Safe(m.Compact.BlobFileRewriteCount),
766 0 : )
767 0 :
768 0 : w.Printf("MemTables: %d (%s) zombie: %d (%s)\n",
769 0 : redact.Safe(m.MemTable.Count),
770 0 : humanize.Bytes.Uint64(m.MemTable.Size),
771 0 : redact.Safe(m.MemTable.ZombieCount),
772 0 : humanize.Bytes.Uint64(m.MemTable.ZombieSize))
773 0 :
774 0 : w.Printf("Zombie tables: %d (%s, local: %s)\n",
775 0 : redact.Safe(m.Table.ZombieCount),
776 0 : humanize.Bytes.Uint64(m.Table.ZombieSize),
777 0 : humanize.Bytes.Uint64(m.Table.Local.ZombieSize))
778 0 :
779 0 : w.Printf("Backing tables: %d (%s)\n",
780 0 : redact.Safe(m.Table.BackingTableCount),
781 0 : humanize.Bytes.Uint64(m.Table.BackingTableSize))
782 0 : w.Printf("Virtual tables: %d (%s)\n",
783 0 : redact.Safe(m.NumVirtual()),
784 0 : humanize.Bytes.Uint64(m.VirtualSize()))
785 0 : w.Printf("Local tables size: %s\n", humanize.Bytes.Uint64(m.Table.Local.LiveSize))
786 0 : w.SafeString("Compression types:")
787 0 : if count := m.Table.CompressedCountSnappy; count > 0 {
788 0 : w.Printf(" snappy: %d", redact.Safe(count))
789 0 : }
790 0 : if count := m.Table.CompressedCountZstd; count > 0 {
791 0 : w.Printf(" zstd: %d", redact.Safe(count))
792 0 : }
793 0 : if count := m.Table.CompressedCountMinLZ; count > 0 {
794 0 : w.Printf(" minlz: %d", redact.Safe(count))
795 0 : }
796 0 : if count := m.Table.CompressedCountNone; count > 0 {
797 0 : w.Printf(" none: %d", redact.Safe(count))
798 0 : }
799 0 : if count := m.Table.CompressedCountUnknown; count > 0 {
800 0 : w.Printf(" unknown: %d", redact.Safe(count))
801 0 : }
802 0 : w.Printf("\n")
803 0 : if m.Table.Garbage.PointDeletionsBytesEstimate > 0 || m.Table.Garbage.RangeDeletionsBytesEstimate > 0 {
804 0 : w.Printf("Garbage: point-deletions %s range-deletions %s\n",
805 0 : humanize.Bytes.Uint64(m.Table.Garbage.PointDeletionsBytesEstimate),
806 0 : humanize.Bytes.Uint64(m.Table.Garbage.RangeDeletionsBytesEstimate))
807 0 : }
808 0 : w.Printf("Table stats: ")
809 0 : if !m.Table.InitialStatsCollectionComplete {
810 0 : w.Printf("initial load in progress")
811 0 : } else if m.Table.PendingStatsCollectionCount == 0 {
812 0 : w.Printf("all loaded")
813 0 : } else {
814 0 : w.Printf("%s", humanize.Count.Int64(m.Table.PendingStatsCollectionCount))
815 0 : }
816 0 : w.Printf("\n")
817 0 :
818 0 : w.Printf("Block cache: %s entries (%s) hit rate: %.1f%%\n",
819 0 : humanize.Count.Int64(m.BlockCache.Count),
820 0 : humanize.Bytes.Int64(m.BlockCache.Size),
821 0 : redact.Safe(hitRate(m.BlockCache.Hits, m.BlockCache.Misses)))
822 0 :
823 0 : w.Printf("File cache: %s tables, %s blobfiles (%s) hit rate: %.1f%%\n",
824 0 : humanize.Count.Int64(m.FileCache.TableCount),
825 0 : humanize.Count.Int64(m.FileCache.BlobFileCount),
826 0 : humanize.Bytes.Int64(m.FileCache.Size),
827 0 : redact.Safe(hitRate(m.FileCache.Hits, m.FileCache.Misses)))
828 0 :
829 0 : formatSharedCacheMetrics := func(w redact.SafePrinter, m *SecondaryCacheMetrics, name redact.SafeString) {
830 0 : w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n",
831 0 : name,
832 0 : humanize.Count.Int64(m.Count),
833 0 : humanize.Bytes.Int64(m.Size),
834 0 : redact.Safe(hitRate(m.ReadsWithFullHit, m.ReadsWithPartialHit+m.ReadsWithNoHit)))
835 0 : }
836 0 : if m.SecondaryCacheMetrics.Size > 0 || m.SecondaryCacheMetrics.ReadsWithFullHit > 0 {
837 0 : formatSharedCacheMetrics(w, &m.SecondaryCacheMetrics, "Secondary cache")
838 0 : }
839 :
840 0 : w.Printf("Range key sets: %s Tombstones: %s Total missized tombstones encountered: %s\n",
841 0 : humanize.Count.Uint64(m.Keys.RangeKeySetsCount),
842 0 : humanize.Count.Uint64(m.Keys.TombstoneCount),
843 0 : humanize.Count.Uint64(m.Keys.MissizedTombstonesCount),
844 0 : )
845 0 :
846 0 : w.Printf("Snapshots: %d earliest seq num: %d\n",
847 0 : redact.Safe(m.Snapshots.Count),
848 0 : redact.Safe(m.Snapshots.EarliestSeqNum))
849 0 :
850 0 : w.Printf("Table iters: %d\n", redact.Safe(m.TableIters))
851 0 : w.Printf("Filter utility: %.1f%%\n", redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
852 0 : w.Printf("Ingestions: %d as flushable: %d (%s in %d tables)\n",
853 0 : redact.Safe(m.Ingest.Count),
854 0 : redact.Safe(m.Flush.AsIngestCount),
855 0 : humanize.Bytes.Uint64(m.Flush.AsIngestBytes),
856 0 : redact.Safe(m.Flush.AsIngestTableCount))
857 0 :
858 0 : var inUseTotal uint64
859 0 : for i := range m.manualMemory {
860 0 : inUseTotal += m.manualMemory[i].InUseBytes
861 0 : }
862 0 : inUse := func(purpose manual.Purpose) uint64 {
863 0 : return m.manualMemory[purpose].InUseBytes
864 0 : }
865 0 : w.Printf("Cgo memory usage: %s block cache: %s (data: %s, maps: %s, entries: %s) memtables: %s\n",
866 0 : humanize.Bytes.Uint64(inUseTotal),
867 0 : humanize.Bytes.Uint64(inUse(manual.BlockCacheData)+inUse(manual.BlockCacheMap)+inUse(manual.BlockCacheEntry)),
868 0 : humanize.Bytes.Uint64(inUse(manual.BlockCacheData)),
869 0 : humanize.Bytes.Uint64(inUse(manual.BlockCacheMap)),
870 0 : humanize.Bytes.Uint64(inUse(manual.BlockCacheEntry)),
871 0 : humanize.Bytes.Uint64(inUse(manual.MemTable)),
872 0 : )
873 : }
874 :
875 0 : func hitRate(hits, misses int64) float64 {
876 0 : return percent(hits, hits+misses)
877 0 : }
878 :
879 0 : func percent(numerator, denominator int64) float64 {
880 0 : if denominator == 0 {
881 0 : return 0
882 0 : }
883 0 : return 100 * float64(numerator) / float64(denominator)
884 : }
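// For example, hitRate(90, 10) yields 100*90/(90+10) = 90.0; percent(n, 0)
// is defined as 0 rather than dividing by zero.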
885 :
886 : // StringForTests is identical to m.String() on 64-bit platforms. It is used to
887 : // provide a platform-independent result for tests.
888 0 : func (m *Metrics) StringForTests() string {
889 0 : mCopy := *m
890 0 :
891 0 : // We recalculate the file cache size using the 64-bit sizes, and we ignore
892 0 : // the genericcache metadata size which is harder to adjust.
893 0 : const sstableReaderSize64bit = 280
894 0 : const blobFileReaderSize64bit = 96
895 0 : mCopy.FileCache.Size = mCopy.FileCache.TableCount*sstableReaderSize64bit + mCopy.FileCache.BlobFileCount*blobFileReaderSize64bit
896 0 : if math.MaxInt == math.MaxInt64 {
897 0 : // Verify the 64-bit sizes, so they are kept updated.
898 0 : if sstableReaderSize64bit != unsafe.Sizeof(sstable.Reader{}) {
899 0 : panic(fmt.Sprintf("sstableReaderSize64bit should be updated to %d", unsafe.Sizeof(sstable.Reader{})))
900 : }
901 0 : if blobFileReaderSize64bit != unsafe.Sizeof(blob.FileReader{}) {
902 0 : panic(fmt.Sprintf("blobFileReaderSize64bit should be updated to %d", unsafe.Sizeof(blob.FileReader{})))
903 : }
904 : }
905 : // Don't show cgo memory statistics as they can vary based on architecture,
906 : // invariants tag, etc.
907 0 : mCopy.manualMemory = manual.Metrics{}
908 0 : return redact.StringWithoutMarkers(&mCopy)
909 : }
910 :
911 : // levelMetricsDelta accumulates incremental ("delta") level metric updates
912 : // (e.g. from compactions or flushes).
913 : type levelMetricsDelta [manifest.NumLevels]*LevelMetrics
914 :
915 1 : func (m *levelMetricsDelta) level(level int) *LevelMetrics {
916 1 : if m[level] == nil {
917 1 : m[level] = &LevelMetrics{}
918 1 : }
919 1 : return m[level]
920 : }
921 :
922 1 : func (m *Metrics) updateLevelMetrics(updates levelMetricsDelta) {
923 1 : for i, u := range updates {
924 1 : if u != nil {
925 1 : m.Levels[i].Add(u)
926 1 : }
927 : }
928 : }
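// A minimal sketch of how these two types cooperate (the specific field
// updates are illustrative): an operation such as a flush accumulates its
// per-level changes into a levelMetricsDelta, allocating a LevelMetrics only
// for levels it touched, and the delta is later folded into the DB-wide
// Metrics via updateLevelMetrics.
//
//	var delta levelMetricsDelta
//	lm := delta.level(0)
//	lm.TablesFlushed++
//	lm.TableBytesFlushed += n // hypothetical byte count n
//	metrics.updateLevelMetrics(delta)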
929 :
930 : // humanizeFloat formats a float64 value as a string. It shows up to two
931 : // decimals, depending on the target length. NaN is shown as "-".
932 0 : func humanizeFloat(v float64, targetLength int) redact.SafeString {
933 0 : if math.IsNaN(v) {
934 0 : return "-"
935 0 : }
936 : // We treat 0 specially. Values near zero will show up as 0.00.
937 0 : if v == 0 {
938 0 : return "0"
939 0 : }
940 0 : res := fmt.Sprintf("%.2f", v)
941 0 : if len(res) <= targetLength {
942 0 : return redact.SafeString(res)
943 0 : }
944 0 : if len(res) == targetLength+1 {
945 0 : return redact.SafeString(fmt.Sprintf("%.1f", v))
946 0 : }
947 0 : return redact.SafeString(fmt.Sprintf("%.0f", v))
948 : }
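// For example, with targetLength 4: 1.234 renders as "1.23" (the "%.2f" form
// fits), 12.3 renders as "12.3" (one decimal), and 123.4 renders as "123"
// (no decimals).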
|