Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "fmt"
9 : "math"
10 : "time"
11 :
12 : "github.com/cockroachdb/pebble/internal/base"
13 : "github.com/cockroachdb/pebble/internal/cache"
14 : "github.com/cockroachdb/pebble/internal/humanize"
15 : "github.com/cockroachdb/pebble/internal/manifest"
16 : "github.com/cockroachdb/pebble/internal/manual"
17 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/sharedcache"
18 : "github.com/cockroachdb/pebble/record"
19 : "github.com/cockroachdb/pebble/sstable"
20 : "github.com/cockroachdb/pebble/sstable/block"
21 : "github.com/cockroachdb/pebble/wal"
22 : "github.com/cockroachdb/redact"
23 : "github.com/prometheus/client_golang/prometheus"
24 : )
25 :
26 : // CacheMetrics holds metrics for the block and file cache.
27 : type CacheMetrics = cache.Metrics
28 :
29 : // FilterMetrics holds metrics for the filter policy.
30 : type FilterMetrics = sstable.FilterMetrics
31 :
32 : // ThroughputMetric is a cumulative throughput metric. See the detailed
33 : // comment in base.
34 : type ThroughputMetric = base.ThroughputMetric
35 :
36 : // SecondaryCacheMetrics holds metrics for the persistent secondary cache
37 : // that caches commonly accessed blocks from blob storage on a local
38 : // file system.
39 : type SecondaryCacheMetrics = sharedcache.Metrics
40 :
41 : // LevelMetrics holds per-level metrics such as the number of files and total
42 : // size of the files, and compaction-related metrics.
43 : type LevelMetrics struct {
44 : // The number of sublevels within the level. The sublevel count corresponds
45 : // to the read amplification for the level. An empty level will have a
46 : // sublevel count of 0, implying no read amplification. Only L0 will have
47 : // a sublevel count other than 0 or 1.
48 : Sublevels int32
49 : // The total count of sstables in the level.
50 : TablesCount int64
51 : // The total size in bytes of the sstables in the level. Note that if tables
52 : // contain references to blob files, this quantity does not include the
53 : // size of the blob files or the referenced values.
54 : TablesSize int64
55 : // The total number of virtual sstables in the level.
56 : VirtualTablesCount uint64
57 : // The total size of the virtual sstables in the level.
58 : VirtualTablesSize uint64
59 : // The estimated total physical size of all blob references across all
60 : // sstables in the level. The physical size is estimated based on the size
61 : // of the referenced values and the compression ratios of their blob files.
62 : EstimatedReferencesSize uint64
63 : // The level's compaction score, used to rank levels (0 if the level doesn't
64 : // need compaction). See candidateLevelInfo.
65 : Score float64
66 : // The level's fill factor (the ratio between the size of the level and the
67 : // ideal size). See candidateLevelInfo.
68 : FillFactor float64
69 : // The level's compensated fill factor. See candidateLevelInfo.
70 : CompensatedFillFactor float64
71 : // The number of incoming bytes from other levels' sstables read during
72 : // compactions. This excludes bytes moved and bytes ingested. For L0 this is
73 : // the bytes written to the WAL.
74 : TableBytesIn uint64
75 : // The number of sstable bytes ingested. The sibling metric for tables is
76 : // TablesIngested.
77 : TableBytesIngested uint64
78 : // The number of sstable bytes moved into the level by a "move" compaction.
79 : // The sibling metric for tables is TablesMoved.
80 : TableBytesMoved uint64
81 : // The number of bytes read for compactions at the level. This includes bytes
82 : // read from other levels (TableBytesIn), as well as bytes read for the level.
83 : TableBytesRead uint64
84 : // The number of bytes written to sstables during compactions. The sibling
85 : // metric for tables is TablesCompacted. This metric may be summed with
86 : // TableBytesFlushed to compute the total bytes written for the level.
87 : TableBytesCompacted uint64
88 : // The number of bytes written to sstables during flushes. The sibling
89 : // metric for tables is TablesFlushed. This metric is always zero for all
90 : // levels other than L0.
91 : TableBytesFlushed uint64
92 : // The number of sstables compacted to this level.
93 : TablesCompacted uint64
94 : // The number of sstables flushed to this level.
95 : TablesFlushed uint64
96 : // The number of sstables ingested into the level.
97 : TablesIngested uint64
98 : // The number of sstables moved to this level by a "move" compaction.
99 : TablesMoved uint64
100 : // The number of sstables deleted in a level by a delete-only compaction.
101 : TablesDeleted uint64
102 : // The number of sstables excised in a level by a delete-only compaction.
103 : TablesExcised uint64
104 : // BlobBytesReadEstimate is an estimate of the physical bytes corresponding
105 : // to values referenced by sstables that were inputs into compactions
106 : // outputting into this level.
107 : BlobBytesReadEstimate uint64
108 : // BlobBytesWritten is the number of bytes written to blob files while
109 : // compacting sstables in this level.
110 : BlobBytesWritten uint64
111 : // BlobBytesFlushed is the number of bytes written to blob files while
112 : // flushing sstables. This metric is always zero for all levels other than
113 : // L0.
114 : BlobBytesFlushed uint64
115 :
116 : MultiLevel struct {
117 : // TableBytesInTop is the total number of bytes in a multilevel compaction coming
118 : // from the top level.
119 : TableBytesInTop uint64
120 :
121 : // TableBytesIn, exclusively for multilevel compactions.
122 : TableBytesIn uint64
123 :
124 : // TableBytesRead, exclusively for multilevel compactions.
125 : TableBytesRead uint64
126 : }
127 :
128 : // Additional contains miscellaneous metrics that are not always printed.
129 : Additional struct {
130 : // The sum of Properties.ValueBlocksSize for all the sstables in this
131 : // level. Printed by LevelMetrics.format iff there is at least one level
132 : // with a non-zero value.
133 : ValueBlocksSize uint64
134 : // Cumulative metrics about bytes written to data blocks and value blocks,
135 : // via compactions (except move compactions) or flushes. Not printed by
136 : // LevelMetrics.format, but are available to sophisticated clients.
137 : BytesWrittenDataBlocks uint64
138 : BytesWrittenValueBlocks uint64
139 : }
140 : }
141 :
142 : // AggregateSize returns an estimated physical size of the level's sstables and
143 : // their referenced values stored in blob files. The size of physical sstables
144 : // is exactly known. Virtual sstables' sizes are estimated, and the size of
145 : // values stored in blob files is estimated based on the volume of referenced
146 : // data and the blob file's compression ratio.
147 1 : func (m *LevelMetrics) AggregateSize() int64 {
148 1 : return m.TablesSize + int64(m.EstimatedReferencesSize)
149 1 : }
150 :
151 : // Add updates the counter metrics for the level.
152 1 : func (m *LevelMetrics) Add(u *LevelMetrics) {
153 1 : m.TablesCount += u.TablesCount
154 1 : m.TablesSize += u.TablesSize
155 1 : m.VirtualTablesCount += u.VirtualTablesCount
156 1 : m.VirtualTablesSize += u.VirtualTablesSize
157 1 : m.EstimatedReferencesSize += u.EstimatedReferencesSize
158 1 : m.TableBytesIn += u.TableBytesIn
159 1 : m.TableBytesIngested += u.TableBytesIngested
160 1 : m.TableBytesMoved += u.TableBytesMoved
161 1 : m.TableBytesRead += u.TableBytesRead
162 1 : m.TableBytesCompacted += u.TableBytesCompacted
163 1 : m.TableBytesFlushed += u.TableBytesFlushed
164 1 : m.TablesCompacted += u.TablesCompacted
165 1 : m.TablesFlushed += u.TablesFlushed
166 1 : m.TablesIngested += u.TablesIngested
167 1 : m.TablesMoved += u.TablesMoved
168 1 : m.BlobBytesWritten += u.BlobBytesWritten
169 1 : m.BlobBytesFlushed += u.BlobBytesFlushed
170 1 : m.BlobBytesReadEstimate += u.BlobBytesReadEstimate
171 1 : m.MultiLevel.TableBytesInTop += u.MultiLevel.TableBytesInTop
172 1 : m.MultiLevel.TableBytesRead += u.MultiLevel.TableBytesRead
173 1 : m.MultiLevel.TableBytesIn += u.MultiLevel.TableBytesIn
174 1 : m.Additional.BytesWrittenDataBlocks += u.Additional.BytesWrittenDataBlocks
175 1 : m.Additional.BytesWrittenValueBlocks += u.Additional.BytesWrittenValueBlocks
176 1 : m.Additional.ValueBlocksSize += u.Additional.ValueBlocksSize
177 1 : }
178 :
179 : // WriteAmp computes the write amplification for compactions at this
180 : // level. Computed as:
181 : //
182 : // TableBytesFlushed + TableBytesCompacted + BlobBytesFlushed
183 : // ---------------------------------------------------------
184 : // TableBytesIn + BlobBytesWritten
185 1 : func (m *LevelMetrics) WriteAmp() float64 {
186 1 : if m.TableBytesIn == 0 {
187 1 : return 0
188 1 : }
189 1 : return float64(m.TableBytesFlushed+m.TableBytesCompacted+m.BlobBytesFlushed) / float64(m.TableBytesIn+m.BlobBytesWritten)
190 : }
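// A worked example of the formula above (illustrative numbers, not from any
// real workload): an L0 that has flushed 10 GiB of sstable bytes out of 8 GiB
// of incoming WAL bytes, with no compactions and no blob files, has
//
//	WriteAmp = (10 GiB + 0 + 0) / (8 GiB + 0) = 1.25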
191 :
192 : var categoryCompaction = block.RegisterCategory("pebble-compaction", block.NonLatencySensitiveQoSLevel)
193 : var categoryIngest = block.RegisterCategory("pebble-ingest", block.LatencySensitiveQoSLevel)
194 : var categoryGet = block.RegisterCategory("pebble-get", block.LatencySensitiveQoSLevel)
195 :
196 : // Metrics holds metrics for various subsystems of the DB such as the Cache,
197 : // Compactions, WAL, and per-Level metrics.
198 : //
199 : // TODO(peter): The testing of these metrics is relatively weak. There should
200 : // be testing that performs various operations on a DB and verifies that the
201 : // metrics reflect those operations.
202 : type Metrics struct {
203 : BlockCache CacheMetrics
204 :
205 : Compact struct {
206 : // The total number of compactions, and per-compaction type counts.
207 : Count int64
208 : DefaultCount int64
209 : DeleteOnlyCount int64
210 : ElisionOnlyCount int64
211 : CopyCount int64
212 : MoveCount int64
213 : ReadCount int64
214 : TombstoneDensityCount int64
215 : RewriteCount int64
216 : MultiLevelCount int64
217 : CounterLevelCount int64
218 : // An estimate of the number of bytes that need to be compacted for the LSM
219 : // to reach a stable state.
220 : EstimatedDebt uint64
221 : // Number of bytes present in sstables being written by in-progress
222 : // compactions. This value will be zero if there are no in-progress
223 : // compactions.
224 : InProgressBytes int64
225 : // Number of compactions that are in-progress.
226 : NumInProgress int64
227 : // Number of compactions that were cancelled.
228 : CancelledCount int64
229 : // CancelledBytes is the number of bytes written by compactions that were
230 : // cancelled.
231 : CancelledBytes int64
232 : // Total number of compactions that hit an error.
233 : FailedCount int64
234 : // NumProblemSpans is the current (instantaneous) count of "problem spans"
235 : // which temporarily block compactions.
236 : NumProblemSpans int
237 : // MarkedFiles is a count of files that are marked for
238 : // compaction. Such files are compacted in a rewrite compaction
239 : // when no other compactions are picked.
240 : MarkedFiles int
241 : // Duration records the cumulative duration of all compactions since the
242 : // database was opened.
243 : Duration time.Duration
244 : }
245 :
246 : Ingest struct {
247 : // The total number of ingestions
248 : Count uint64
249 : }
250 :
251 : Flush struct {
252 : // The total number of flushes.
253 : Count int64
254 : WriteThroughput ThroughputMetric
255 : // Number of flushes that are in-progress. In the current implementation
256 : // this will always be zero or one.
257 : NumInProgress int64
258 : // AsIngestCount is a monotonically increasing counter of flush operations
259 : // handling ingested tables.
260 : AsIngestCount uint64
261 : // AsIngestTableCount is a monotonically increasing counter of tables ingested as
262 : // flushables.
263 : AsIngestTableCount uint64
264 : // AsIngestBytes is a monotonically increasing counter of the bytes flushed
265 : // for flushables that originated as ingestion operations.
266 : AsIngestBytes uint64
267 : }
268 :
269 : Filter FilterMetrics
270 :
271 : Levels [numLevels]LevelMetrics
272 :
273 : MemTable struct {
274 : // The number of bytes allocated by memtables and large (flushable)
275 : // batches.
276 : Size uint64
277 : // The count of memtables.
278 : Count int64
279 : // The number of bytes present in zombie memtables which are no longer
280 : // referenced by the current DB state. An unbounded number of memtables
281 : // may be zombie if they're still in use by an iterator. One additional
282 : // memtable may be zombie if it's no longer in use and waiting to be
283 : // recycled.
284 : ZombieSize uint64
285 : // The count of zombie memtables.
286 : ZombieCount int64
287 : }
288 :
289 : Keys struct {
290 : // The approximate count of internal range key set keys in the database.
291 : RangeKeySetsCount uint64
292 : // The approximate count of internal tombstones (DEL, SINGLEDEL and
293 : // RANGEDEL key kinds) within the database.
294 : TombstoneCount uint64
295 : // A cumulative total number of missized DELSIZED keys encountered by
296 : // compactions since the database was opened.
297 : MissizedTombstonesCount uint64
298 : }
299 :
300 : Snapshots struct {
301 : // The number of currently open snapshots.
302 : Count int
303 : // The sequence number of the earliest, currently open snapshot.
304 : EarliestSeqNum base.SeqNum
305 : // A running tally of keys written to sstables during flushes or
306 : // compactions that would've been elided if it weren't for open
307 : // snapshots.
308 : PinnedKeys uint64
309 : // A running cumulative sum of the size of keys and values written to
310 : // sstables during flushes or compactions that would've been elided if
311 : // it weren't for open snapshots.
312 : PinnedSize uint64
313 : }
314 :
315 : Table struct {
316 : // The number of bytes present in obsolete tables which are no longer
317 : // referenced by the current DB state or any open iterators.
318 : ObsoleteSize uint64
319 : // The count of obsolete tables.
320 : ObsoleteCount int64
321 : // The number of bytes present in zombie tables which are no longer
322 : // referenced by the current DB state but are still in use by an iterator.
323 : ZombieSize uint64
324 : // The count of zombie tables.
325 : ZombieCount int64
326 : // The count of sstables backing virtual tables.
327 : BackingTableCount uint64
328 : // The sum of the sizes of the BackingTableCount sstables that are backing virtual tables.
329 : BackingTableSize uint64
330 : // The number of sstables that are compressed with an unknown compression
331 : // algorithm.
332 : CompressedCountUnknown int64
333 : // The number of sstables that are compressed with the default compression
334 : // algorithm, snappy.
335 : CompressedCountSnappy int64
336 : // The number of sstables that are compressed with zstd.
337 : CompressedCountZstd int64
338 : // The number of sstables that are compressed with minlz.
339 : CompressedCountMinLZ int64
340 : // The number of sstables that are uncompressed.
341 : CompressedCountNone int64
342 :
343 : // Local file sizes.
344 : Local struct {
345 : // LiveSize is the number of bytes in live tables.
346 : LiveSize uint64
347 : // LiveCount is the number of live tables.
348 : LiveCount uint64
349 : // ObsoleteSize is the number of bytes in obsolete tables.
350 : ObsoleteSize uint64
351 : // ObsoleteCount is the number of obsolete tables.
352 : ObsoleteCount uint64
353 : // ZombieSize is the number of bytes in zombie tables.
354 : ZombieSize uint64
355 : // ZombieCount is the number of zombie tables.
356 : ZombieCount uint64
357 : }
358 :
359 : // Garbage bytes.
360 : Garbage struct {
361 : // PointDeletionsBytesEstimate is the estimated file bytes that will be
362 : // saved by compacting all point deletions. This is dependent on table
363 : // stats collection, so can be very incomplete until
364 : // InitialStatsCollectionComplete becomes true.
365 : PointDeletionsBytesEstimate uint64
366 : // RangeDeletionsBytesEstimate is the estimated file bytes that will be
367 : // saved by compacting all range deletions. This is dependent on table
368 : // stats collection, so can be very incomplete until
369 : // InitialStatsCollectionComplete becomes true.
370 : RangeDeletionsBytesEstimate uint64
371 : }
372 :
373 : // Whether the initial stats collection (for existing tables on Open) is
374 : // complete.
375 : InitialStatsCollectionComplete bool
376 : // The count of recently created sstables that need stats collection. This
377 : // does not include sstables that existed when the DB was opened, so the
378 : // value is only useful when InitialStatsCollectionComplete is true.
379 : PendingStatsCollectionCount int64
380 : }
381 :
382 : BlobFiles struct {
383 : // The count of all live blob files.
384 : LiveCount uint64
385 : // The physical file size of all live blob files.
386 : LiveSize uint64
387 : // ValueSize is the sum of the length of the uncompressed values in all
388 : // live (referenced by some sstable(s) within the current version) blob
389 : // files. ValueSize may be greater than LiveSize when compression is
390 : // effective. ValueSize includes bytes in live blob files that are not
391 : // actually reachable by any sstable key. If any value within the blob
392 : // file is reachable by a key in a live sstable, then the entirety of
393 : // the blob file's values are included within ValueSize.
394 : ValueSize uint64
395 : // ReferencedValueSize is the sum of the length of the uncompressed
396 : // values (in all live blob files) that are still referenced by keys
397 : // within live tables. Over the lifetime of a blob file, its references
398 : // are removed as compactions rewrite the same values into new blob
399 : // files, or as keys referencing the file's values are deleted.
400 : // ReferencedValueSize accounts for the volume of bytes that are
401 : // actually reachable by some key in a live table.
402 : //
403 : // The difference between ValueSize and ReferencedValueSize is
404 : // (uncompressed) space amplification that could be reclaimed if all
405 : // blob files were rewritten, discarding values that are no longer
406 : // referenced by any keys in any sstables within the current version.
407 : ReferencedValueSize uint64
408 : // The count of all obsolete blob files.
409 : ObsoleteCount uint64
410 : // The physical size of all obsolete blob files.
411 : ObsoleteSize uint64
412 : // The count of all zombie blob files.
413 : ZombieCount uint64
414 : // The physical size of all zombie blob files.
415 : ZombieSize uint64
416 : // Local file sizes.
417 : Local struct {
418 : // LiveSize is the physical size of local live blob files.
419 : LiveSize uint64
420 : // LiveCount is the number of local live blob files.
421 : LiveCount uint64
422 : // ObsoleteSize is the physical size of local obsolete blob files.
423 : ObsoleteSize uint64
424 : // ObsoleteCount is the number of local obsolete blob files.
425 : ObsoleteCount uint64
426 : // ZombieSize is the physical size of local zombie blob files.
427 : ZombieSize uint64
428 : // ZombieCount is the number of local zombie blob files.
429 : ZombieCount uint64
430 : }
431 : }
432 :
433 : FileCache CacheMetrics
434 :
435 : // The number of open sstable iterators.
436 : TableIters int64
437 : // Uptime is the total time since this DB was opened.
438 : Uptime time.Duration
439 :
440 : WAL struct {
441 : // Number of live WAL files.
442 : Files int64
443 : // Number of obsolete WAL files.
444 : ObsoleteFiles int64
445 : // Physical size of the obsolete WAL files.
446 : ObsoletePhysicalSize uint64
447 : // Size of the live data in the WAL files. Note that with WAL file
448 : // recycling this is less than the actual on-disk size of the WAL files.
449 : Size uint64
450 : // Physical size of the WAL files on-disk. With WAL file recycling,
451 : // this is greater than the live data in WAL files.
452 : //
453 : // TODO(sumeer): it seems this does not include ObsoletePhysicalSize.
454 : // Should the comment be updated?
455 : PhysicalSize uint64
456 : // Number of logical bytes written to the WAL.
457 : BytesIn uint64
458 : // Number of bytes written to the WAL.
459 : BytesWritten uint64
460 : // Failover contains failover stats. Empty if failover is not enabled.
461 : Failover wal.FailoverStats
462 : }
463 :
464 : LogWriter struct {
465 : FsyncLatency prometheus.Histogram
466 : record.LogWriterMetrics
467 : }
468 :
469 : CategoryStats []block.CategoryStatsAggregate
470 :
471 : SecondaryCacheMetrics SecondaryCacheMetrics
472 :
473 : private struct {
474 : optionsFileSize uint64
475 : manifestFileSize uint64
476 : }
477 :
478 : manualMemory manual.Metrics
479 : }
480 :
481 : var (
482 : // FsyncLatencyBuckets are prometheus histogram buckets suitable for a histogram
483 : // that records latencies for fsyncs.
484 : FsyncLatencyBuckets = append(
485 : prometheus.LinearBuckets(0.0, float64(time.Microsecond*100), 50),
486 : prometheus.ExponentialBucketsRange(float64(time.Millisecond*5), float64(10*time.Second), 50)...,
487 : )
488 :
489 : // SecondaryCacheIOBuckets is exported so that clients of package pebble
490 : // (e.g. CRDB) can export metrics that use these buckets.
491 : SecondaryCacheIOBuckets = sharedcache.IOBuckets
492 : // SecondaryCacheChannelWriteBuckets is exported so that clients of package
493 : // pebble (e.g. CRDB) can export metrics that use these buckets.
494 : SecondaryCacheChannelWriteBuckets = sharedcache.ChannelWriteBuckets
495 : )
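// A sketch of how a caller outside this package might reuse these buckets for
// its own histogram. The metric name below is hypothetical;
// prometheus.NewHistogram and HistogramOpts are real client_golang API. Note
// that the bucket boundaries are float64(time.Duration) values, i.e.
// nanoseconds.
var exampleFsyncHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
	Name:    "example_wal_fsync_latency_nanos", // hypothetical metric name
	Help:    "Example histogram reusing Pebble's FsyncLatencyBuckets.",
	Buckets: FsyncLatencyBuckets,
})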
496 :
497 : // DiskSpaceUsage returns the total disk space used by the database in bytes,
498 : // including live and obsolete files. This only includes local files, i.e.,
499 : // remote files (as known to objstorage.Provider) are not included.
500 1 : func (m *Metrics) DiskSpaceUsage() uint64 {
501 1 : var usageBytes uint64
502 1 : usageBytes += m.WAL.PhysicalSize
503 1 : usageBytes += m.WAL.ObsoletePhysicalSize
504 1 : usageBytes += m.Table.Local.LiveSize
505 1 : usageBytes += m.Table.Local.ObsoleteSize
506 1 : usageBytes += m.Table.Local.ZombieSize
507 1 : usageBytes += m.private.optionsFileSize
508 1 : usageBytes += m.private.manifestFileSize
509 1 : // TODO(sumeer): InProgressBytes does not distinguish between local and
510 1 : // remote files. This causes a small error. Fix.
511 1 : usageBytes += uint64(m.Compact.InProgressBytes)
512 1 : return usageBytes
513 1 : }
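// logDBUsage is a hypothetical helper (not part of Pebble's API) sketching
// how a client might poll these metrics; DB.Metrics is the real accessor.
func logDBUsage(db *DB) {
	m := db.Metrics()
	fmt.Printf("disk used: %s (WAL: %s), read amp: %d\n",
		humanize.Bytes.Uint64(m.DiskSpaceUsage()),
		humanize.Bytes.Uint64(m.WAL.PhysicalSize+m.WAL.ObsoletePhysicalSize),
		m.ReadAmp())
}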
514 :
515 : // NumVirtual is the number of virtual sstables in the latest version
516 : // summed over every level in the LSM.
517 1 : func (m *Metrics) NumVirtual() uint64 {
518 1 : var n uint64
519 1 : for _, level := range m.Levels {
520 1 : n += level.VirtualTablesCount
521 1 : }
522 1 : return n
523 : }
524 :
525 : // VirtualSize is the sum of the sizes of the virtual sstables in the
526 : // latest version. BackingTableSize - VirtualSize gives an estimate for
527 : // the space amplification caused by not compacting virtual sstables.
528 1 : func (m *Metrics) VirtualSize() uint64 {
529 1 : var size uint64
530 1 : for _, level := range m.Levels {
531 1 : size += level.VirtualTablesSize
532 1 : }
533 1 : return size
534 : }
535 :
536 : // ReadAmp returns the current read amplification of the database.
537 : // It's computed as the number of sublevels in L0 + the number of non-empty
538 : // levels below L0.
539 1 : func (m *Metrics) ReadAmp() int {
540 1 : var ramp int32
541 1 : for _, l := range m.Levels {
542 1 : ramp += l.Sublevels
543 1 : }
544 1 : return int(ramp)
545 : }
546 :
547 : // Total returns the sum of the per-level metrics and WAL metrics.
548 1 : func (m *Metrics) Total() LevelMetrics {
549 1 : var total LevelMetrics
550 1 : for level := 0; level < numLevels; level++ {
551 1 : l := &m.Levels[level]
552 1 : total.Add(l)
553 1 : total.Sublevels += l.Sublevels
554 1 : }
555 : // Compute total bytes-in as the bytes written to the WAL + bytes ingested.
556 1 : total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
557 1 : // Add the total bytes-in to the total bytes-flushed. This is to account for
558 1 : // the bytes written to the log and bytes written externally and then
559 1 : // ingested.
560 1 : total.TableBytesFlushed += total.TableBytesIn
561 1 : return total
562 : }
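// dbWriteAmp is a hypothetical convenience wrapper illustrating how Total
// composes with WriteAmp: because Total seeds TableBytesIn with the bytes
// written to the WAL, the result is the DB-wide write amplification.
func dbWriteAmp(m *Metrics) float64 {
	total := m.Total()
	return total.WriteAmp()
}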
563 :
564 : // RemoteTablesTotal returns the total number of remote tables and their total
565 : // size. Remote tables are computed as the difference between total tables
566 : // (live + obsolete + zombie) and local tables.
567 1 : func (m *Metrics) RemoteTablesTotal() (count uint64, size uint64) {
568 1 : var liveTables, liveTableBytes int64
569 1 : for level := 0; level < numLevels; level++ {
570 1 : liveTables += m.Levels[level].TablesCount
571 1 : liveTableBytes += m.Levels[level].TablesSize
572 1 : }
573 1 : totalCount := liveTables + m.Table.ObsoleteCount + m.Table.ZombieCount
574 1 : localCount := m.Table.Local.LiveCount + m.Table.Local.ObsoleteCount + m.Table.Local.ZombieCount
575 1 : remoteCount := uint64(totalCount) - localCount
576 1 :
577 1 : totalSize := uint64(liveTableBytes) + m.Table.ObsoleteSize + m.Table.ZombieSize
578 1 : localSize := m.Table.Local.LiveSize + m.Table.Local.ObsoleteSize + m.Table.Local.ZombieSize
579 1 : remoteSize := totalSize - localSize
580 1 :
581 1 : return remoteCount, remoteSize
582 : }
583 :
584 : // String pretty-prints the metrics as below:
585 : //
586 : // | | | | ingested | moved | written | | amp | multilevel
587 : // level | tables size val-bl vtables | score ff cff | in | tables size | tables size | tables size | read | r w | top in read
588 : // ------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+----------+------------------
589 : // 0 | 101 102B 0B 101 | 1.10 2.10 0.30 | 104B | 112 104B | 113 106B | 221 217B | 107B | 1 2.09 | 104B 104B 104B
590 : // 1 | 201 202B 0B 201 | 1.20 2.20 0.60 | 204B | 212 204B | 213 206B | 421 417B | 207B | 2 2.04 | 204B 204B 204B
591 : // 2 | 301 302B 0B 301 | 1.30 2.30 0.90 | 304B | 312 304B | 313 306B | 621 617B | 307B | 3 2.03 | 304B 304B 304B
592 : // 3 | 401 402B 0B 401 | 1.40 2.40 1.20 | 404B | 412 404B | 413 406B | 821 817B | 407B | 4 2.02 | 404B 404B 404B
593 : // 4 | 501 502B 0B 501 | 1.50 2.50 1.50 | 504B | 512 504B | 513 506B | 1.0K 1017B | 507B | 5 2.02 | 504B 504B 504B
594 : // 5 | 601 602B 0B 601 | 1.60 2.60 1.80 | 604B | 612 604B | 613 606B | 1.2K 1.2KB | 607B | 6 2.01 | 604B 604B 604B
595 : // 6 | 701 702B 0B 701 | - 2.70 2.10 | 704B | 712 704B | 713 706B | 1.4K 1.4KB | 707B | 7 2.01 | 704B 704B 704B
596 : // total | 2.8K 2.7KB 0B 2.8K | - - - | 2.8KB | 2.9K 2.8KB | 2.9K 2.8KB | 5.7K 8.4KB | 2.8KB | 28 3.00 | 2.8KB 2.8KB 2.8KB
597 : // ------------------------------------------------------------------------------------------------------------------------------------------------
598 : // WAL: 22 files (24B) in: 25B written: 26B (4% overhead)
599 : // Flushes: 8
600 : // Compactions: 5 estimated debt: 6B in progress: 2 (7B)
601 : // default: 27 delete: 28 elision: 29 move: 30 read: 31 tombstone-density: 16 rewrite: 32 copy: 33 multi-level: 34
602 : // MemTables: 12 (11B) zombie: 14 (13B)
603 : // Zombie tables: 16 (15B, local: 30B)
604 : // Backing tables: 1 (2.0MB)
605 : // Virtual tables: 2807 (2.8KB)
606 : // Local tables size: 28B
607 : // Compression types:
608 : // Table stats: 31
609 : // Block cache: 2 entries (1B) hit rate: 42.9%
610 : // Table cache: 18 entries (17B) hit rate: 48.7%
611 : // Range key sets: 123 Tombstones: 456 Total missized tombstones encountered: 789
612 : // Snapshots: 4 earliest seq num: 1024
613 : // Table iters: 21
614 : // Filter utility: 47.4%
615 : // Ingestions: 27 as flushable: 36 (34B in 35 tables)
616 : // Cgo memory usage: 15KB block cache: 9.0KB (data: 4.0KB, maps: 2.0KB, entries: 3.0KB) memtables: 5.0KB
617 1 : func (m *Metrics) String() string {
618 1 : return redact.StringWithoutMarkers(m)
619 1 : }
620 :
621 : var _ redact.SafeFormatter = &Metrics{}
622 :
623 : // SafeFormat implements redact.SafeFormatter.
624 1 : func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
625 1 : // NB: Pebble does not make any assumptions as to which Go primitive types
626 1 : // have been registered as safe with redact.RegisterSafeType and does not
627 1 : // register any types itself. Some of the calls to `redact.Safe`, etc are
628 1 : // superfluous in the context of CockroachDB, which registers all the Go
629 1 : // numeric types as safe.
630 1 :
631 1 : multiExists := m.Compact.MultiLevelCount > 0
632 1 : appendIfMulti := func(line redact.SafeString) {
633 1 : if multiExists {
634 1 : w.SafeString(line)
635 1 : }
636 : }
637 1 : newline := func() {
638 1 : w.SafeString("\n")
639 1 : }
640 :
641 1 : w.SafeString(" | | | | ingested | moved | written | | amp")
642 1 : appendIfMulti(" | multilevel")
643 1 : newline()
644 1 : w.SafeString("level | tables size val-bl vtables | score ff cff | in | tables size | tables size | tables size | read | r w")
645 1 : appendIfMulti(" | top in read")
646 1 : newline()
647 1 : w.SafeString("------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+---------")
648 1 : appendIfMulti("-+------------------")
649 1 : newline()
650 1 :
651 1 : // formatRow prints out a row of the table.
652 1 : formatRow := func(m *LevelMetrics) {
653 1 : score := m.Score
654 1 : if score == 0 {
655 1 : // Format a zero level score as a dash.
656 1 : score = math.NaN()
657 1 : }
658 1 : w.Printf("| %5s %6s %6s %7s | %4s %4s %4s | %5s | %5s %6s | %5s %6s | %5s %6s | %5s | %3d %4s",
659 1 : humanize.Count.Int64(m.TablesCount),
660 1 : humanize.Bytes.Int64(m.TablesSize),
661 1 : humanize.Bytes.Uint64(m.Additional.ValueBlocksSize),
662 1 : humanize.Count.Uint64(m.VirtualTablesCount),
663 1 : humanizeFloat(score, 4),
664 1 : humanizeFloat(m.FillFactor, 4),
665 1 : humanizeFloat(m.CompensatedFillFactor, 4),
666 1 : humanize.Bytes.Uint64(m.TableBytesIn),
667 1 : humanize.Count.Uint64(m.TablesIngested),
668 1 : humanize.Bytes.Uint64(m.TableBytesIngested),
669 1 : humanize.Count.Uint64(m.TablesMoved),
670 1 : humanize.Bytes.Uint64(m.TableBytesMoved),
671 1 : humanize.Count.Uint64(m.TablesFlushed+m.TablesCompacted),
672 1 : humanize.Bytes.Uint64(m.TableBytesFlushed+m.TableBytesCompacted),
673 1 : humanize.Bytes.Uint64(m.TableBytesRead),
674 1 : redact.Safe(m.Sublevels),
675 1 : humanizeFloat(m.WriteAmp(), 4),
676 1 : )
677 1 :
678 1 : if multiExists {
679 1 : w.Printf(" | %5s %5s %5s",
680 1 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesInTop),
681 1 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesIn),
682 1 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesRead))
683 1 : }
684 1 : newline()
685 : }
686 :
687 1 : var total LevelMetrics
688 1 : for level := 0; level < numLevels; level++ {
689 1 : l := &m.Levels[level]
690 1 : w.Printf("%5d ", redact.Safe(level))
691 1 : formatRow(l)
692 1 : total.Add(l)
693 1 : total.Sublevels += l.Sublevels
694 1 : }
695 : // Compute total bytes-in as the bytes written to the WAL + bytes ingested.
696 1 : total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
697 1 : // Add the total bytes-in to the total bytes-flushed. This is to account for
698 1 : // the bytes written to the log and bytes written externally and then
699 1 : // ingested.
700 1 : total.TableBytesFlushed += total.TableBytesIn
701 1 : total.Score = math.NaN()
702 1 : total.FillFactor = math.NaN()
703 1 : total.CompensatedFillFactor = math.NaN()
704 1 : w.SafeString("total ")
705 1 : formatRow(&total)
706 1 :
707 1 : w.SafeString("----------------------------------------------------------------------------------------------------------------------------")
708 1 : appendIfMulti("--------------------")
709 1 : newline()
710 1 : w.Printf("WAL: %d files (%s) in: %s written: %s (%.0f%% overhead)",
711 1 : redact.Safe(m.WAL.Files),
712 1 : humanize.Bytes.Uint64(m.WAL.Size),
713 1 : humanize.Bytes.Uint64(m.WAL.BytesIn),
714 1 : humanize.Bytes.Uint64(m.WAL.BytesWritten),
715 1 : redact.Safe(percent(int64(m.WAL.BytesWritten)-int64(m.WAL.BytesIn), int64(m.WAL.BytesIn))))
716 1 : failoverStats := m.WAL.Failover
717 1 : failoverStats.FailoverWriteAndSyncLatency = nil
718 1 : if failoverStats == (wal.FailoverStats{}) {
719 1 : w.Printf("\n")
720 1 : } else {
721 0 : w.Printf(" failover: (switches: %d, primary: %s, secondary: %s)\n", m.WAL.Failover.DirSwitchCount,
722 0 : m.WAL.Failover.PrimaryWriteDuration.String(), m.WAL.Failover.SecondaryWriteDuration.String())
723 0 : }
724 :
725 1 : w.Printf("Flushes: %d\n", redact.Safe(m.Flush.Count))
726 1 :
727 1 : w.Printf("Compactions: %d estimated debt: %s in progress: %d (%s) canceled: %d (%s) failed: %d problem spans: %d\n",
728 1 : redact.Safe(m.Compact.Count),
729 1 : humanize.Bytes.Uint64(m.Compact.EstimatedDebt),
730 1 : redact.Safe(m.Compact.NumInProgress),
731 1 : humanize.Bytes.Int64(m.Compact.InProgressBytes),
732 1 : redact.Safe(m.Compact.CancelledCount),
733 1 : humanize.Bytes.Int64(m.Compact.CancelledBytes),
734 1 : redact.Safe(m.Compact.FailedCount),
735 1 : redact.Safe(m.Compact.NumProblemSpans),
736 1 : )
737 1 :
738 1 : w.Printf(" default: %d delete: %d elision: %d move: %d read: %d tombstone-density: %d rewrite: %d copy: %d multi-level: %d\n",
739 1 : redact.Safe(m.Compact.DefaultCount),
740 1 : redact.Safe(m.Compact.DeleteOnlyCount),
741 1 : redact.Safe(m.Compact.ElisionOnlyCount),
742 1 : redact.Safe(m.Compact.MoveCount),
743 1 : redact.Safe(m.Compact.ReadCount),
744 1 : redact.Safe(m.Compact.TombstoneDensityCount),
745 1 : redact.Safe(m.Compact.RewriteCount),
746 1 : redact.Safe(m.Compact.CopyCount),
747 1 : redact.Safe(m.Compact.MultiLevelCount),
748 1 : )
749 1 :
750 1 : w.Printf("MemTables: %d (%s) zombie: %d (%s)\n",
751 1 : redact.Safe(m.MemTable.Count),
752 1 : humanize.Bytes.Uint64(m.MemTable.Size),
753 1 : redact.Safe(m.MemTable.ZombieCount),
754 1 : humanize.Bytes.Uint64(m.MemTable.ZombieSize))
755 1 :
756 1 : w.Printf("Zombie tables: %d (%s, local: %s)\n",
757 1 : redact.Safe(m.Table.ZombieCount),
758 1 : humanize.Bytes.Uint64(m.Table.ZombieSize),
759 1 : humanize.Bytes.Uint64(m.Table.Local.ZombieSize))
760 1 :
761 1 : w.Printf("Backing tables: %d (%s)\n",
762 1 : redact.Safe(m.Table.BackingTableCount),
763 1 : humanize.Bytes.Uint64(m.Table.BackingTableSize))
764 1 : w.Printf("Virtual tables: %d (%s)\n",
765 1 : redact.Safe(m.NumVirtual()),
766 1 : humanize.Bytes.Uint64(m.VirtualSize()))
767 1 : w.Printf("Local tables size: %s\n", humanize.Bytes.Uint64(m.Table.Local.LiveSize))
768 1 : w.SafeString("Compression types:")
769 1 : if count := m.Table.CompressedCountSnappy; count > 0 {
770 1 : w.Printf(" snappy: %d", redact.Safe(count))
771 1 : }
772 1 : if count := m.Table.CompressedCountZstd; count > 0 {
773 0 : w.Printf(" zstd: %d", redact.Safe(count))
774 0 : }
775 1 : if count := m.Table.CompressedCountMinLZ; count > 0 {
776 0 : w.Printf(" minlz: %d", redact.Safe(count))
777 0 : }
778 1 : if count := m.Table.CompressedCountNone; count > 0 {
779 0 : w.Printf(" none: %d", redact.Safe(count))
780 0 : }
781 1 : if count := m.Table.CompressedCountUnknown; count > 0 {
782 1 : w.Printf(" unknown: %d", redact.Safe(count))
783 1 : }
784 1 : w.Printf("\n")
785 1 : if m.Table.Garbage.PointDeletionsBytesEstimate > 0 || m.Table.Garbage.RangeDeletionsBytesEstimate > 0 {
786 1 : w.Printf("Garbage: point-deletions %s range-deletions %s\n",
787 1 : humanize.Bytes.Uint64(m.Table.Garbage.PointDeletionsBytesEstimate),
788 1 : humanize.Bytes.Uint64(m.Table.Garbage.RangeDeletionsBytesEstimate))
789 1 : }
790 1 : w.Printf("Table stats: ")
791 1 : if !m.Table.InitialStatsCollectionComplete {
792 1 : w.Printf("initial load in progress")
793 1 : } else if m.Table.PendingStatsCollectionCount == 0 {
794 1 : w.Printf("all loaded")
795 1 : } else {
796 1 : w.Printf("%s", humanize.Count.Int64(m.Table.PendingStatsCollectionCount))
797 1 : }
798 1 : w.Printf("\n")
799 1 :
800 1 : formatCacheMetrics := func(m *CacheMetrics, name redact.SafeString) {
801 1 : w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n",
802 1 : name,
803 1 : humanize.Count.Int64(m.Count),
804 1 : humanize.Bytes.Int64(m.Size),
805 1 : redact.Safe(hitRate(m.Hits, m.Misses)))
806 1 : }
807 1 : formatCacheMetrics(&m.BlockCache, "Block cache")
808 1 : formatCacheMetrics(&m.FileCache, "Table cache")
809 1 :
810 1 : formatSharedCacheMetrics := func(w redact.SafePrinter, m *SecondaryCacheMetrics, name redact.SafeString) {
811 0 : w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n",
812 0 : name,
813 0 : humanize.Count.Int64(m.Count),
814 0 : humanize.Bytes.Int64(m.Size),
815 0 : redact.Safe(hitRate(m.ReadsWithFullHit, m.ReadsWithPartialHit+m.ReadsWithNoHit)))
816 0 : }
817 1 : if m.SecondaryCacheMetrics.Size > 0 || m.SecondaryCacheMetrics.ReadsWithFullHit > 0 {
818 0 : formatSharedCacheMetrics(w, &m.SecondaryCacheMetrics, "Secondary cache")
819 0 : }
820 :
821 1 : w.Printf("Range key sets: %s Tombstones: %s Total missized tombstones encountered: %s\n",
822 1 : humanize.Count.Uint64(m.Keys.RangeKeySetsCount),
823 1 : humanize.Count.Uint64(m.Keys.TombstoneCount),
824 1 : humanize.Count.Uint64(m.Keys.MissizedTombstonesCount),
825 1 : )
826 1 :
827 1 : w.Printf("Snapshots: %d earliest seq num: %d\n",
828 1 : redact.Safe(m.Snapshots.Count),
829 1 : redact.Safe(m.Snapshots.EarliestSeqNum))
830 1 :
831 1 : w.Printf("Table iters: %d\n", redact.Safe(m.TableIters))
832 1 : w.Printf("Filter utility: %.1f%%\n", redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
833 1 : w.Printf("Ingestions: %d as flushable: %d (%s in %d tables)\n",
834 1 : redact.Safe(m.Ingest.Count),
835 1 : redact.Safe(m.Flush.AsIngestCount),
836 1 : humanize.Bytes.Uint64(m.Flush.AsIngestBytes),
837 1 : redact.Safe(m.Flush.AsIngestTableCount))
838 1 :
839 1 : var inUseTotal uint64
840 1 : for i := range m.manualMemory {
841 1 : inUseTotal += m.manualMemory[i].InUseBytes
842 1 : }
843 1 : inUse := func(purpose manual.Purpose) uint64 {
844 1 : return m.manualMemory[purpose].InUseBytes
845 1 : }
846 1 : w.Printf("Cgo memory usage: %s block cache: %s (data: %s, maps: %s, entries: %s) memtables: %s\n",
847 1 : humanize.Bytes.Uint64(inUseTotal),
848 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheData)+inUse(manual.BlockCacheMap)+inUse(manual.BlockCacheEntry)),
849 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheData)),
850 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheMap)),
851 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheEntry)),
852 1 : humanize.Bytes.Uint64(inUse(manual.MemTable)),
853 1 : )
854 : }
855 :
856 1 : func hitRate(hits, misses int64) float64 {
857 1 : return percent(hits, hits+misses)
858 1 : }
859 :
860 1 : func percent(numerator, denominator int64) float64 {
861 1 : if denominator == 0 {
862 1 : return 0
863 1 : }
864 1 : return 100 * float64(numerator) / float64(denominator)
865 : }
866 :
867 : // StringForTests is identical to m.String() on 64-bit platforms. It is used to
868 : // provide a platform-independent result for tests.
869 1 : func (m *Metrics) StringForTests() string {
870 1 : mCopy := *m
871 1 : if math.MaxInt == math.MaxInt32 {
872 0 : // This is the difference in Sizeof(sstable.Reader{}) between 64 and 32 bit
873 0 : // platforms.
874 0 : const tableCacheSizeAdjustment = 212
875 0 : mCopy.FileCache.Size += mCopy.FileCache.Count * tableCacheSizeAdjustment
876 0 : }
877 : // Don't show cgo memory statistics as they can vary based on architecture,
878 : // invariants tag, etc.
879 1 : mCopy.manualMemory = manual.Metrics{}
880 1 : return redact.StringWithoutMarkers(&mCopy)
881 : }
882 :
883 : // levelMetricsDelta accumulates incremental ("delta") level metric updates
884 : // (e.g. from compactions or flushes).
885 : type levelMetricsDelta [manifest.NumLevels]*LevelMetrics
886 :
887 1 : func (m *Metrics) updateLevelMetrics(updates levelMetricsDelta) {
888 1 : for i, u := range updates {
889 1 : if u != nil {
890 1 : m.Levels[i].Add(u)
891 1 : }
892 : }
893 : }
894 :
895 : // humanizeFloat formats a float64 value as a string. It shows up to two
896 : // decimals, depending on the target length. NaN is shown as "-".
897 1 : func humanizeFloat(v float64, targetLength int) redact.SafeString {
898 1 : if math.IsNaN(v) {
899 1 : return "-"
900 1 : }
901 : // We treat 0 specially. Values near zero will show up as 0.00.
902 1 : if v == 0 {
903 1 : return "0"
904 1 : }
905 1 : res := fmt.Sprintf("%.2f", v)
906 1 : if len(res) <= targetLength {
907 1 : return redact.SafeString(res)
908 1 : }
909 1 : if len(res) == targetLength+1 {
910 1 : return redact.SafeString(fmt.Sprintf("%.1f", v))
911 1 : }
912 1 : return redact.SafeString(fmt.Sprintf("%.0f", v))
913 : }
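// Worked examples with targetLength=4, as used by formatRow above:
//
//	humanizeFloat(math.NaN(), 4) // "-"    (zero scores print as a dash)
//	humanizeFloat(0, 4)          // "0"
//	humanizeFloat(2.09, 4)       // "2.09" (fits as-is)
//	humanizeFloat(12.34, 4)      // "12.3" (one char over: one decimal)
//	humanizeFloat(123.456, 4)    // "123"  (two+ chars over: no decimals)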