Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package pebble
6 :
7 : import (
8 : "fmt"
9 : "math"
10 : "time"
11 : "unsafe"
12 :
13 : "github.com/cockroachdb/pebble/internal/base"
14 : "github.com/cockroachdb/pebble/internal/cache"
15 : "github.com/cockroachdb/pebble/internal/humanize"
16 : "github.com/cockroachdb/pebble/internal/manifest"
17 : "github.com/cockroachdb/pebble/internal/manual"
18 : "github.com/cockroachdb/pebble/objstorage/objstorageprovider/sharedcache"
19 : "github.com/cockroachdb/pebble/record"
20 : "github.com/cockroachdb/pebble/sstable"
21 : "github.com/cockroachdb/pebble/sstable/blob"
22 : "github.com/cockroachdb/pebble/sstable/block"
23 : "github.com/cockroachdb/pebble/wal"
24 : "github.com/cockroachdb/redact"
25 : "github.com/prometheus/client_golang/prometheus"
26 : )
27 :
28 : // CacheMetrics holds metrics for the block and file cache.
29 : type CacheMetrics = cache.Metrics
30 :
31 : // FilterMetrics holds metrics for the filter policy
32 : type FilterMetrics = sstable.FilterMetrics
33 :
34 : // ThroughputMetric is a cumulative throughput metric. See the detailed
35 : // comment in base.
36 : type ThroughputMetric = base.ThroughputMetric
37 :
38 : // SecondaryCacheMetrics holds metrics for the persistent secondary cache
39 : // that caches commonly accessed blocks from blob storage on a local
40 : // file system.
41 : type SecondaryCacheMetrics = sharedcache.Metrics
42 :
43 : // LevelMetrics holds per-level metrics such as the number of files and total
44 : // size of the files, and compaction related metrics.
45 : type LevelMetrics struct {
46 : // The number of sublevels within the level. The sublevel count corresponds
47 : // to the read amplification for the level. An empty level will have a
48 : // sublevel count of 0, implying no read amplification. Only L0 will have
49 : // a sublevel count other than 0 or 1.
50 : Sublevels int32
51 : // The total count of sstables in the level.
52 : TablesCount int64
53 : // The total size in bytes of the sstables in the level. Note that if tables
54 : // contain references to blob files, this quantity does not include the
55 : // size of the blob files or the referenced values.
56 : TablesSize int64
57 : // The total number of virtual sstables in the level.
58 : VirtualTablesCount uint64
59 : // The total size of the virtual sstables in the level.
60 : VirtualTablesSize uint64
61 : // The estimated total physical size of all blob references across all
62 : // sstables in the level. The physical size is estimated based on the size
63 : // of the referenced values and the blob files' compression ratios.
64 : EstimatedReferencesSize uint64
65 : // The level's compaction score, used to rank levels (0 if the level doesn't
66 : // need compaction). See candidateLevelInfo.
67 : Score float64
68 : // The level's fill factor (the ratio between the size of the level and the
69 : // ideal size). See candidateLevelInfo.
70 : FillFactor float64
71 : // The level's compensated fill factor. See candidateLevelInfo.
72 : CompensatedFillFactor float64
73 : // The number of incoming bytes from other levels' sstables read during
74 : // compactions. This excludes bytes moved and bytes ingested. For L0 this is
75 : // the bytes written to the WAL.
76 : TableBytesIn uint64
77 : // The number of sstable bytes ingested. The sibling metric for tables is
78 : // TablesIngested.
79 : TableBytesIngested uint64
80 : // The number of sstable bytes moved into the level by a "move" compaction.
81 : // The sibling metric for tables is TablesMoved.
82 : TableBytesMoved uint64
83 : // The number of bytes read for compactions at the level. This includes bytes
84 : // read from other levels (TableBytesIn), as well as bytes read for the level.
85 : TableBytesRead uint64
86 : // The number of bytes written to sstables during compactions. The sibling
87 : // metric for tables is TablesCompacted. This metric may be summed with
88 : // TableBytesFlushed to compute the total bytes written for the level.
89 : TableBytesCompacted uint64
90 : // The number of bytes written to sstables during flushes. The sibling
91 : // metric for tables is TablesFlushed. This metric is always zero for all
92 : // levels other than L0.
93 : TableBytesFlushed uint64
94 : // The number of sstables compacted to this level.
95 : TablesCompacted uint64
96 : // The number of sstables flushed to this level.
97 : TablesFlushed uint64
98 : // The number of sstables ingested into the level.
99 : TablesIngested uint64
100 : // The number of sstables moved to this level by a "move" compaction.
101 : TablesMoved uint64
102 : // The number of sstables deleted in a level by a delete-only compaction.
103 : TablesDeleted uint64
104 : // The number of sstables excised in a level by a delete-only compaction.
105 : TablesExcised uint64
106 : // BlobBytesReadEstimate is an estimate of the physical bytes corresponding
107 : // to values referenced by sstables that were inputs into compactions
108 : // outputting into this level.
109 : BlobBytesReadEstimate uint64
110 : // BlobBytesCompacted is the number of bytes written to blob files while
111 : // compacting sstables in this level.
112 : BlobBytesCompacted uint64
113 : // BlobBytesFlushed is the number of bytes written to blob files while
114 : // flushing sstables. This metric is always zero for all levels other than
115 : // L0.
116 : BlobBytesFlushed uint64
117 :
118 : MultiLevel struct {
119 : // TableBytesInTop is the total number of bytes in a multilevel compaction
120 : // coming from the top level.
121 : TableBytesInTop uint64
122 :
123 : // TableBytesIn, exclusively for multilevel compactions.
124 : TableBytesIn uint64
125 :
126 : // TableBytesRead, exclusively for multilevel compactions.
127 : TableBytesRead uint64
128 : }
129 :
130 : // Additional contains miscellaneous metrics that are not always printed.
131 : Additional struct {
132 : // The sum of Properties.ValueBlocksSize for all the sstables in this
133 : // level. Printed by LevelMetrics.format iff there is at least one level
134 : // with a non-zero value.
135 : ValueBlocksSize uint64
136 : // Cumulative metrics about bytes written to data blocks and value blocks,
137 : // via compactions (except move compactions) or flushes. Not printed by
138 : // LevelMetrics.format, but are available to sophisticated clients.
139 : BytesWrittenDataBlocks uint64
140 : BytesWrittenValueBlocks uint64
141 : }
142 : }
143 :
144 : // AggregateSize returns an estimated physical size of the level's sstables and
145 : // their referenced values stored in blob files. The size of physical sstables
146 : // is exactly known. Virtual sstables' sizes are estimated, and the size of
147 : // values stored in blob files is estimated based on the volume of referenced
148 : // data and the blob file's compression ratio.
149 1 : func (m *LevelMetrics) AggregateSize() int64 {
150 1 : return m.TablesSize + int64(m.EstimatedReferencesSize)
151 1 : }
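// A worked example (illustrative values, not from the original source): a
// level whose sstables total 100 MB and whose blob references are estimated
// at 20 MB has an aggregate size of 120 MB.
//
//	m := LevelMetrics{TablesSize: 100 << 20, EstimatedReferencesSize: 20 << 20}
//	_ = m.AggregateSize() // 125829120 bytes (120 MB)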
152 :
153 : // Add updates the counter metrics for the level.
154 1 : func (m *LevelMetrics) Add(u *LevelMetrics) {
155 1 : m.TablesCount += u.TablesCount
156 1 : m.TablesSize += u.TablesSize
157 1 : m.VirtualTablesCount += u.VirtualTablesCount
158 1 : m.VirtualTablesSize += u.VirtualTablesSize
159 1 : m.EstimatedReferencesSize += u.EstimatedReferencesSize
160 1 : m.TableBytesIn += u.TableBytesIn
161 1 : m.TableBytesIngested += u.TableBytesIngested
162 1 : m.TableBytesMoved += u.TableBytesMoved
163 1 : m.TableBytesRead += u.TableBytesRead
164 1 : m.TableBytesCompacted += u.TableBytesCompacted
165 1 : m.TableBytesFlushed += u.TableBytesFlushed
166 1 : m.TablesCompacted += u.TablesCompacted
167 1 : m.TablesFlushed += u.TablesFlushed
168 1 : m.TablesIngested += u.TablesIngested
169 1 : m.TablesMoved += u.TablesMoved
170 1 : m.BlobBytesCompacted += u.BlobBytesCompacted
171 1 : m.BlobBytesFlushed += u.BlobBytesFlushed
172 1 : m.BlobBytesReadEstimate += u.BlobBytesReadEstimate
173 1 : m.MultiLevel.TableBytesInTop += u.MultiLevel.TableBytesInTop
174 1 : m.MultiLevel.TableBytesRead += u.MultiLevel.TableBytesRead
175 1 : m.MultiLevel.TableBytesIn += u.MultiLevel.TableBytesIn
176 1 : m.Additional.BytesWrittenDataBlocks += u.Additional.BytesWrittenDataBlocks
177 1 : m.Additional.BytesWrittenValueBlocks += u.Additional.BytesWrittenValueBlocks
178 1 : m.Additional.ValueBlocksSize += u.Additional.ValueBlocksSize
179 1 : }
180 :
181 : // WriteAmp computes the write amplification for compactions at this
182 : // level.
183 : //
184 : // The write amplification is computed as the quantity of physical bytes written
185 : // divided by the quantity of logical bytes written.
186 : //
187 : // Concretely, it's computed as:
188 : //
189 : // TableBytesFlushed + TableBytesCompacted + BlobBytesFlushed + BlobBytesCompacted
190 : // -------------------------------------------------------------------------------
191 : // TableBytesIn
192 1 : func (m *LevelMetrics) WriteAmp() float64 {
193 1 : if m.TableBytesIn == 0 {
194 1 : return 0
195 1 : }
196 1 : return float64(m.TableBytesFlushed+m.TableBytesCompacted+m.BlobBytesFlushed+m.BlobBytesCompacted) /
197 1 : float64(m.TableBytesIn)
198 : }
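// A worked example (illustrative values): a level that received 100 logical
// bytes and wrote 180 sstable bytes plus 20 blob-file bytes during
// compactions has a write amplification of 2.0.
//
//	m := LevelMetrics{TableBytesIn: 100, TableBytesCompacted: 180, BlobBytesCompacted: 20}
//	_ = m.WriteAmp() // (0 + 180 + 0 + 20) / 100 = 2.0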
199 :
200 : var categoryCompaction = block.RegisterCategory("pebble-compaction", block.NonLatencySensitiveQoSLevel)
201 : var categoryIngest = block.RegisterCategory("pebble-ingest", block.LatencySensitiveQoSLevel)
202 : var categoryGet = block.RegisterCategory("pebble-get", block.LatencySensitiveQoSLevel)
203 :
204 : // Metrics holds metrics for various subsystems of the DB such as the Cache,
205 : // Compactions, WAL, and per-Level metrics.
206 : //
207 : // TODO(peter): The testing of these metrics is relatively weak. There should
208 : // be testing that performs various operations on a DB and verifies that the
209 : // metrics reflect those operations.
210 : type Metrics struct {
211 : BlockCache CacheMetrics
212 :
213 : Compact struct {
214 : // The total number of compactions, and per-compaction type counts.
215 : Count int64
216 : DefaultCount int64
217 : DeleteOnlyCount int64
218 : ElisionOnlyCount int64
219 : CopyCount int64
220 : MoveCount int64
221 : ReadCount int64
222 : TombstoneDensityCount int64
223 : RewriteCount int64
224 : MultiLevelCount int64
225 : BlobFileRewriteCount int64
226 : CounterLevelCount int64
227 : // An estimate of the number of bytes that need to be compacted for the LSM
228 : // to reach a stable state.
229 : EstimatedDebt uint64
230 : // Number of bytes present in sstables being written by in-progress
231 : // compactions. This value will be zero if there are no in-progress
232 : // compactions.
233 : InProgressBytes int64
234 : // Number of compactions that are in-progress.
235 : NumInProgress int64
236 : // Number of compactions that were cancelled.
237 : CancelledCount int64
238 : // CancelledBytes is the number of bytes written by compactions that were
239 : // cancelled.
240 : CancelledBytes int64
241 : // Total number of compactions that hit an error.
242 : FailedCount int64
243 : // NumProblemSpans is the current (instantaneous) count of "problem spans"
244 : // which temporarily block compactions.
245 : NumProblemSpans int
246 : // MarkedFiles is a count of files that are marked for
247 : // compaction. Such files are compacted in a rewrite compaction
248 : // when no other compactions are picked.
249 : MarkedFiles int
250 : // Duration records the cumulative duration of all compactions since the
251 : // database was opened.
252 : Duration time.Duration
253 : }
254 :
255 : Ingest struct {
256 : // The total number of ingestions
257 : Count uint64
258 : }
259 :
260 : Flush struct {
261 : // The total number of flushes.
262 : Count int64
263 : // TODO(sumeer): the IdleDuration in this metric is flawed. It only
264 : // measures idle duration when a flush finishes, representing the idleness
265 : // before the start of a flush. So computing deltas over this metric over
266 : // some time interval D may observe the sum of IdleDuration+WorkDuration
267 : // to be either much smaller or much larger than D.
268 : WriteThroughput ThroughputMetric
269 : // Number of flushes that are in-progress. In the current implementation
270 : // this will always be zero or one.
271 : NumInProgress int64
272 : // AsIngestCount is a monotonically increasing counter of flush operations
273 : // handling ingested tables.
274 : AsIngestCount uint64
275 : // AsIngestTableCount is a monotonically increasing counter of tables ingested as
276 : // flushables.
277 : AsIngestTableCount uint64
278 : // AsIngestBytes is a monotonically increasing counter of the bytes flushed
279 : // for flushables that originated as ingestion operations.
280 : AsIngestBytes uint64
281 : }
282 :
283 : Filter FilterMetrics
284 :
285 : Levels [numLevels]LevelMetrics
286 :
287 : MemTable struct {
288 : // The number of bytes allocated by memtables and large (flushable)
289 : // batches.
290 : Size uint64
291 : // The count of memtables.
292 : Count int64
293 : // The number of bytes present in zombie memtables which are no longer
294 : // referenced by the current DB state. An unbounded number of memtables
295 : // may be zombie if they're still in use by an iterator. One additional
296 : // memtable may be zombie if it's no longer in use and waiting to be
297 : // recycled.
298 : ZombieSize uint64
299 : // The count of zombie memtables.
300 : ZombieCount int64
301 : }
302 :
303 : Keys struct {
304 : // The approximate count of internal range key set keys in the database.
305 : RangeKeySetsCount uint64
306 : // The approximate count of internal tombstones (DEL, SINGLEDEL and
307 : // RANGEDEL key kinds) within the database.
308 : TombstoneCount uint64
309 : // A cumulative total number of missized DELSIZED keys encountered by
310 : // compactions since the database was opened.
311 : MissizedTombstonesCount uint64
312 : }
313 :
314 : Snapshots struct {
315 : // The number of currently open snapshots.
316 : Count int
317 : // The sequence number of the earliest, currently open snapshot.
318 : EarliestSeqNum base.SeqNum
319 : // A running tally of keys written to sstables during flushes or
320 : // compactions that would've been elided if it weren't for open
321 : // snapshots.
322 : PinnedKeys uint64
323 : // A running cumulative sum of the size of keys and values written to
324 : // sstables during flushes or compactions that would've been elided if
325 : // it weren't for open snapshots.
326 : PinnedSize uint64
327 : }
328 :
329 : Table struct {
330 : // The number of bytes present in obsolete tables which are no longer
331 : // referenced by the current DB state or any open iterators.
332 : ObsoleteSize uint64
333 : // The count of obsolete tables.
334 : ObsoleteCount int64
335 : // The number of bytes present in zombie tables which are no longer
336 : // referenced by the current DB state but are still in use by an iterator.
337 : ZombieSize uint64
338 : // The count of zombie tables.
339 : ZombieCount int64
340 : // The count of sstables backing virtual tables.
341 : BackingTableCount uint64
342 : // The sum of the sizes of the BackingTableCount sstables that are backing virtual tables.
343 : BackingTableSize uint64
344 : // The number of sstables that are compressed with an unknown compression
345 : // algorithm.
346 : CompressedCountUnknown int64
347 : // The number of sstables that are compressed with the default compression
348 : // algorithm, snappy.
349 : CompressedCountSnappy int64
350 : // The number of sstables that are compressed with zstd.
351 : CompressedCountZstd int64
352 : // The number of sstables that are compressed with minlz.
353 : CompressedCountMinLZ int64
354 : // The number of sstables that are uncompressed.
355 : CompressedCountNone int64
356 :
357 : // Local file sizes.
358 : Local struct {
359 : // LiveSize is the number of bytes in live tables.
360 : LiveSize uint64
361 : // LiveCount is the number of live tables.
362 : LiveCount uint64
363 : // ObsoleteSize is the number of bytes in obsolete tables.
364 : ObsoleteSize uint64
365 : // ObsoleteCount is the number of obsolete tables.
366 : ObsoleteCount uint64
367 : // ZombieSize is the number of bytes in zombie tables.
368 : ZombieSize uint64
369 : // ZombieCount is the number of zombie tables.
370 : ZombieCount uint64
371 : }
372 :
373 : // Garbage bytes.
374 : Garbage struct {
375 : // PointDeletionsBytesEstimate is the estimated file bytes that will be
376 : // saved by compacting all point deletions. This is dependent on table
377 : // stats collection, so can be very incomplete until
378 : // InitialStatsCollectionComplete becomes true.
379 : PointDeletionsBytesEstimate uint64
380 : // RangeDeletionsBytesEstimate is the estimated file bytes that will be
381 : // saved by compacting all range deletions. This is dependent on table
382 : // stats collection, so can be very incomplete until
383 : // InitialStatsCollectionComplete becomes true.
384 : RangeDeletionsBytesEstimate uint64
385 : }
386 :
387 : // Whether the initial stats collection (for existing tables on Open) is
388 : // complete.
389 : InitialStatsCollectionComplete bool
390 : // The count of recently created sstables that need stats collection. This
391 : // does not include sstables that existed when the DB was opened, so the
392 : // value is only useful when InitialStatsCollectionComplete is true.
393 : PendingStatsCollectionCount int64
394 : }
395 :
396 : BlobFiles struct {
397 : // The count of all live blob files.
398 : LiveCount uint64
399 : // The physical file size of all live blob files.
400 : LiveSize uint64
401 : // ValueSize is the sum of the length of the uncompressed values in all
402 : // live (referenced by some sstable(s) within the current version) blob
403 : // files. ValueSize may be greater than LiveSize when compression is
404 : // effective. ValueSize includes bytes in live blob files that are not
405 : // actually reachable by any sstable key. If any value within the blob
406 : // file is reachable by a key in a live sstable, then the entirety of
407 : // the blob file's values is included within ValueSize.
408 : ValueSize uint64
409 : // ReferencedValueSize is the sum of the length of the uncompressed
410 : // values (in all live blob files) that are still referenced by keys
411 : // within live tables. Over the lifetime of a blob file, a blob file's
412 : // references are removed as some compactions choose to write new blob
413 : // files containing the same values or keys referencing the file's
414 : // values are deleted. ReferencedValueSize accounts for the volume of bytes
415 : // that are actually reachable by some key in a live table.
416 : //
417 : // The difference between ValueSize and ReferencedValueSize is
418 : // (uncompressed) space amplification that could be reclaimed if all
419 : // blob files were rewritten, discarding values that are no longer
420 : // referenced by any keys in any sstables within the current version.
421 : ReferencedValueSize uint64
422 : // The count of all obsolete blob files.
423 : ObsoleteCount uint64
424 : // The physical size of all obsolete blob files.
425 : ObsoleteSize uint64
426 : // The count of all zombie blob files.
427 : ZombieCount uint64
428 : // The physical size of all zombie blob files.
429 : ZombieSize uint64
430 : // Local file sizes.
431 : Local struct {
432 : // LiveSize is the physical size of local live blob files.
433 : LiveSize uint64
434 : // LiveCount is the number of local live blob files.
435 : LiveCount uint64
436 : // ObsoleteSize is the physical size of local obsolete blob files.
437 : ObsoleteSize uint64
438 : // ObsoleteCount is the number of local obsolete blob files.
439 : ObsoleteCount uint64
440 : // ZombieSize is the physical size of local zombie blob files.
441 : ZombieSize uint64
442 : // ZombieCount is the number of local zombie blob files.
443 : ZombieCount uint64
444 : }
445 : }
446 :
447 : FileCache FileCacheMetrics
448 :
449 : // The number of open sstable iterators.
450 : TableIters int64
451 : // Uptime is the total time since this DB was opened.
452 : Uptime time.Duration
453 :
454 : WAL struct {
455 : // Number of live WAL files.
456 : Files int64
457 : // Number of obsolete WAL files.
458 : ObsoleteFiles int64
459 : // Physical size of the obsolete WAL files.
460 : ObsoletePhysicalSize uint64
461 : // Size of the live data in the WAL files. Note that with WAL file
462 : // recycling this is less than the actual on-disk size of the WAL files.
463 : Size uint64
464 : // Physical size of the WAL files on-disk. With WAL file recycling,
465 : // this is greater than the live data in WAL files.
466 : //
467 : // TODO(sumeer): it seems this does not include ObsoletePhysicalSize.
468 : // Should the comment be updated?
469 : PhysicalSize uint64
470 : // Number of logical bytes written to the WAL.
471 : BytesIn uint64
472 : // Number of bytes written to the WAL.
473 : BytesWritten uint64
474 : // Failover contains failover stats. Empty if failover is not enabled.
475 : Failover wal.FailoverStats
476 : }
477 :
478 : LogWriter struct {
479 : FsyncLatency prometheus.Histogram
480 : record.LogWriterMetrics
481 : }
482 :
483 : CategoryStats []block.CategoryStatsAggregate
484 :
485 : SecondaryCacheMetrics SecondaryCacheMetrics
486 :
487 : private struct {
488 : optionsFileSize uint64
489 : manifestFileSize uint64
490 : }
491 :
492 : manualMemory manual.Metrics
493 : }
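// Illustrative sketch (not part of the original source): callers typically
// obtain a point-in-time snapshot of these metrics from an open DB via
// (*DB).Metrics and read individual fields. The *pebble.DB named db is
// assumed.
//
//	m := db.Metrics()
//	fmt.Printf("read amp: %d compactions: %d disk usage: %s\n",
//		m.ReadAmp(), m.Compact.Count, humanize.Bytes.Uint64(m.DiskSpaceUsage()))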
494 :
495 : var (
496 : // FsyncLatencyBuckets are prometheus histogram buckets suitable for a histogram
497 : // that records latencies for fsyncs.
498 : FsyncLatencyBuckets = append(
499 : prometheus.LinearBuckets(0.0, float64(time.Microsecond*100), 50),
500 : prometheus.ExponentialBucketsRange(float64(time.Millisecond*5), float64(10*time.Second), 50)...,
501 : )
502 :
503 : // SecondaryCacheIOBuckets is exported so that users of package pebble (e.g.
504 : // CRDB) can export metrics with these buckets.
505 : SecondaryCacheIOBuckets = sharedcache.IOBuckets
506 : // SecondaryCacheChannelWriteBuckets is exported so that users of package
507 : // pebble (e.g. CRDB) can export metrics with these buckets.
508 : SecondaryCacheChannelWriteBuckets = sharedcache.ChannelWriteBuckets
509 : )
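// A minimal sketch (the metric name is hypothetical) of wiring
// FsyncLatencyBuckets into a prometheus histogram, e.g. to populate
// Metrics.LogWriter.FsyncLatency. Note that the bucket boundaries are
// float64 nanoseconds, since they are derived from time.Duration values.
//
//	fsyncLatency := prometheus.NewHistogram(prometheus.HistogramOpts{
//		Name:    "pebble_wal_fsync_latency",
//		Buckets: FsyncLatencyBuckets,
//	})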
510 :
511 : // DiskSpaceUsage returns the total disk space used by the database in bytes,
512 : // including live and obsolete files. This only includes local files, i.e.,
513 : // remote files (as known to objstorage.Provider) are not included.
514 1 : func (m *Metrics) DiskSpaceUsage() uint64 {
515 1 : var usageBytes uint64
516 1 : usageBytes += m.WAL.PhysicalSize
517 1 : usageBytes += m.WAL.ObsoletePhysicalSize
518 1 : usageBytes += m.Table.Local.LiveSize
519 1 : usageBytes += m.Table.Local.ObsoleteSize
520 1 : usageBytes += m.Table.Local.ZombieSize
521 1 : usageBytes += m.BlobFiles.Local.LiveSize
522 1 : usageBytes += m.BlobFiles.Local.ObsoleteSize
523 1 : usageBytes += m.BlobFiles.Local.ZombieSize
524 1 : usageBytes += m.private.optionsFileSize
525 1 : usageBytes += m.private.manifestFileSize
526 1 : // TODO(sumeer): InProgressBytes does not distinguish between local and
527 1 : // remote files. This causes a small error. Fix.
528 1 : usageBytes += uint64(m.Compact.InProgressBytes)
529 1 : return usageBytes
530 1 : }
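// Illustrative usage (the budget variable is hypothetical): a caller
// monitoring local disk consumption can poll this value and compare it
// against a configured budget.
//
//	if used := m.DiskSpaceUsage(); used > diskBudgetBytes {
//		fmt.Printf("pebble is using %s of local disk\n", humanize.Bytes.Uint64(used))
//	}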
531 :
532 : // NumVirtual returns the number of virtual sstables in the latest version,
533 : // summed over every level in the LSM.
534 1 : func (m *Metrics) NumVirtual() uint64 {
535 1 : var n uint64
536 1 : for _, level := range m.Levels {
537 1 : n += level.VirtualTablesCount
538 1 : }
539 1 : return n
540 : }
541 :
542 : // VirtualSize returns the sum of the sizes of the virtual sstables in the
543 : // latest version. BackingTableSize - VirtualSize gives an estimate for
544 : // the space amplification caused by not compacting virtual sstables.
545 1 : func (m *Metrics) VirtualSize() uint64 {
546 1 : var size uint64
547 1 : for _, level := range m.Levels {
548 1 : size += level.VirtualTablesSize
549 1 : }
550 1 : return size
551 : }
552 :
553 : // ReadAmp returns the current read amplification of the database.
554 : // It's computed as the number of sublevels in L0 + the number of non-empty
555 : // levels below L0.
556 1 : func (m *Metrics) ReadAmp() int {
557 1 : var ramp int32
558 1 : for _, l := range m.Levels {
559 1 : ramp += l.Sublevels
560 1 : }
561 1 : return int(ramp)
562 : }
563 :
564 : // Total returns the sum of the per-level metrics and WAL metrics.
565 1 : func (m *Metrics) Total() LevelMetrics {
566 1 : var total LevelMetrics
567 1 : for level := 0; level < numLevels; level++ {
568 1 : l := &m.Levels[level]
569 1 : total.Add(l)
570 1 : total.Sublevels += l.Sublevels
571 1 : }
572 : // Compute total bytes-in as the bytes written to the WAL + bytes ingested.
573 1 : total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
574 1 : // Add the total bytes-in to the total bytes-flushed. This is to account for
575 1 : // the bytes written to the log and bytes written externally and then
576 1 : // ingested.
577 1 : total.TableBytesFlushed += total.TableBytesIn
578 1 : return total
579 : }
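// For example (a sketch, not part of the original source), combining Total
// with WriteAmp yields a DB-wide write amplification that counts flush and
// compaction writes against the logical bytes written to the WAL plus the
// bytes ingested.
//
//	total := m.Total()
//	_ = total.WriteAmp()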
580 :
581 : // RemoteTablesTotal returns the total number of remote tables and their total
582 : // size. Remote tables are computed as the difference between total tables
583 : // (live + obsolete + zombie) and local tables.
584 1 : func (m *Metrics) RemoteTablesTotal() (count uint64, size uint64) {
585 1 : var liveTables, liveTableBytes int64
586 1 : for level := 0; level < numLevels; level++ {
587 1 : liveTables += m.Levels[level].TablesCount
588 1 : liveTableBytes += m.Levels[level].TablesSize
589 1 : }
590 1 : totalCount := liveTables + m.Table.ObsoleteCount + m.Table.ZombieCount
591 1 : localCount := m.Table.Local.LiveCount + m.Table.Local.ObsoleteCount + m.Table.Local.ZombieCount
592 1 : remoteCount := uint64(totalCount) - localCount
593 1 :
594 1 : totalSize := uint64(liveTableBytes) + m.Table.ObsoleteSize + m.Table.ZombieSize
595 1 : localSize := m.Table.Local.LiveSize + m.Table.Local.ObsoleteSize + m.Table.Local.ZombieSize
596 1 : remoteSize := totalSize - localSize
597 1 :
598 1 : return remoteCount, remoteSize
599 : }
600 :
601 : // String pretty-prints the metrics as below (semi-adjusted visually to keep
602 : // crlfmt from auto-reformatting it):
603 : //
604 : // | | | | ingested | moved | written | | amp | val sep | multilevel
605 : // level | tables size val-bl vtables | score ff cff | in | tables size | tables size |tables size| read | r w | refsz valblk| top in read
606 : // ------+-----------------------------+----------------+-------+--------------+--------------+-----------+-------+---------+--------------+------------------
607 : // 0 | 101 102B 0B 101 | 1.10 2.10 0.30 | 104B | 112 104B | 113 106B | 221 217B| 107B | 1 2.09 | 114B 0B| 104B 104B 104B
608 : // 1 | 201 202B 0B 201 | 1.20 2.20 0.60 | 204B | 212 204B | 213 206B | 421 417B| 207B | 2 2.04 | 214B 0B| 204B 204B 204B
609 : // 2 | 301 302B 0B 301 | 1.30 2.30 0.90 | 304B | 312 304B | 313 306B | 621 617B| 307B | 3 2.03 | 314B 0B| 304B 304B 304B
610 : // 3 | 401 402B 0B 401 | 1.40 2.40 1.20 | 404B | 412 404B | 413 406B | 821 817B| 407B | 4 2.02 | 414B 0B| 404B 404B 404B
611 : // 4 | 501 502B 0B 501 | 1.50 2.50 1.50 | 504B | 512 504B | 513 506B |1.0K 1017B| 507B | 5 2.02 | 514B 0B| 504B 504B 504B
612 : // 5 | 601 602B 0B 601 | 1.60 2.60 1.80 | 604B | 612 604B | 613 606B |1.2K 1.2KB| 607B | 6 2.01 | 614B 0B| 604B 604B 604B
613 : // 6 | 701 702B 0B 701 | - 2.70 2.10 | 704B | 712 704B | 713 706B |1.4K 1.4KB| 707B | 7 2.01 | 714B 0B| 704B 704B 704B
614 : // total | 2.8K 2.7KB 0B 2.8K | - - - | 2.8KB | 2.9K 2.8KB | 2.9K 2.8KB |5.7K 8.4KB| 2.8KB | 28 3.00 |2.8KB 0B| 2.8KB 2.8KB 2.8KB
615 : //
616 : // WAL: 22 files (24B) in: 25B written: 26B (4% overhead)
617 : // Flushes: 8
618 : // Compactions: 5 estimated debt: 6B in progress: 2 (7B)
619 : // default: 27 delete: 28 elision: 29 move: 30 read: 31 tombstone-density: 16 rewrite: 32 copy: 33 multi-level: 34
620 : // MemTables: 12 (11B) zombie: 14 (13B)
621 : // Zombie tables: 16 (15B, local: 30B)
622 : // Backing tables: 1 (2.0MB)
623 : // Virtual tables: 2807 (2.8KB)
624 : // Local tables size: 28B
625 : // Compression types:
626 : // Table stats: 31
627 : // Block cache: 2 entries (1B) hit rate: 42.9%
628 : // File cache: 18 entries (17B) hit rate: 48.7%
629 : // Range key sets: 123 Tombstones: 456 Total missized tombstones encountered: 789
630 : // Snapshots: 4 earliest seq num: 1024
631 : // Table iters: 21
632 : // Filter utility: 47.4%
633 : // Ingestions: 27 as flushable: 36 (34B in 35 tables)
634 : // Cgo memory usage: 15KB block cache: 9.0KB (data: 4.0KB, maps: 2.0KB, entries: 3.0KB) memtables: 5.0KB
635 : //
636 : //nolint:lll
637 1 : func (m *Metrics) String() string {
638 1 : return redact.StringWithoutMarkers(m)
639 1 : }
640 :
641 : var _ redact.SafeFormatter = &Metrics{}
642 :
643 : // SafeFormat implements redact.SafeFormatter.
644 1 : func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
645 1 : // NB: Pebble does not make any assumptions as to which Go primitive types
646 1 : // have been registered as safe with redact.RegisterSafeType and does not
647 1 : // register any types itself. Some of the calls to `redact.Safe`, etc are
648 1 : // superfluous in the context of CockroachDB, which registers all the Go
649 1 : // numeric types as safe.
650 1 :
651 1 : multiExists := m.Compact.MultiLevelCount > 0
652 1 : appendIfMulti := func(line redact.SafeString) {
653 1 : if multiExists {
654 1 : w.SafeString(line)
655 1 : }
656 : }
657 1 : newline := func() {
658 1 : w.SafeString("\n")
659 1 : }
660 :
661 1 : w.SafeString(" | | | | ingested | moved | written | | amp | val sep")
662 1 : appendIfMulti(" | multilevel")
663 1 : newline()
664 1 : w.SafeString("level | tables size val-bl vtables | score ff cff | in | tables size | tables size | tables size | read | r w | refsz valblk")
665 1 : appendIfMulti(" | top in read")
666 1 : newline()
667 1 : w.SafeString("------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+----------+--------------")
668 1 : appendIfMulti("-+------------------")
669 1 : newline()
670 1 :
671 1 : // formatRow prints out a row of the table.
672 1 : formatRow := func(m *LevelMetrics) {
673 1 : score := m.Score
674 1 : if score == 0 {
675 1 : // Format a zero level score as a dash.
676 1 : score = math.NaN()
677 1 : }
678 1 : w.Printf("| %5s %6s %6s %7s | %4s %4s %4s | %5s | %5s %6s | %5s %6s | %5s %6s | %5s | %3d %4s | %5s %7s",
679 1 : humanize.Count.Int64(m.TablesCount),
680 1 : humanize.Bytes.Int64(m.TablesSize),
681 1 : humanize.Bytes.Uint64(m.Additional.ValueBlocksSize),
682 1 : humanize.Count.Uint64(m.VirtualTablesCount),
683 1 : humanizeFloat(score, 4),
684 1 : humanizeFloat(m.FillFactor, 4),
685 1 : humanizeFloat(m.CompensatedFillFactor, 4),
686 1 : humanize.Bytes.Uint64(m.TableBytesIn),
687 1 : humanize.Count.Uint64(m.TablesIngested),
688 1 : humanize.Bytes.Uint64(m.TableBytesIngested),
689 1 : humanize.Count.Uint64(m.TablesMoved),
690 1 : humanize.Bytes.Uint64(m.TableBytesMoved),
691 1 : humanize.Count.Uint64(m.TablesFlushed+m.TablesCompacted),
692 1 : humanize.Bytes.Uint64(m.TableBytesFlushed+m.TableBytesCompacted),
693 1 : humanize.Bytes.Uint64(m.TableBytesRead),
694 1 : redact.Safe(m.Sublevels),
695 1 : humanizeFloat(m.WriteAmp(), 4),
696 1 : humanize.Bytes.Uint64(m.EstimatedReferencesSize),
697 1 : humanize.Bytes.Uint64(m.Additional.ValueBlocksSize),
698 1 : )
699 1 :
700 1 : if multiExists {
701 1 : w.Printf(" | %5s %5s %5s",
702 1 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesInTop),
703 1 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesIn),
704 1 : humanize.Bytes.Uint64(m.MultiLevel.TableBytesRead))
705 1 : }
706 1 : newline()
707 : }
708 :
709 1 : var total LevelMetrics
710 1 : for level := 0; level < numLevels; level++ {
711 1 : l := &m.Levels[level]
712 1 : w.Printf("%5d ", redact.Safe(level))
713 1 : formatRow(l)
714 1 : total.Add(l)
715 1 : total.Sublevels += l.Sublevels
716 1 : }
717 : // Compute total bytes-in as the bytes written to the WAL + bytes ingested.
718 1 : total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
719 1 : // Add the total bytes-in to the total bytes-flushed. This is to account for
720 1 : // the bytes written to the log and bytes written externally and then
721 1 : // ingested.
722 1 : total.TableBytesFlushed += total.TableBytesIn
723 1 : total.Score = math.NaN()
724 1 : total.FillFactor = math.NaN()
725 1 : total.CompensatedFillFactor = math.NaN()
726 1 : w.SafeString("total ")
727 1 : formatRow(&total)
728 1 :
729 1 : w.SafeString("--------------------------------------------------------------------------------------------------------------------------------------------")
730 1 : appendIfMulti("--------------------")
731 1 : newline()
732 1 : w.Printf("WAL: %d files (%s) in: %s written: %s (%.0f%% overhead)",
733 1 : redact.Safe(m.WAL.Files),
734 1 : humanize.Bytes.Uint64(m.WAL.Size),
735 1 : humanize.Bytes.Uint64(m.WAL.BytesIn),
736 1 : humanize.Bytes.Uint64(m.WAL.BytesWritten),
737 1 : redact.Safe(percent(int64(m.WAL.BytesWritten)-int64(m.WAL.BytesIn), int64(m.WAL.BytesIn))))
738 1 : failoverStats := m.WAL.Failover
739 1 : failoverStats.FailoverWriteAndSyncLatency = nil
740 1 : if failoverStats == (wal.FailoverStats{}) {
741 1 : w.Printf("\n")
742 1 : } else {
743 0 : w.Printf(" failover: (switches: %d, primary: %s, secondary: %s)\n", m.WAL.Failover.DirSwitchCount,
744 0 : m.WAL.Failover.PrimaryWriteDuration.String(), m.WAL.Failover.SecondaryWriteDuration.String())
745 0 : }
746 :
747 1 : w.Printf("Flushes: %d\n", redact.Safe(m.Flush.Count))
748 1 :
749 1 : w.Printf("Compactions: %d estimated debt: %s in progress: %d (%s) canceled: %d (%s) failed: %d problem spans: %d\n",
750 1 : redact.Safe(m.Compact.Count),
751 1 : humanize.Bytes.Uint64(m.Compact.EstimatedDebt),
752 1 : redact.Safe(m.Compact.NumInProgress),
753 1 : humanize.Bytes.Int64(m.Compact.InProgressBytes),
754 1 : redact.Safe(m.Compact.CancelledCount),
755 1 : humanize.Bytes.Int64(m.Compact.CancelledBytes),
756 1 : redact.Safe(m.Compact.FailedCount),
757 1 : redact.Safe(m.Compact.NumProblemSpans),
758 1 : )
759 1 :
760 1 : w.Printf(" default: %d delete: %d elision: %d move: %d read: %d tombstone-density: %d rewrite: %d copy: %d multi-level: %d blob-file-rewrite: %d\n",
761 1 : redact.Safe(m.Compact.DefaultCount),
762 1 : redact.Safe(m.Compact.DeleteOnlyCount),
763 1 : redact.Safe(m.Compact.ElisionOnlyCount),
764 1 : redact.Safe(m.Compact.MoveCount),
765 1 : redact.Safe(m.Compact.ReadCount),
766 1 : redact.Safe(m.Compact.TombstoneDensityCount),
767 1 : redact.Safe(m.Compact.RewriteCount),
768 1 : redact.Safe(m.Compact.CopyCount),
769 1 : redact.Safe(m.Compact.MultiLevelCount),
770 1 : redact.Safe(m.Compact.BlobFileRewriteCount),
771 1 : )
772 1 :
773 1 : w.Printf("MemTables: %d (%s) zombie: %d (%s)\n",
774 1 : redact.Safe(m.MemTable.Count),
775 1 : humanize.Bytes.Uint64(m.MemTable.Size),
776 1 : redact.Safe(m.MemTable.ZombieCount),
777 1 : humanize.Bytes.Uint64(m.MemTable.ZombieSize))
778 1 :
779 1 : w.Printf("Zombie tables: %d (%s, local: %s)\n",
780 1 : redact.Safe(m.Table.ZombieCount),
781 1 : humanize.Bytes.Uint64(m.Table.ZombieSize),
782 1 : humanize.Bytes.Uint64(m.Table.Local.ZombieSize))
783 1 :
784 1 : w.Printf("Backing tables: %d (%s)\n",
785 1 : redact.Safe(m.Table.BackingTableCount),
786 1 : humanize.Bytes.Uint64(m.Table.BackingTableSize))
787 1 : w.Printf("Virtual tables: %d (%s)\n",
788 1 : redact.Safe(m.NumVirtual()),
789 1 : humanize.Bytes.Uint64(m.VirtualSize()))
790 1 : w.Printf("Local tables size: %s\n", humanize.Bytes.Uint64(m.Table.Local.LiveSize))
791 1 : w.SafeString("Compression types:")
792 1 : if count := m.Table.CompressedCountSnappy; count > 0 {
793 1 : w.Printf(" snappy: %d", redact.Safe(count))
794 1 : }
795 1 : if count := m.Table.CompressedCountZstd; count > 0 {
796 0 : w.Printf(" zstd: %d", redact.Safe(count))
797 0 : }
798 1 : if count := m.Table.CompressedCountMinLZ; count > 0 {
799 0 : w.Printf(" minlz: %d", redact.Safe(count))
800 0 : }
801 1 : if count := m.Table.CompressedCountNone; count > 0 {
802 0 : w.Printf(" none: %d", redact.Safe(count))
803 0 : }
804 1 : if count := m.Table.CompressedCountUnknown; count > 0 {
805 1 : w.Printf(" unknown: %d", redact.Safe(count))
806 1 : }
807 1 : w.Printf("\n")
808 1 : if m.Table.Garbage.PointDeletionsBytesEstimate > 0 || m.Table.Garbage.RangeDeletionsBytesEstimate > 0 {
809 1 : w.Printf("Garbage: point-deletions %s range-deletions %s\n",
810 1 : humanize.Bytes.Uint64(m.Table.Garbage.PointDeletionsBytesEstimate),
811 1 : humanize.Bytes.Uint64(m.Table.Garbage.RangeDeletionsBytesEstimate))
812 1 : }
813 1 : w.Printf("Table stats: ")
814 1 : if !m.Table.InitialStatsCollectionComplete {
815 1 : w.Printf("initial load in progress")
816 1 : } else if m.Table.PendingStatsCollectionCount == 0 {
817 1 : w.Printf("all loaded")
818 1 : } else {
819 1 : w.Printf("%s", humanize.Count.Int64(m.Table.PendingStatsCollectionCount))
820 1 : }
821 1 : w.Printf("\n")
822 1 :
823 1 : w.Printf("Block cache: %s entries (%s) hit rate: %.1f%%\n",
824 1 : humanize.Count.Int64(m.BlockCache.Count),
825 1 : humanize.Bytes.Int64(m.BlockCache.Size),
826 1 : redact.Safe(hitRate(m.BlockCache.Hits, m.BlockCache.Misses)))
827 1 :
828 1 : w.Printf("File cache: %s tables, %s blobfiles (%s) hit rate: %.1f%%\n",
829 1 : humanize.Count.Int64(m.FileCache.TableCount),
830 1 : humanize.Count.Int64(m.FileCache.BlobFileCount),
831 1 : humanize.Bytes.Int64(m.FileCache.Size),
832 1 : redact.Safe(hitRate(m.FileCache.Hits, m.FileCache.Misses)))
833 1 :
834 1 : formatSharedCacheMetrics := func(w redact.SafePrinter, m *SecondaryCacheMetrics, name redact.SafeString) {
835 0 : w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n",
836 0 : name,
837 0 : humanize.Count.Int64(m.Count),
838 0 : humanize.Bytes.Int64(m.Size),
839 0 : redact.Safe(hitRate(m.ReadsWithFullHit, m.ReadsWithPartialHit+m.ReadsWithNoHit)))
840 0 : }
841 1 : if m.SecondaryCacheMetrics.Size > 0 || m.SecondaryCacheMetrics.ReadsWithFullHit > 0 {
842 0 : formatSharedCacheMetrics(w, &m.SecondaryCacheMetrics, "Secondary cache")
843 0 : }
844 :
845 1 : w.Printf("Range key sets: %s Tombstones: %s Total missized tombstones encountered: %s\n",
846 1 : humanize.Count.Uint64(m.Keys.RangeKeySetsCount),
847 1 : humanize.Count.Uint64(m.Keys.TombstoneCount),
848 1 : humanize.Count.Uint64(m.Keys.MissizedTombstonesCount),
849 1 : )
850 1 :
851 1 : w.Printf("Snapshots: %d earliest seq num: %d\n",
852 1 : redact.Safe(m.Snapshots.Count),
853 1 : redact.Safe(m.Snapshots.EarliestSeqNum))
854 1 :
855 1 : w.Printf("Table iters: %d\n", redact.Safe(m.TableIters))
856 1 : w.Printf("Filter utility: %.1f%%\n", redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
857 1 : w.Printf("Ingestions: %d as flushable: %d (%s in %d tables)\n",
858 1 : redact.Safe(m.Ingest.Count),
859 1 : redact.Safe(m.Flush.AsIngestCount),
860 1 : humanize.Bytes.Uint64(m.Flush.AsIngestBytes),
861 1 : redact.Safe(m.Flush.AsIngestTableCount))
862 1 :
863 1 : var inUseTotal uint64
864 1 : for i := range m.manualMemory {
865 1 : inUseTotal += m.manualMemory[i].InUseBytes
866 1 : }
867 1 : inUse := func(purpose manual.Purpose) uint64 {
868 1 : return m.manualMemory[purpose].InUseBytes
869 1 : }
870 1 : w.Printf("Cgo memory usage: %s block cache: %s (data: %s, maps: %s, entries: %s) memtables: %s\n",
871 1 : humanize.Bytes.Uint64(inUseTotal),
872 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheData)+inUse(manual.BlockCacheMap)+inUse(manual.BlockCacheEntry)),
873 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheData)),
874 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheMap)),
875 1 : humanize.Bytes.Uint64(inUse(manual.BlockCacheEntry)),
876 1 : humanize.Bytes.Uint64(inUse(manual.MemTable)),
877 1 : )
878 : }
879 :
880 1 : func hitRate(hits, misses int64) float64 {
881 1 : return percent(hits, hits+misses)
882 1 : }
883 :
884 1 : func percent(numerator, denominator int64) float64 {
885 1 : if denominator == 0 {
886 1 : return 0
887 1 : }
888 1 : return 100 * float64(numerator) / float64(denominator)
889 : }
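// Worked examples (illustrative values): hitRate(90, 10) returns 90.0, i.e.
// 90 hits out of 100 lookups. The WAL overhead in SafeFormat is computed the
// same way: with 25 logical bytes in and 26 physical bytes written,
// percent(26-25, 25) returns 4.0, matching the "4% overhead" in the sample
// output above.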
890 :
891 : // StringForTests is identical to m.String() on 64-bit platforms. It is used to
892 : // provide a platform-independent result for tests.
893 1 : func (m *Metrics) StringForTests() string {
894 1 : mCopy := *m
895 1 :
896 1 : // We recalculate the file cache size using the 64-bit sizes, and we ignore
897 1 : // the genericcache metadata size which is harder to adjust.
898 1 : const sstableReaderSize64bit = 280
899 1 : const blobFileReaderSize64bit = 96
900 1 : mCopy.FileCache.Size = mCopy.FileCache.TableCount*sstableReaderSize64bit + mCopy.FileCache.BlobFileCount*blobFileReaderSize64bit
901 1 : if math.MaxInt == math.MaxInt64 {
902 1 : // Verify the 64-bit sizes, so they are kept updated.
903 1 : if sstableReaderSize64bit != unsafe.Sizeof(sstable.Reader{}) {
904 0 : panic(fmt.Sprintf("sstableReaderSize64bit should be updated to %d", unsafe.Sizeof(sstable.Reader{})))
905 : }
906 1 : if blobFileReaderSize64bit != unsafe.Sizeof(blob.FileReader{}) {
907 0 : panic(fmt.Sprintf("blobFileReaderSize64bit should be updated to %d", unsafe.Sizeof(blob.FileReader{})))
908 : }
909 : }
910 : // Don't show cgo memory statistics as they can vary based on architecture,
911 : // invariants tag, etc.
912 1 : mCopy.manualMemory = manual.Metrics{}
913 1 : return redact.StringWithoutMarkers(&mCopy)
914 : }
915 :
916 : // levelMetricsDelta accumulates incremental ("delta") level metric updates
917 : // (e.g. from compactions or flushes).
918 : type levelMetricsDelta [manifest.NumLevels]*LevelMetrics
919 :
920 1 : func (m *levelMetricsDelta) level(level int) *LevelMetrics {
921 1 : if m[level] == nil {
922 1 : m[level] = &LevelMetrics{}
923 1 : }
924 1 : return m[level]
925 : }
926 :
927 1 : func (m *Metrics) updateLevelMetrics(updates levelMetricsDelta) {
928 1 : for i, u := range updates {
929 1 : if u != nil {
930 1 : m.Levels[i].Add(u)
931 1 : }
932 : }
933 : }
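// Illustrative sketch (hypothetical values): a flush can accumulate its
// per-level contribution in a levelMetricsDelta, allocating entries lazily
// via level, and then fold the delta into the DB metrics in one step.
//
//	var delta levelMetricsDelta
//	delta.level(0).TablesFlushed = 1
//	delta.level(0).TableBytesFlushed = 4 << 20
//	m.updateLevelMetrics(delta)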
934 :
935 : // humanizeFloat formats a float64 value as a string. It shows up to two
936 : // decimals, depending on the target length. NaN is shown as "-".
937 1 : func humanizeFloat(v float64, targetLength int) redact.SafeString {
938 1 : if math.IsNaN(v) {
939 1 : return "-"
940 1 : }
941 : // We treat 0 specially. Values near zero will show up as 0.00.
942 1 : if v == 0 {
943 1 : return "0"
944 1 : }
945 1 : res := fmt.Sprintf("%.2f", v)
946 1 : if len(res) <= targetLength {
947 1 : return redact.SafeString(res)
948 1 : }
949 1 : if len(res) == targetLength+1 {
950 1 : return redact.SafeString(fmt.Sprintf("%.1f", v))
951 1 : }
952 1 : return redact.SafeString(fmt.Sprintf("%.0f", v))
953 : }
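// Worked examples of the rules above with targetLength = 4:
//
//	humanizeFloat(math.NaN(), 4) // "-"
//	humanizeFloat(0, 4)          // "0"
//	humanizeFloat(3.14159, 4)    // "3.14" (fits in the target length)
//	humanizeFloat(12.345, 4)     // "12.3" (one over, so one decimal)
//	humanizeFloat(123.456, 4)    // "123"  (more than one over, so none)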
|