// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"fmt"
	"math"
	"time"

	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
	"github.com/cockroachdb/pebble/internal/humanize"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/manual"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/sharedcache"
	"github.com/cockroachdb/pebble/record"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/sstable/block"
	"github.com/cockroachdb/pebble/wal"
	"github.com/cockroachdb/redact"
	"github.com/prometheus/client_golang/prometheus"
)

// CacheMetrics holds metrics for the block and file cache.
type CacheMetrics = cache.Metrics

// FilterMetrics holds metrics for the filter policy.
type FilterMetrics = sstable.FilterMetrics

// ThroughputMetric is a cumulative throughput metric. See the detailed
// comment in base.
type ThroughputMetric = base.ThroughputMetric

// SecondaryCacheMetrics holds metrics for the persistent secondary cache
// that caches commonly accessed blocks from blob storage on a local
// file system.
type SecondaryCacheMetrics = sharedcache.Metrics

// LevelMetrics holds per-level metrics such as the number of files and total
// size of the files, and compaction-related metrics.
type LevelMetrics struct {
	// The number of sublevels within the level. The sublevel count corresponds
	// to the read amplification for the level. An empty level will have a
	// sublevel count of 0, implying no read amplification. Only L0 will have
	// a sublevel count other than 0 or 1.
	Sublevels int32
	// The total count of sstables in the level.
	TablesCount int64
	// The total size in bytes of the sstables in the level. Note that if tables
	// contain references to blob files, this quantity does not include the
	// size of the blob files or the referenced values.
	TablesSize int64
	// The total number of virtual sstables in the level.
	VirtualTablesCount uint64
	// The total size of the virtual sstables in the level.
	VirtualTablesSize uint64
	// The estimated total physical size of all blob references across all
	// sstables in the level. The physical size is estimated based on the size
	// of referenced values and the compression ratios of the blob files that
	// contain them.
	EstimatedReferencesSize uint64
	// The level's compaction score, used to rank levels (0 if the level doesn't
	// need compaction). See candidateLevelInfo.
	Score float64
	// The level's fill factor (the ratio between the size of the level and the
	// ideal size). See candidateLevelInfo.
	FillFactor float64
	// The level's compensated fill factor. See candidateLevelInfo.
	CompensatedFillFactor float64
	// The number of incoming bytes from other levels' sstables read during
	// compactions. This excludes bytes moved and bytes ingested. For L0 this is
	// the bytes written to the WAL.
	TableBytesIn uint64
	// The number of sstable bytes ingested. The sibling metric for tables is
	// TablesIngested.
	TableBytesIngested uint64
	// The number of sstable bytes moved into the level by a "move" compaction.
	// The sibling metric for tables is TablesMoved.
	TableBytesMoved uint64
	// The number of bytes read for compactions at the level. This includes bytes
	// read from other levels (TableBytesIn), as well as bytes read for the level.
	TableBytesRead uint64
	// The number of bytes written to sstables during compactions. The sibling
	// metric for tables is TablesCompacted. This metric may be summed with
	// TableBytesFlushed to compute the total bytes written for the level.
	TableBytesCompacted uint64
	// The number of bytes written to sstables during flushes. The sibling
	// metric for tables is TablesFlushed. This metric is always zero for all
	// levels other than L0.
	TableBytesFlushed uint64
	// The number of sstables compacted to this level.
	TablesCompacted uint64
	// The number of sstables flushed to this level.
	TablesFlushed uint64
	// The number of sstables ingested into the level.
	TablesIngested uint64
	// The number of sstables moved to this level by a "move" compaction.
	TablesMoved uint64
	// The number of sstables deleted in a level by a delete-only compaction.
	TablesDeleted uint64
	// The number of sstables excised in a level by a delete-only compaction.
	TablesExcised uint64
	// BlobBytesReadEstimate is an estimate of the physical bytes corresponding
	// to values referenced by sstables that were inputs into compactions
	// outputting into this level.
	BlobBytesReadEstimate uint64
	// BlobBytesCompacted is the number of bytes written to blob files while
	// compacting sstables in this level.
	BlobBytesCompacted uint64
	// BlobBytesFlushed is the number of bytes written to blob files while
	// flushing sstables. This metric is always zero for all levels other than
	// L0.
	BlobBytesFlushed uint64

	MultiLevel struct {
		// TableBytesInTop are the total bytes in a multilevel compaction coming
		// from the top level.
		TableBytesInTop uint64

		// TableBytesIn, exclusively for multilevel compactions.
		TableBytesIn uint64

		// TableBytesRead, exclusively for multilevel compactions.
		TableBytesRead uint64
	}

	// Additional contains miscellaneous additional metrics that are not always
	// printed.
	Additional struct {
		// The sum of Properties.ValueBlocksSize for all the sstables in this
		// level. Printed by LevelMetrics.format iff there is at least one level
		// with a non-zero value.
		ValueBlocksSize uint64
		// Cumulative metrics about bytes written to data blocks and value blocks,
		// via compactions (except move compactions) or flushes. Not printed by
		// LevelMetrics.format, but are available to sophisticated clients.
		BytesWrittenDataBlocks  uint64
		BytesWrittenValueBlocks uint64
	}
}
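
// levelTablesAdded is an illustrative sketch, not part of the original file:
// it shows how the sibling table counters above combine. Tables arrive in a
// level via flush, compaction, ingestion, or move, so the sum below tracks
// the total count of sstables ever added to the level.
func levelTablesAdded(m *LevelMetrics) uint64 {
	return m.TablesFlushed + m.TablesCompacted + m.TablesIngested + m.TablesMoved
}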

// AggregateSize returns an estimated physical size of the level's sstables and
// their referenced values stored in blob files. The size of physical sstables
// is exactly known. Virtual sstables' sizes are estimated, and the size of
// values stored in blob files is estimated based on the volume of referenced
// data and the blob file's compression ratio.
func (m *LevelMetrics) AggregateSize() int64 {
	return m.TablesSize + int64(m.EstimatedReferencesSize)
}
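
// totalAggregateSize is an illustrative sketch, not part of the original
// file: summing AggregateSize across all levels approximates the LSM's total
// physical footprint, including the estimated referenced blob data.
func totalAggregateSize(m *Metrics) int64 {
	var total int64
	for level := range m.Levels {
		total += m.Levels[level].AggregateSize()
	}
	return total
}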

// Add updates the counter metrics for the level.
func (m *LevelMetrics) Add(u *LevelMetrics) {
	m.TablesCount += u.TablesCount
	m.TablesSize += u.TablesSize
	m.VirtualTablesCount += u.VirtualTablesCount
	m.VirtualTablesSize += u.VirtualTablesSize
	m.EstimatedReferencesSize += u.EstimatedReferencesSize
	m.TableBytesIn += u.TableBytesIn
	m.TableBytesIngested += u.TableBytesIngested
	m.TableBytesMoved += u.TableBytesMoved
	m.TableBytesRead += u.TableBytesRead
	m.TableBytesCompacted += u.TableBytesCompacted
	m.TableBytesFlushed += u.TableBytesFlushed
	m.TablesCompacted += u.TablesCompacted
	m.TablesFlushed += u.TablesFlushed
	m.TablesIngested += u.TablesIngested
	m.TablesMoved += u.TablesMoved
	m.BlobBytesCompacted += u.BlobBytesCompacted
	m.BlobBytesFlushed += u.BlobBytesFlushed
	m.BlobBytesReadEstimate += u.BlobBytesReadEstimate
	m.MultiLevel.TableBytesInTop += u.MultiLevel.TableBytesInTop
	m.MultiLevel.TableBytesRead += u.MultiLevel.TableBytesRead
	m.MultiLevel.TableBytesIn += u.MultiLevel.TableBytesIn
	m.Additional.BytesWrittenDataBlocks += u.Additional.BytesWrittenDataBlocks
	m.Additional.BytesWrittenValueBlocks += u.Additional.BytesWrittenValueBlocks
	m.Additional.ValueBlocksSize += u.Additional.ValueBlocksSize
}

// WriteAmp computes the write amplification for compactions at this
// level.
//
// The write amplification is computed as the quantity of physical bytes written
// divided by the quantity of logical bytes written.
//
// Concretely, it's computed as:
//
//	TableBytesFlushed + TableBytesCompacted + BlobBytesFlushed + BlobBytesCompacted
//	-------------------------------------------------------------------------------
//	                                 TableBytesIn
func (m *LevelMetrics) WriteAmp() float64 {
	if m.TableBytesIn == 0 {
		return 0
	}
	return float64(m.TableBytesFlushed+m.TableBytesCompacted+m.BlobBytesFlushed+m.BlobBytesCompacted) /
		float64(m.TableBytesIn)
}
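
// printWriteAmps is a hedged usage sketch, not part of the original file: it
// walks the per-level metrics and reports each level's write amplification.
func printWriteAmps(m *Metrics) {
	for level := range m.Levels {
		fmt.Printf("L%d: w-amp=%.2f\n", level, m.Levels[level].WriteAmp())
	}
}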

var categoryCompaction = block.RegisterCategory("pebble-compaction", block.NonLatencySensitiveQoSLevel)
var categoryIngest = block.RegisterCategory("pebble-ingest", block.LatencySensitiveQoSLevel)
var categoryGet = block.RegisterCategory("pebble-get", block.LatencySensitiveQoSLevel)

// Metrics holds metrics for various subsystems of the DB such as the Cache,
// Compactions, WAL, and per-Level metrics.
//
// TODO(peter): The testing of these metrics is relatively weak. There should
// be testing that performs various operations on a DB and verifies that the
// metrics reflect those operations.
type Metrics struct {
	BlockCache CacheMetrics

	Compact struct {
		// The total number of compactions, and per-compaction type counts.
		Count                 int64
		DefaultCount          int64
		DeleteOnlyCount       int64
		ElisionOnlyCount      int64
		CopyCount             int64
		MoveCount             int64
		ReadCount             int64
		TombstoneDensityCount int64
		RewriteCount          int64
		MultiLevelCount       int64
		CounterLevelCount     int64
		// An estimate of the number of bytes that need to be compacted for the LSM
		// to reach a stable state.
		EstimatedDebt uint64
		// Number of bytes present in sstables being written by in-progress
		// compactions. This value will be zero if there are no in-progress
		// compactions.
		InProgressBytes int64
		// Number of compactions that are in-progress.
		NumInProgress int64
		// Number of compactions that were cancelled.
		CancelledCount int64
		// CancelledBytes is the number of bytes written by compactions that
		// were cancelled.
		CancelledBytes int64
		// Total number of compactions that hit an error.
		FailedCount int64
		// NumProblemSpans is the current (instantaneous) count of "problem spans"
		// which temporarily block compactions.
		NumProblemSpans int
		// MarkedFiles is a count of files that are marked for
		// compaction. Such files are compacted in a rewrite compaction
		// when no other compactions are picked.
		MarkedFiles int
		// Duration records the cumulative duration of all compactions since the
		// database was opened.
		Duration time.Duration
	}

	Ingest struct {
		// The total number of ingestions.
		Count uint64
	}

	Flush struct {
		// The total number of flushes.
		Count           int64
		WriteThroughput ThroughputMetric
		// Number of flushes that are in-progress. In the current implementation
		// this will always be zero or one.
		NumInProgress int64
		// AsIngestCount is a monotonically increasing counter of flush operations
		// handling ingested tables.
		AsIngestCount uint64
		// AsIngestTableCount is a monotonically increasing counter of tables
		// ingested as flushables.
		AsIngestTableCount uint64
		// AsIngestBytes is a monotonically increasing counter of the bytes flushed
		// for flushables that originated as ingestion operations.
		AsIngestBytes uint64
	}

	Filter FilterMetrics

	Levels [numLevels]LevelMetrics

	MemTable struct {
		// The number of bytes allocated by memtables and large (flushable)
		// batches.
		Size uint64
		// The count of memtables.
		Count int64
		// The number of bytes present in zombie memtables which are no longer
		// referenced by the current DB state. An unbounded number of memtables
		// may be zombie if they're still in use by an iterator. One additional
		// memtable may be zombie if it's no longer in use and waiting to be
		// recycled.
		ZombieSize uint64
		// The count of zombie memtables.
		ZombieCount int64
	}

	Keys struct {
		// The approximate count of internal range key set keys in the database.
		RangeKeySetsCount uint64
		// The approximate count of internal tombstones (DEL, SINGLEDEL and
		// RANGEDEL key kinds) within the database.
		TombstoneCount uint64
		// A cumulative total number of missized DELSIZED keys encountered by
		// compactions since the database was opened.
		MissizedTombstonesCount uint64
	}

	Snapshots struct {
		// The number of currently open snapshots.
		Count int
		// The sequence number of the earliest, currently open snapshot.
		EarliestSeqNum base.SeqNum
		// A running tally of keys written to sstables during flushes or
		// compactions that would've been elided if it weren't for open
		// snapshots.
		PinnedKeys uint64
		// A running cumulative sum of the size of keys and values written to
		// sstables during flushes or compactions that would've been elided if
		// it weren't for open snapshots.
		PinnedSize uint64
	}

	Table struct {
		// The number of bytes present in obsolete tables which are no longer
		// referenced by the current DB state or any open iterators.
		ObsoleteSize uint64
		// The count of obsolete tables.
		ObsoleteCount int64
		// The number of bytes present in zombie tables which are no longer
		// referenced by the current DB state but are still in use by an iterator.
		ZombieSize uint64
		// The count of zombie tables.
		ZombieCount int64
		// The count of sstables backing virtual tables.
		BackingTableCount uint64
		// The sum of the sizes of the BackingTableCount sstables that are backing
		// virtual tables.
		BackingTableSize uint64
		// The number of sstables that are compressed with an unknown compression
		// algorithm.
		CompressedCountUnknown int64
		// The number of sstables that are compressed with the default compression
		// algorithm, snappy.
		CompressedCountSnappy int64
		// The number of sstables that are compressed with zstd.
		CompressedCountZstd int64
		// The number of sstables that are compressed with minlz.
		CompressedCountMinLZ int64
		// The number of sstables that are uncompressed.
		CompressedCountNone int64

		// Local file sizes.
		Local struct {
			// LiveSize is the number of bytes in live tables.
			LiveSize uint64
			// LiveCount is the number of live tables.
			LiveCount uint64
			// ObsoleteSize is the number of bytes in obsolete tables.
			ObsoleteSize uint64
			// ObsoleteCount is the number of obsolete tables.
			ObsoleteCount uint64
			// ZombieSize is the number of bytes in zombie tables.
			ZombieSize uint64
			// ZombieCount is the number of zombie tables.
			ZombieCount uint64
		}

		// Garbage bytes.
		Garbage struct {
			// PointDeletionsBytesEstimate is the estimated file bytes that will be
			// saved by compacting all point deletions. This is dependent on table
			// stats collection, so can be very incomplete until
			// InitialStatsCollectionComplete becomes true.
			PointDeletionsBytesEstimate uint64
			// RangeDeletionsBytesEstimate is the estimated file bytes that will be
			// saved by compacting all range deletions. This is dependent on table
			// stats collection, so can be very incomplete until
			// InitialStatsCollectionComplete becomes true.
			RangeDeletionsBytesEstimate uint64
		}

		// Whether the initial stats collection (for existing tables on Open) is
		// complete.
		InitialStatsCollectionComplete bool
		// The count of recently created sstables that need stats collection. This
		// does not include sstables that existed when the DB was opened, so the
		// value is only useful when InitialStatsCollectionComplete is true.
		PendingStatsCollectionCount int64
	}

	BlobFiles struct {
		// The count of all live blob files.
		LiveCount uint64
		// The physical file size of all live blob files.
		LiveSize uint64
		// ValueSize is the sum of the length of the uncompressed values in all
		// live (referenced by some sstable(s) within the current version) blob
		// files. ValueSize may be greater than LiveSize when compression is
		// effective. ValueSize includes bytes in live blob files that are not
		// actually reachable by any sstable key. If any value within the blob
		// file is reachable by a key in a live sstable, then the entirety of
		// the blob file's values is included within ValueSize.
		ValueSize uint64
		// ReferencedValueSize is the sum of the length of the uncompressed
		// values (in all live blob files) that are still referenced by keys
		// within live tables. Over the lifetime of a blob file, its references
		// are removed as compactions choose to write new blob files containing
		// the same values, or as the keys referencing the file's values are
		// deleted. ReferencedValueSize accounts for the volume of bytes that
		// are actually reachable by some key in a live table.
		//
		// The difference between ValueSize and ReferencedValueSize is
		// (uncompressed) space amplification that could be reclaimed if all
		// blob files were rewritten, discarding values that are no longer
		// referenced by any keys in any sstables within the current version.
		ReferencedValueSize uint64
		// The count of all obsolete blob files.
		ObsoleteCount uint64
		// The physical size of all obsolete blob files.
		ObsoleteSize uint64
		// The count of all zombie blob files.
		ZombieCount uint64
		// The physical size of all zombie blob files.
		ZombieSize uint64
		// Local file sizes.
		Local struct {
			// LiveSize is the physical size of local live blob files.
			LiveSize uint64
			// LiveCount is the number of local live blob files.
			LiveCount uint64
			// ObsoleteSize is the physical size of local obsolete blob files.
			ObsoleteSize uint64
			// ObsoleteCount is the number of local obsolete blob files.
			ObsoleteCount uint64
			// ZombieSize is the physical size of local zombie blob files.
			ZombieSize uint64
			// ZombieCount is the number of local zombie blob files.
			ZombieCount uint64
		}
	}

	FileCache CacheMetrics

	// Count of the number of open sstable iterators.
	TableIters int64
	// Uptime is the total time since this DB was opened.
	Uptime time.Duration

	WAL struct {
		// Number of live WAL files.
		Files int64
		// Number of obsolete WAL files.
		ObsoleteFiles int64
		// Physical size of the obsolete WAL files.
		ObsoletePhysicalSize uint64
		// Size of the live data in the WAL files. Note that with WAL file
		// recycling this is less than the actual on-disk size of the WAL files.
		Size uint64
		// Physical size of the WAL files on-disk. With WAL file recycling,
		// this is greater than the live data in WAL files.
		//
		// TODO(sumeer): it seems this does not include ObsoletePhysicalSize.
		// Should the comment be updated?
		PhysicalSize uint64
		// Number of logical bytes written to the WAL.
		BytesIn uint64
		// Number of bytes written to the WAL.
		BytesWritten uint64
		// Failover contains failover stats. Empty if failover is not enabled.
		Failover wal.FailoverStats
	}

	LogWriter struct {
		FsyncLatency prometheus.Histogram
		record.LogWriterMetrics
	}

	CategoryStats []block.CategoryStatsAggregate

	SecondaryCacheMetrics SecondaryCacheMetrics

	private struct {
		optionsFileSize  uint64
		manifestFileSize uint64
	}

	manualMemory manual.Metrics
}

var (
	// FsyncLatencyBuckets are prometheus histogram buckets suitable for a
	// histogram that records latencies for fsyncs.
	FsyncLatencyBuckets = append(
		prometheus.LinearBuckets(0.0, float64(time.Microsecond*100), 50),
		prometheus.ExponentialBucketsRange(float64(time.Millisecond*5), float64(10*time.Second), 50)...,
	)

	// SecondaryCacheIOBuckets is exported from package pebble so that CRDB can
	// export metrics that use these buckets.
	SecondaryCacheIOBuckets = sharedcache.IOBuckets
	// SecondaryCacheChannelWriteBuckets is exported from package pebble so that
	// CRDB can export metrics that use these buckets.
	SecondaryCacheChannelWriteBuckets = sharedcache.ChannelWriteBuckets
)
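
// newFsyncLatencyHistogram is a minimal sketch, not part of the original
// file, showing how a caller might wire FsyncLatencyBuckets into the
// LogWriter.FsyncLatency field above; the metric name is hypothetical. Note
// that the bucket boundaries are in nanoseconds, since they are derived from
// time.Duration values.
func newFsyncLatencyHistogram() prometheus.Histogram {
	return prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "pebble_wal_fsync_latency_nanos", // hypothetical name
		Buckets: FsyncLatencyBuckets,
	})
}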

// DiskSpaceUsage returns the total disk space used by the database in bytes,
// including live and obsolete files. This only includes local files, i.e.,
// remote files (as known to objstorage.Provider) are not included.
func (m *Metrics) DiskSpaceUsage() uint64 {
	var usageBytes uint64
	usageBytes += m.WAL.PhysicalSize
	usageBytes += m.WAL.ObsoletePhysicalSize
	usageBytes += m.Table.Local.LiveSize
	usageBytes += m.Table.Local.ObsoleteSize
	usageBytes += m.Table.Local.ZombieSize
	usageBytes += m.BlobFiles.Local.LiveSize
	usageBytes += m.BlobFiles.Local.ObsoleteSize
	usageBytes += m.BlobFiles.Local.ZombieSize
	usageBytes += m.private.optionsFileSize
	usageBytes += m.private.manifestFileSize
	// TODO(sumeer): InProgressBytes does not distinguish between local and
	// remote files. This causes a small error. Fix.
	usageBytes += uint64(m.Compact.InProgressBytes)
	return usageBytes
}
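
// diskUsagePercent is a hedged sketch, not part of the original file: it
// expresses DiskSpaceUsage against a caller-provided byte budget using the
// percent helper defined later in this file. The parameter is hypothetical.
func diskUsagePercent(m *Metrics, budgetBytes uint64) float64 {
	return percent(int64(m.DiskSpaceUsage()), int64(budgetBytes))
}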

// NumVirtual is the number of virtual sstables in the latest version
// summed over every level in the LSM.
func (m *Metrics) NumVirtual() uint64 {
	var n uint64
	for _, level := range m.Levels {
		n += level.VirtualTablesCount
	}
	return n
}

// VirtualSize is the sum of the sizes of the virtual sstables in the
// latest version. BackingTableSize - VirtualSize gives an estimate for
// the space amplification caused by not compacting virtual sstables.
func (m *Metrics) VirtualSize() uint64 {
	var size uint64
	for _, level := range m.Levels {
		size += level.VirtualTablesSize
	}
	return size
}

// ReadAmp returns the current read amplification of the database.
// It's computed as the number of sublevels in L0 + the number of non-empty
// levels below L0.
func (m *Metrics) ReadAmp() int {
	var ramp int32
	for _, l := range m.Levels {
		ramp += l.Sublevels
	}
	return int(ramp)
}

// Total returns the sum of the per-level metrics and WAL metrics.
func (m *Metrics) Total() LevelMetrics {
	var total LevelMetrics
	for level := 0; level < numLevels; level++ {
		l := &m.Levels[level]
		total.Add(l)
		total.Sublevels += l.Sublevels
	}
	// Compute total bytes-in as the bytes written to the WAL + bytes ingested.
	total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
	// Add the total bytes-in to the total bytes-flushed. This is to account for
	// the bytes written to the log and bytes written externally and then
	// ingested.
	total.TableBytesFlushed += total.TableBytesIn
	return total
}
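
// overallWriteAmp is an illustrative sketch, not part of the original file:
// because Total folds WAL bytes into TableBytesIn and TableBytesFlushed, the
// totaled LevelMetrics can be fed straight into WriteAmp to obtain a DB-wide
// write amplification figure.
func overallWriteAmp(m *Metrics) float64 {
	total := m.Total()
	return total.WriteAmp()
}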

// RemoteTablesTotal returns the total number of remote tables and their total
// size. Remote tables are computed as the difference between total tables
// (live + obsolete + zombie) and local tables.
func (m *Metrics) RemoteTablesTotal() (count uint64, size uint64) {
	var liveTables, liveTableBytes int64
	for level := 0; level < numLevels; level++ {
		liveTables += m.Levels[level].TablesCount
		liveTableBytes += m.Levels[level].TablesSize
	}
	totalCount := liveTables + m.Table.ObsoleteCount + m.Table.ZombieCount
	localCount := m.Table.Local.LiveCount + m.Table.Local.ObsoleteCount + m.Table.Local.ZombieCount
	remoteCount := uint64(totalCount) - localCount

	totalSize := uint64(liveTableBytes) + m.Table.ObsoleteSize + m.Table.ZombieSize
	localSize := m.Table.Local.LiveSize + m.Table.Local.ObsoleteSize + m.Table.Local.ZombieSize
	remoteSize := totalSize - localSize

	return remoteCount, remoteSize
}
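
// remoteTableFraction is a hedged sketch, not part of the original file: the
// fraction of total table bytes (live + obsolete + zombie) that live on
// remote storage, derived from RemoteTablesTotal.
func remoteTableFraction(m *Metrics) float64 {
	_, remoteSize := m.RemoteTablesTotal()
	var totalSize uint64
	for level := 0; level < numLevels; level++ {
		totalSize += uint64(m.Levels[level].TablesSize)
	}
	totalSize += m.Table.ObsoleteSize + m.Table.ZombieSize
	if totalSize == 0 {
		return 0
	}
	return float64(remoteSize) / float64(totalSize)
}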

// String pretty-prints the metrics as below:
//
//	      |                             |                |       |   ingested   |     moved    |    written   |       |    amp   |     multilevel
//	level | tables  size val-bl vtables | score  ff  cff |   in  | tables  size | tables  size | tables  size |  read |   r   w  |   top    in  read
//	------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+----------+------------------
//	    0 |   101   102B     0B     101 | 1.10 2.10 0.30 |  104B |   112   104B |   113   106B |   221   217B |  107B |   1 2.09 |  104B  104B  104B
//	    1 |   201   202B     0B     201 | 1.20 2.20 0.60 |  204B |   212   204B |   213   206B |   421   417B |  207B |   2 2.04 |  204B  204B  204B
//	    2 |   301   302B     0B     301 | 1.30 2.30 0.90 |  304B |   312   304B |   313   306B |   621   617B |  307B |   3 2.03 |  304B  304B  304B
//	    3 |   401   402B     0B     401 | 1.40 2.40 1.20 |  404B |   412   404B |   413   406B |   821   817B |  407B |   4 2.02 |  404B  404B  404B
//	    4 |   501   502B     0B     501 | 1.50 2.50 1.50 |  504B |   512   504B |   513   506B |  1.0K  1017B |  507B |   5 2.02 |  504B  504B  504B
//	    5 |   601   602B     0B     601 | 1.60 2.60 1.80 |  604B |   612   604B |   613   606B |  1.2K  1.2KB |  607B |   6 2.01 |  604B  604B  604B
//	    6 |   701   702B     0B     701 |    - 2.70 2.10 |  704B |   712   704B |   713   706B |  1.4K  1.4KB |  707B |   7 2.01 |  704B  704B  704B
//	total |  2.8K  2.7KB     0B    2.8K |    -    -    - | 2.8KB |  2.9K  2.8KB |  2.9K  2.8KB |  5.7K  8.4KB | 2.8KB |  28 3.00 | 2.8KB 2.8KB 2.8KB
//	------------------------------------------------------------------------------------------------------------------------------------------------
//	WAL: 22 files (24B) in: 25B written: 26B (4% overhead)
//	Flushes: 8
//	Compactions: 5 estimated debt: 6B in progress: 2 (7B)
//	 default: 27 delete: 28 elision: 29 move: 30 read: 31 tombstone-density: 16 rewrite: 32 copy: 33 multi-level: 34
//	MemTables: 12 (11B) zombie: 14 (13B)
//	Zombie tables: 16 (15B, local: 30B)
//	Backing tables: 1 (2.0MB)
//	Virtual tables: 2807 (2.8KB)
//	Local tables size: 28B
//	Compression types:
//	Table stats: 31
//	Block cache: 2 entries (1B) hit rate: 42.9%
//	Table cache: 18 entries (17B) hit rate: 48.7%
//	Range key sets: 123 Tombstones: 456 Total missized tombstones encountered: 789
//	Snapshots: 4 earliest seq num: 1024
//	Table iters: 21
//	Filter utility: 47.4%
//	Ingestions: 27 as flushable: 36 (34B in 35 tables)
//	Cgo memory usage: 15KB block cache: 9.0KB (data: 4.0KB, maps: 2.0KB, entries: 3.0KB) memtables: 5.0KB
func (m *Metrics) String() string {
	return redact.StringWithoutMarkers(m)
}

var _ redact.SafeFormatter = &Metrics{}

// SafeFormat implements redact.SafeFormatter.
func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
	// NB: Pebble does not make any assumptions as to which Go primitive types
	// have been registered as safe with redact.RegisterSafeType and does not
	// register any types itself. Some of the calls to `redact.Safe`, etc are
	// superfluous in the context of CockroachDB, which registers all the Go
	// numeric types as safe.

	multiExists := m.Compact.MultiLevelCount > 0
	appendIfMulti := func(line redact.SafeString) {
		if multiExists {
			w.SafeString(line)
		}
	}
	newline := func() {
		w.SafeString("\n")
	}

	w.SafeString("      |                             |                |       |   ingested   |     moved    |    written   |       |    amp")
	appendIfMulti("   |     multilevel")
	newline()
	w.SafeString("level | tables  size val-bl vtables | score  ff  cff |   in  | tables  size | tables  size | tables  size |  read |   r   w")
	appendIfMulti("  |   top    in  read")
	newline()
	w.SafeString("------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+---------")
	appendIfMulti("-+------------------")
	newline()

	// formatRow prints out a row of the table.
	formatRow := func(m *LevelMetrics) {
		score := m.Score
		if score == 0 {
			// Format a zero level score as a dash.
			score = math.NaN()
		}
		w.Printf("| %5s %6s %6s %7s | %4s %4s %4s | %5s | %5s %6s | %5s %6s | %5s %6s | %5s | %3d %4s",
			humanize.Count.Int64(m.TablesCount),
			humanize.Bytes.Int64(m.TablesSize),
			humanize.Bytes.Uint64(m.Additional.ValueBlocksSize),
			humanize.Count.Uint64(m.VirtualTablesCount),
			humanizeFloat(score, 4),
			humanizeFloat(m.FillFactor, 4),
			humanizeFloat(m.CompensatedFillFactor, 4),
			humanize.Bytes.Uint64(m.TableBytesIn),
			humanize.Count.Uint64(m.TablesIngested),
			humanize.Bytes.Uint64(m.TableBytesIngested),
			humanize.Count.Uint64(m.TablesMoved),
			humanize.Bytes.Uint64(m.TableBytesMoved),
			humanize.Count.Uint64(m.TablesFlushed+m.TablesCompacted),
			humanize.Bytes.Uint64(m.TableBytesFlushed+m.TableBytesCompacted),
			humanize.Bytes.Uint64(m.TableBytesRead),
			redact.Safe(m.Sublevels),
			humanizeFloat(m.WriteAmp(), 4),
		)

		if multiExists {
			w.Printf(" | %5s %5s %5s",
				humanize.Bytes.Uint64(m.MultiLevel.TableBytesInTop),
				humanize.Bytes.Uint64(m.MultiLevel.TableBytesIn),
				humanize.Bytes.Uint64(m.MultiLevel.TableBytesRead))
		}
		newline()
	}

	var total LevelMetrics
	for level := 0; level < numLevels; level++ {
		l := &m.Levels[level]
		w.Printf("%5d ", redact.Safe(level))
		formatRow(l)
		total.Add(l)
		total.Sublevels += l.Sublevels
	}
	// Compute total bytes-in as the bytes written to the WAL + bytes ingested.
	total.TableBytesIn = m.WAL.BytesWritten + total.TableBytesIngested
	// Add the total bytes-in to the total bytes-flushed. This is to account for
	// the bytes written to the log and bytes written externally and then
	// ingested.
	total.TableBytesFlushed += total.TableBytesIn
	total.Score = math.NaN()
	total.FillFactor = math.NaN()
	total.CompensatedFillFactor = math.NaN()
	w.SafeString("total ")
	formatRow(&total)

	w.SafeString("----------------------------------------------------------------------------------------------------------------------------")
	appendIfMulti("--------------------")
	newline()
	w.Printf("WAL: %d files (%s) in: %s written: %s (%.0f%% overhead)",
		redact.Safe(m.WAL.Files),
		humanize.Bytes.Uint64(m.WAL.Size),
		humanize.Bytes.Uint64(m.WAL.BytesIn),
		humanize.Bytes.Uint64(m.WAL.BytesWritten),
		redact.Safe(percent(int64(m.WAL.BytesWritten)-int64(m.WAL.BytesIn), int64(m.WAL.BytesIn))))
	failoverStats := m.WAL.Failover
	failoverStats.FailoverWriteAndSyncLatency = nil
	if failoverStats == (wal.FailoverStats{}) {
		w.Printf("\n")
	} else {
		w.Printf(" failover: (switches: %d, primary: %s, secondary: %s)\n", m.WAL.Failover.DirSwitchCount,
			m.WAL.Failover.PrimaryWriteDuration.String(), m.WAL.Failover.SecondaryWriteDuration.String())
	}

	w.Printf("Flushes: %d\n", redact.Safe(m.Flush.Count))

	w.Printf("Compactions: %d estimated debt: %s in progress: %d (%s) canceled: %d (%s) failed: %d problem spans: %d\n",
		redact.Safe(m.Compact.Count),
		humanize.Bytes.Uint64(m.Compact.EstimatedDebt),
		redact.Safe(m.Compact.NumInProgress),
		humanize.Bytes.Int64(m.Compact.InProgressBytes),
		redact.Safe(m.Compact.CancelledCount),
		humanize.Bytes.Int64(m.Compact.CancelledBytes),
		redact.Safe(m.Compact.FailedCount),
		redact.Safe(m.Compact.NumProblemSpans),
	)

	w.Printf(" default: %d delete: %d elision: %d move: %d read: %d tombstone-density: %d rewrite: %d copy: %d multi-level: %d\n",
		redact.Safe(m.Compact.DefaultCount),
		redact.Safe(m.Compact.DeleteOnlyCount),
		redact.Safe(m.Compact.ElisionOnlyCount),
		redact.Safe(m.Compact.MoveCount),
		redact.Safe(m.Compact.ReadCount),
		redact.Safe(m.Compact.TombstoneDensityCount),
		redact.Safe(m.Compact.RewriteCount),
		redact.Safe(m.Compact.CopyCount),
		redact.Safe(m.Compact.MultiLevelCount),
	)

	w.Printf("MemTables: %d (%s) zombie: %d (%s)\n",
		redact.Safe(m.MemTable.Count),
		humanize.Bytes.Uint64(m.MemTable.Size),
		redact.Safe(m.MemTable.ZombieCount),
		humanize.Bytes.Uint64(m.MemTable.ZombieSize))

	w.Printf("Zombie tables: %d (%s, local: %s)\n",
		redact.Safe(m.Table.ZombieCount),
		humanize.Bytes.Uint64(m.Table.ZombieSize),
		humanize.Bytes.Uint64(m.Table.Local.ZombieSize))

	w.Printf("Backing tables: %d (%s)\n",
		redact.Safe(m.Table.BackingTableCount),
		humanize.Bytes.Uint64(m.Table.BackingTableSize))
	w.Printf("Virtual tables: %d (%s)\n",
		redact.Safe(m.NumVirtual()),
		humanize.Bytes.Uint64(m.VirtualSize()))
	w.Printf("Local tables size: %s\n", humanize.Bytes.Uint64(m.Table.Local.LiveSize))
	w.SafeString("Compression types:")
	if count := m.Table.CompressedCountSnappy; count > 0 {
		w.Printf(" snappy: %d", redact.Safe(count))
	}
	if count := m.Table.CompressedCountZstd; count > 0 {
		w.Printf(" zstd: %d", redact.Safe(count))
	}
	if count := m.Table.CompressedCountMinLZ; count > 0 {
		w.Printf(" minlz: %d", redact.Safe(count))
	}
	if count := m.Table.CompressedCountNone; count > 0 {
		w.Printf(" none: %d", redact.Safe(count))
	}
	if count := m.Table.CompressedCountUnknown; count > 0 {
		w.Printf(" unknown: %d", redact.Safe(count))
	}
	w.Printf("\n")
	if m.Table.Garbage.PointDeletionsBytesEstimate > 0 || m.Table.Garbage.RangeDeletionsBytesEstimate > 0 {
		w.Printf("Garbage: point-deletions %s range-deletions %s\n",
			humanize.Bytes.Uint64(m.Table.Garbage.PointDeletionsBytesEstimate),
			humanize.Bytes.Uint64(m.Table.Garbage.RangeDeletionsBytesEstimate))
	}
	w.Printf("Table stats: ")
	if !m.Table.InitialStatsCollectionComplete {
		w.Printf("initial load in progress")
	} else if m.Table.PendingStatsCollectionCount == 0 {
		w.Printf("all loaded")
	} else {
		w.Printf("%s", humanize.Count.Int64(m.Table.PendingStatsCollectionCount))
	}
	w.Printf("\n")

	formatCacheMetrics := func(m *CacheMetrics, name redact.SafeString) {
		w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n",
			name,
			humanize.Count.Int64(m.Count),
			humanize.Bytes.Int64(m.Size),
			redact.Safe(hitRate(m.Hits, m.Misses)))
	}
	formatCacheMetrics(&m.BlockCache, "Block cache")
	formatCacheMetrics(&m.FileCache, "Table cache")

	formatSharedCacheMetrics := func(w redact.SafePrinter, m *SecondaryCacheMetrics, name redact.SafeString) {
		w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n",
			name,
			humanize.Count.Int64(m.Count),
			humanize.Bytes.Int64(m.Size),
			redact.Safe(hitRate(m.ReadsWithFullHit, m.ReadsWithPartialHit+m.ReadsWithNoHit)))
	}
	if m.SecondaryCacheMetrics.Size > 0 || m.SecondaryCacheMetrics.ReadsWithFullHit > 0 {
		formatSharedCacheMetrics(w, &m.SecondaryCacheMetrics, "Secondary cache")
	}

	w.Printf("Range key sets: %s Tombstones: %s Total missized tombstones encountered: %s\n",
		humanize.Count.Uint64(m.Keys.RangeKeySetsCount),
		humanize.Count.Uint64(m.Keys.TombstoneCount),
		humanize.Count.Uint64(m.Keys.MissizedTombstonesCount),
	)

	w.Printf("Snapshots: %d earliest seq num: %d\n",
		redact.Safe(m.Snapshots.Count),
		redact.Safe(m.Snapshots.EarliestSeqNum))

	w.Printf("Table iters: %d\n", redact.Safe(m.TableIters))
	w.Printf("Filter utility: %.1f%%\n", redact.Safe(hitRate(m.Filter.Hits, m.Filter.Misses)))
	w.Printf("Ingestions: %d as flushable: %d (%s in %d tables)\n",
		redact.Safe(m.Ingest.Count),
		redact.Safe(m.Flush.AsIngestCount),
		humanize.Bytes.Uint64(m.Flush.AsIngestBytes),
		redact.Safe(m.Flush.AsIngestTableCount))

	var inUseTotal uint64
	for i := range m.manualMemory {
		inUseTotal += m.manualMemory[i].InUseBytes
	}
	inUse := func(purpose manual.Purpose) uint64 {
		return m.manualMemory[purpose].InUseBytes
	}
	w.Printf("Cgo memory usage: %s block cache: %s (data: %s, maps: %s, entries: %s) memtables: %s\n",
		humanize.Bytes.Uint64(inUseTotal),
		humanize.Bytes.Uint64(inUse(manual.BlockCacheData)+inUse(manual.BlockCacheMap)+inUse(manual.BlockCacheEntry)),
		humanize.Bytes.Uint64(inUse(manual.BlockCacheData)),
		humanize.Bytes.Uint64(inUse(manual.BlockCacheMap)),
		humanize.Bytes.Uint64(inUse(manual.BlockCacheEntry)),
		humanize.Bytes.Uint64(inUse(manual.MemTable)),
	)
}

func hitRate(hits, misses int64) float64 {
	return percent(hits, hits+misses)
}

func percent(numerator, denominator int64) float64 {
	if denominator == 0 {
		return 0
	}
	return 100 * float64(numerator) / float64(denominator)
}
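
// hitRateExample is a tiny illustrative sketch, not part of the original
// file: 90 hits and 10 misses yield a 90% hit rate, and percent guards
// against a zero denominator by returning 0 instead of dividing by zero.
func hitRateExample() (float64, float64) {
	return hitRate(90, 10), percent(5, 0) // 90, 0
}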

// StringForTests is identical to m.String() on 64-bit platforms. It is used to
// provide a platform-independent result for tests.
func (m *Metrics) StringForTests() string {
	mCopy := *m
	if math.MaxInt == math.MaxInt32 {
		// README: This is the difference in Sizeof(sstable.Reader{}) + Sizeof(blob.FileReader{})
		// between 64 and 32 bit platforms. See Metrics() in file_cache.go for more details.
		// This magic number must be updated if the sstable.Reader or blob.FileReader struct changes.
		// On 64-bit platforms, the size of the sstable.Reader struct is 616 bytes.
		// On 32-bit platforms, the size of the sstable.Reader struct is 496 bytes.
		// On 64-bit platforms, the size of the blob.FileReader struct is 88 bytes.
		// On 32-bit platforms, the size of the blob.FileReader struct is 56 bytes.
		// The difference is 616 - 496 + 88 - 56 = 152 bytes.
		const tableCacheSizeAdjustment = 152
		mCopy.FileCache.Size += mCopy.FileCache.Count * tableCacheSizeAdjustment
	}
	// Don't show cgo memory statistics as they can vary based on architecture,
	// invariants tag, etc.
	mCopy.manualMemory = manual.Metrics{}
	return redact.StringWithoutMarkers(&mCopy)
}

// levelMetricsDelta accumulates incremental ("delta") level metric updates
// (e.g. from compactions or flushes).
type levelMetricsDelta [manifest.NumLevels]*LevelMetrics

func (m *Metrics) updateLevelMetrics(updates levelMetricsDelta) {
	for i, u := range updates {
		if u != nil {
			m.Levels[i].Add(u)
		}
	}
}
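
// applyFlushDelta is a hedged sketch, not part of the original file, of how
// a flush might report its metrics: only the L0 entry of the delta is
// populated, and updateLevelMetrics folds it into the cumulative metrics.
// The parameter names are hypothetical.
func applyFlushDelta(m *Metrics, bytesFlushed, tablesFlushed uint64) {
	var delta levelMetricsDelta
	delta[0] = &LevelMetrics{
		TableBytesFlushed: bytesFlushed,
		TablesFlushed:     tablesFlushed,
	}
	m.updateLevelMetrics(delta)
}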

// humanizeFloat formats a float64 value as a string. It shows up to two
// decimals, depending on the target length. NaN is shown as "-".
func humanizeFloat(v float64, targetLength int) redact.SafeString {
	if math.IsNaN(v) {
		return "-"
	}
	// We treat 0 specially. Values near zero will show up as 0.00.
	if v == 0 {
		return "0"
	}
	res := fmt.Sprintf("%.2f", v)
	if len(res) <= targetLength {
		return redact.SafeString(res)
	}
	if len(res) == targetLength+1 {
		return redact.SafeString(fmt.Sprintf("%.1f", v))
	}
	return redact.SafeString(fmt.Sprintf("%.0f", v))
}
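
// humanizeFloatExamples is an illustrative sketch, not part of the original
// file, showing the truncation behavior for a target length of 4.
func humanizeFloatExamples() []redact.SafeString {
	return []redact.SafeString{
		humanizeFloat(3.14159, 4),    // "3.14": two decimals fit
		humanizeFloat(12.34, 4),      // "12.3": only one decimal fits
		humanizeFloat(123.4, 4),      // "123": no decimals fit
		humanizeFloat(math.NaN(), 4), // "-"
	}
}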
|