LCOV - code coverage report
Current view: top level - pebble/sstable - options.go (source / functions) Hit Total Coverage
Test: 2024-11-19 08:17Z 7ce2628f - tests only.lcov Lines: 77 78 98.7 %
Date: 2024-11-19 08:18:01 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package sstable
       6             : 
       7             : import (
       8             :         "fmt"
       9             : 
      10             :         "github.com/cockroachdb/crlib/fifo"
      11             :         "github.com/cockroachdb/pebble/internal/base"
      12             :         "github.com/cockroachdb/pebble/internal/sstableinternal"
      13             :         "github.com/cockroachdb/pebble/sstable/block"
      14             :         "github.com/cockroachdb/pebble/sstable/colblk"
      15             :         "github.com/cockroachdb/pebble/sstable/rowblk"
      16             : )
      17             : 
      18             : const (
      19             :         // MaximumBlockSize is the maximum permissible size of a block.
      20             :         MaximumBlockSize = rowblk.MaximumSize
      21             :         // DefaultNumDeletionsThreshold defines the minimum number of point
      22             :         // tombstones that must be present in a data block for it to be
      23             :         // considered tombstone-dense.
      24             :         DefaultNumDeletionsThreshold = 100
      25             :         // DefaultDeletionSizeRatioThreshold defines the minimum ratio of the size
      26             :         // of point tombstones to the size of the data block in order to consider the
      27             :         // block as tombstone-dense.
      28             :         DefaultDeletionSizeRatioThreshold = 0.5
      29             : )
      30             : 
      31             : var ignoredInternalProperties = map[string]struct{}{
      32             :         "rocksdb.column.family.id":             {},
      33             :         "rocksdb.fixed.key.length":             {},
      34             :         "rocksdb.index.key.is.user.key":        {},
      35             :         "rocksdb.index.value.is.delta.encoded": {},
      36             :         "rocksdb.oldest.key.time":              {},
      37             :         "rocksdb.creation.time":                {},
      38             :         "rocksdb.file.creation.time":           {},
      39             :         "rocksdb.format.version":               {},
      40             : }
      41             : 
      42             : // FilterType exports the base.FilterType type.
      43             : type FilterType = base.FilterType
      44             : 
      45             : // Exported TableFilter constants.
      46             : const (
      47             :         TableFilter = base.TableFilter
      48             : )
      49             : 
      50             : // FilterWriter exports the base.FilterWriter type.
      51             : type FilterWriter = base.FilterWriter
      52             : 
      53             : // FilterPolicy exports the base.FilterPolicy type.
      54             : type FilterPolicy = base.FilterPolicy
      55             : 
      56             : // Comparers is a map from comparer name to comparer. It is used for debugging
      57             : // tools which may be used on multiple databases configured with different
      58             : // comparers.
      59             : type Comparers map[string]*base.Comparer
      60             : 
      61             : // Mergers is a map from merger name to merger. It is used for debugging tools
      62             : // which may be used on multiple databases configured with different
      63             : // mergers.
      64             : type Mergers map[string]*base.Merger
      65             : 
      66             : // KeySchemas is a map from key schema name to key schema. A single database may
      67             : // contain sstables with multiple key schemas.
      68             : type KeySchemas map[string]*colblk.KeySchema
      69             : 
      70             : // MakeKeySchemas constructs a KeySchemas from a slice of key schemas.
      71           1 : func MakeKeySchemas(keySchemas ...*colblk.KeySchema) KeySchemas {
      72           1 :         m := make(KeySchemas, len(keySchemas))
      73           1 :         for _, keySchema := range keySchemas {
      74           1 :                 if _, ok := m[keySchema.Name]; ok {
      75           0 :                         panic(fmt.Sprintf("duplicate key schemas with name %q", keySchema.Name))
      76             :                 }
      77           1 :                 m[keySchema.Name] = keySchema
      78             :         }
      79           1 :         return m
      80             : }
      81             : 
// ReaderOptions holds the parameters needed for reading an sstable.
//
// Several fields may be left nil: ensureDefaults fills in Comparer, Merger,
// LoggerAndTracer, DeniedUserProperties and KeySchemas when they are unset.
type ReaderOptions struct {
	// LoadBlockSema, if set, is used to limit the number of blocks that can be
	// loaded (i.e. read from the filesystem) in parallel. Each load acquires one
	// unit from the semaphore for the duration of the read.
	LoadBlockSema *fifo.Semaphore

	// DeniedUserProperties is the set of user property names that will not be
	// added to sst.Properties.UserProperties.
	//
	// The default value (used when the map is nil) is the package-level
	// ignoredInternalProperties set.
	DeniedUserProperties map[string]struct{}

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value (base.DefaultComparer) uses the same ordering as
	// bytes.Compare.
	Comparer *Comparer

	// Merger defines the Merge function in use for this keyspace.
	//
	// The default value is base.DefaultMerger.
	Merger *Merger

	// Comparers and Mergers map comparer and merger names to their
	// implementations.
	// NOTE(review): presumably consulted to resolve the comparer/merger named
	// in an sstable being opened -- confirm against the reader code.
	Comparers Comparers
	Mergers   Mergers
	// KeySchemas contains the set of known key schemas to use when interpreting
	// columnar data blocks. Only used for sstables encoded in format
	// TableFormatPebblev5 or higher.
	//
	// The default value (used when the map is nil) is a set holding only the
	// package's default key schema.
	KeySchemas KeySchemas

	// Filters is a map from filter policy name to filter policy. Filters with
	// policies that are not in this map will be ignored.
	Filters map[string]FilterPolicy

	// LoggerAndTracer is an optional logger and tracer.
	//
	// The default value is base.NoopLoggerAndTracer{}.
	LoggerAndTracer base.LoggerAndTracer

	// FilterMetricsTracker is optionally used to track filter metrics.
	FilterMetricsTracker *FilterMetricsTracker

	// internal options can only be used from within the pebble package. They
	// are set through SetInternal and SetInternalCacheOpts.
	internal sstableinternal.ReaderOptions
}
     122             : 
// SetInternal sets the internal reader options, replacing the previously
// stored value wholesale (including any cache options set earlier through
// SetInternalCacheOpts). Note that even though this method is public, a caller
// outside the pebble package can't construct a value to pass to it.
func (o *ReaderOptions) SetInternal(internalOpts sstableinternal.ReaderOptions) {
	o.internal = internalOpts
}
     129             : 
// SetInternalCacheOpts sets the internal cache options. Only the CacheOpts
// field of the internal reader options is overwritten; the other internal
// options are left as they were. Note that even though this method is public,
// a caller outside the pebble package can't construct a value to pass to it.
func (o *ReaderOptions) SetInternalCacheOpts(cacheOpts sstableinternal.CacheOptions) {
	o.internal.CacheOpts = cacheOpts
}
     136             : 
     137           1 : func (o ReaderOptions) ensureDefaults() ReaderOptions {
     138           1 :         if o.Comparer == nil {
     139           1 :                 o.Comparer = base.DefaultComparer
     140           1 :         }
     141           1 :         if o.Merger == nil {
     142           1 :                 o.Merger = base.DefaultMerger
     143           1 :         }
     144           1 :         if o.LoggerAndTracer == nil {
     145           1 :                 o.LoggerAndTracer = base.NoopLoggerAndTracer{}
     146           1 :         }
     147           1 :         if o.DeniedUserProperties == nil {
     148           1 :                 o.DeniedUserProperties = ignoredInternalProperties
     149           1 :         }
     150           1 :         if o.KeySchemas == nil {
     151           1 :                 o.KeySchemas = defaultKeySchemas
     152           1 :         }
     153           1 :         return o
     154             : }
     155             : 
     156             : var defaultKeySchema = colblk.DefaultKeySchema(base.DefaultComparer, 16)
     157             : var defaultKeySchemas = MakeKeySchemas(&defaultKeySchema)
     158             : 
// WriterOptions holds the parameters used to control building an sstable.
//
// Unset (zero-valued) fields are replaced with defaults by ensureDefaults;
// the per-field comments below state the default where one applies.
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16.
	BlockRestartInterval int

	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096.
	BlockSize int

	// BlockSizeThreshold finishes a block if the block size is larger than the
	// specified percentage of the target block size and adding the next entry
	// would cause the block to be larger than the target block size.
	//
	// The default value is 90.
	BlockSizeThreshold int

	// SizeClassAwareThreshold imposes a minimum block size restriction for blocks
	// to be flushed, that is computed as the percentage of the target block size.
	// Note that this threshold takes precedence over BlockSizeThreshold when
	// valid AllocatorSizeClasses are specified.
	//
	// The default value is 60.
	SizeClassAwareThreshold int

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value (base.DefaultComparer) uses the same ordering as
	// bytes.Compare.
	Comparer *Comparer

	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression. Values
	// outside the range of known compression types are also replaced with
	// snappy compression.
	Compression block.Compression

	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value means to use no filter.
	FilterPolicy FilterPolicy

	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType

	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize.
	IndexBlockSize int

	// KeySchema describes the schema to use for sstable formats that make use
	// of columnar blocks, decomposing keys into their constituent components.
	// Ignored if TableFormat <= TableFormatPebblev4.
	//
	// If nil and the table format uses columnar blocks, the default key schema
	// for Comparer is used.
	KeySchema *colblk.KeySchema

	// MergerName is the name of the merger that defines the associative merge
	// operation to use for merging values written with {Batch,DB}.Merge. The
	// MergerName is checked for consistency with the value stored in the
	// sstable when it was written.
	//
	// The default value is the name of base.DefaultMerger.
	MergerName string

	// TableFormat specifies the format version for writing sstables. The default
	// is TableFormatMinSupported.
	TableFormat TableFormat

	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool

	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool

	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector

	// Checksum specifies which checksum to use.
	//
	// The default value (used when set to block.ChecksumTypeNone) is
	// block.ChecksumTypeCRC32c.
	Checksum block.ChecksumType

	// Parallelism is used to indicate that the sstable Writer is allowed to
	// compress data blocks and write datablocks to disk in parallel with the
	// Writer client goroutine.
	Parallelism bool

	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor

	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound

	// DisableValueBlocks is only used for TableFormat >= TableFormatPebblev3,
	// and if set to true, does not write any values to value blocks. This is
	// only intended for cases where the in-memory buffering of all value blocks
	// while writing a sstable is too expensive and likely to cause an OOM. It
	// is never set to true by a Pebble DB, and can be set to true when some
	// external code is directly generating huge sstables using Pebble's
	// sstable.Writer (for example, CockroachDB backups can sometimes write
	// 750MB sstables -- see
	// https://github.com/cockroachdb/cockroach/issues/117113).
	DisableValueBlocks bool

	// AllocatorSizeClasses provides a sorted list containing the supported size
	// classes of the underlying memory allocator. This provides hints to the
	// writer's flushing policy to select block sizes that preemptively reduce
	// internal fragmentation when loaded into the block cache.
	AllocatorSizeClasses []int

	// internal options can only be used from within the pebble package. They
	// are set through SetInternal.
	internal sstableinternal.WriterOptions

	// NumDeletionsThreshold mirrors Options.Experimental.NumDeletionsThreshold.
	//
	// The default value (used when zero) is DefaultNumDeletionsThreshold.
	NumDeletionsThreshold int

	// DeletionSizeRatioThreshold mirrors
	// Options.Experimental.DeletionSizeRatioThreshold.
	//
	// The default value (used when zero) is DefaultDeletionSizeRatioThreshold.
	DeletionSizeRatioThreshold float32

	// disableObsoleteCollector is used to disable the obsolete key block property
	// collector automatically added by sstable block writers.
	disableObsoleteCollector bool
}
     303             : 
     304             : // UserKeyPrefixBound represents a [Lower,Upper) bound of user key prefixes.
     305             : // If both are nil, there is no bound specified. Else, Compare(Lower,Upper)
     306             : // must be < 0.
     307             : type UserKeyPrefixBound struct {
     308             :         // Lower is a lower bound user key prefix.
     309             :         Lower []byte
     310             :         // Upper is an upper bound user key prefix.
     311             :         Upper []byte
     312             : }
     313             : 
     314             : // IsEmpty returns true iff the bound is empty.
     315           1 : func (ukb *UserKeyPrefixBound) IsEmpty() bool {
     316           1 :         return len(ukb.Lower) == 0 && len(ukb.Upper) == 0
     317           1 : }
     318             : 
     319             : // JemallocSizeClasses are a subset of available size classes in jemalloc[1],
     320             : // suitable for the AllocatorSizeClasses option.
     321             : //
     322             : // The size classes are used when writing sstables for determining target block
     323             : // sizes for flushes, with the goal of reducing internal memory fragmentation
     324             : // when the blocks are later loaded into the block cache. We only use the size
     325             : // classes between 16KiB - 256KiB as block limits fall in that range.
     326             : //
     327             : // [1] https://jemalloc.net/jemalloc.3.html#size_classes
     328             : var JemallocSizeClasses = []int{
     329             :         16 * 1024,
     330             :         20 * 1024, 24 * 1024, 28 * 1024, 32 * 1024, // 4KiB spacing
     331             :         40 * 1024, 48 * 1024, 56 * 1024, 64 * 1024, // 8KiB spacing
     332             :         80 * 1024, 96 * 1024, 112 * 1024, 128 * 1024, // 16KiB spacing.
     333             :         160 * 1024, 192 * 1024, 224 * 1024, 256 * 1024, // 32KiB spacing.
     334             :         320 * 1024,
     335             : }
     336             : 
// SetInternal sets the internal writer options, replacing the previously
// stored value wholesale. Note that even though this method is public, a
// caller outside the pebble package can't construct a value to pass to it.
func (o *WriterOptions) SetInternal(internalOpts sstableinternal.WriterOptions) {
	o.internal = internalOpts
}
     343             : 
     344           1 : func (o WriterOptions) ensureDefaults() WriterOptions {
     345           1 :         if o.BlockRestartInterval <= 0 {
     346           1 :                 o.BlockRestartInterval = base.DefaultBlockRestartInterval
     347           1 :         }
     348           1 :         if o.BlockSize <= 0 {
     349           1 :                 o.BlockSize = base.DefaultBlockSize
     350           1 :         }
     351           1 :         if o.BlockSizeThreshold <= 0 {
     352           1 :                 o.BlockSizeThreshold = base.DefaultBlockSizeThreshold
     353           1 :         }
     354           1 :         if o.SizeClassAwareThreshold <= 0 {
     355           1 :                 o.SizeClassAwareThreshold = base.SizeClassAwareBlockSizeThreshold
     356           1 :         }
     357           1 :         if o.Comparer == nil {
     358           1 :                 o.Comparer = base.DefaultComparer
     359           1 :         }
     360           1 :         if o.Compression <= block.DefaultCompression || o.Compression >= block.NCompression {
     361           1 :                 o.Compression = block.SnappyCompression
     362           1 :         }
     363           1 :         if o.IndexBlockSize <= 0 {
     364           1 :                 o.IndexBlockSize = o.BlockSize
     365           1 :         }
     366           1 :         if o.MergerName == "" {
     367           1 :                 o.MergerName = base.DefaultMerger.Name
     368           1 :         }
     369           1 :         if o.Checksum == block.ChecksumTypeNone {
     370           1 :                 o.Checksum = block.ChecksumTypeCRC32c
     371           1 :         }
     372             :         // By default, if the table format is not specified, fall back to using the
     373             :         // most compatible format that is supported by Pebble.
     374           1 :         if o.TableFormat == TableFormatUnspecified {
     375           1 :                 o.TableFormat = TableFormatMinSupported
     376           1 :         }
     377           1 :         if o.NumDeletionsThreshold == 0 {
     378           1 :                 o.NumDeletionsThreshold = DefaultNumDeletionsThreshold
     379           1 :         }
     380           1 :         if o.DeletionSizeRatioThreshold == 0 {
     381           1 :                 o.DeletionSizeRatioThreshold = DefaultDeletionSizeRatioThreshold
     382           1 :         }
     383           1 :         if o.KeySchema == nil && o.TableFormat.BlockColumnar() {
     384           1 :                 s := colblk.DefaultKeySchema(o.Comparer, 16 /* bundle size */)
     385           1 :                 o.KeySchema = &s
     386           1 :         }
     387           1 :         return o
     388             : }

Generated by: LCOV version 1.14