Line data Source code
1 : // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package sstable 6 : 7 : import ( 8 : "github.com/cockroachdb/fifo" 9 : "github.com/cockroachdb/pebble/internal/base" 10 : "github.com/cockroachdb/pebble/internal/sstableinternal" 11 : "github.com/cockroachdb/pebble/sstable/block" 12 : "github.com/cockroachdb/pebble/sstable/rowblk" 13 : ) 14 : 15 : const ( 16 : // MaximumBlockSize is the maximum permissible size of a block. 17 : MaximumBlockSize = rowblk.MaximumSize 18 : // DefaultNumDeletionsThreshold defines the minimum number of point 19 : // tombstones that must be present in a data block for it to be 20 : // considered tombstone-dense. 21 : DefaultNumDeletionsThreshold = 100 22 : // DefaultDeletionSizeRatioThreshold defines the minimum ratio of the size 23 : // of point tombstones to the size of the data block in order to consider the 24 : // block as tombstone-dense. 25 : DefaultDeletionSizeRatioThreshold = 0.5 26 : ) 27 : 28 : var ignoredInternalProperties = map[string]struct{}{ 29 : "rocksdb.column.family.id": {}, 30 : "rocksdb.fixed.key.length": {}, 31 : "rocksdb.index.key.is.user.key": {}, 32 : "rocksdb.index.value.is.delta.encoded": {}, 33 : "rocksdb.oldest.key.time": {}, 34 : "rocksdb.creation.time": {}, 35 : "rocksdb.file.creation.time": {}, 36 : "rocksdb.format.version": {}, 37 : } 38 : 39 : // FilterType exports the base.FilterType type. 40 : type FilterType = base.FilterType 41 : 42 : // Exported TableFilter constants. 43 : const ( 44 : TableFilter = base.TableFilter 45 : ) 46 : 47 : // FilterWriter exports the base.FilterWriter type. 48 : type FilterWriter = base.FilterWriter 49 : 50 : // FilterPolicy exports the base.FilterPolicy type. 51 : type FilterPolicy = base.FilterPolicy 52 : 53 : // Comparers is a map from comparer name to comparer. It is used for debugging 54 : // tools which may be used on multiple databases configured with different 55 : // comparers. 56 : type Comparers map[string]*base.Comparer 57 : 58 : // Mergers is a map from merger name to merger. It is used for debugging tools 59 : // which may be used on multiple databases configured with different 60 : // mergers. 61 : type Mergers map[string]*base.Merger 62 : 63 : // ReaderOptions holds the parameters needed for reading an sstable. 64 : type ReaderOptions struct { 65 : // LoadBlockSema, if set, is used to limit the number of blocks that can be 66 : // loaded (i.e. read from the filesystem) in parallel. Each load acquires one 67 : // unit from the semaphore for the duration of the read. 68 : LoadBlockSema *fifo.Semaphore 69 : 70 : // User properties specified in this map will not be added to sst.Properties.UserProperties. 71 : DeniedUserProperties map[string]struct{} 72 : 73 : // Comparer defines a total ordering over the space of []byte keys: a 'less 74 : // than' relationship. The same comparison algorithm must be used for reads 75 : // and writes over the lifetime of the DB. 76 : // 77 : // The default value uses the same ordering as bytes.Compare. 78 : Comparer *Comparer 79 : 80 : // Merger defines the Merge function in use for this keyspace. 81 : Merger *Merger 82 : 83 : Comparers Comparers 84 : Mergers Mergers 85 : 86 : // Filters is a map from filter policy name to filter policy. Filters with 87 : // policies that are not in this map will be ignored. 88 : Filters map[string]FilterPolicy 89 : 90 : // Logger is an optional logger and tracer. 91 : LoggerAndTracer base.LoggerAndTracer 92 : 93 : // FilterMetricsTracker is optionally used to track filter metrics. 94 : FilterMetricsTracker *FilterMetricsTracker 95 : 96 : // internal options can only be used from within the pebble package. 97 : internal sstableinternal.ReaderOptions 98 : } 99 : 100 : // SetInternal sets the internal reader options. Note that even though this 101 : // method is public, a caller outside the pebble package can't construct a value 102 : // to pass to it. 103 0 : func (o *ReaderOptions) SetInternal(internalOpts sstableinternal.ReaderOptions) { 104 0 : o.internal = internalOpts 105 0 : } 106 : 107 : // SetInternalCacheOpts sets the internal cache options. Note that even though 108 : // this method is public, a caller outside the pebble package can't construct a 109 : // value to pass to it. 110 1 : func (o *ReaderOptions) SetInternalCacheOpts(cacheOpts sstableinternal.CacheOptions) { 111 1 : o.internal.CacheOpts = cacheOpts 112 1 : } 113 : 114 1 : func (o ReaderOptions) ensureDefaults() ReaderOptions { 115 1 : if o.Comparer == nil { 116 0 : o.Comparer = base.DefaultComparer 117 0 : } 118 1 : if o.Merger == nil { 119 1 : o.Merger = base.DefaultMerger 120 1 : } 121 1 : if o.LoggerAndTracer == nil { 122 1 : o.LoggerAndTracer = base.NoopLoggerAndTracer{} 123 1 : } 124 1 : if o.DeniedUserProperties == nil { 125 1 : o.DeniedUserProperties = ignoredInternalProperties 126 1 : } 127 1 : return o 128 : } 129 : 130 : // WriterOptions holds the parameters used to control building an sstable. 131 : type WriterOptions struct { 132 : // BlockRestartInterval is the number of keys between restart points 133 : // for delta encoding of keys. 134 : // 135 : // The default value is 16. 136 : BlockRestartInterval int 137 : 138 : // BlockSize is the target uncompressed size in bytes of each table block. 139 : // 140 : // The default value is 4096. 141 : BlockSize int 142 : 143 : // BlockSizeThreshold finishes a block if the block size is larger than the 144 : // specified percentage of the target block size and adding the next entry 145 : // would cause the block to be larger than the target block size. 146 : // 147 : // The default value is 90. 148 : BlockSizeThreshold int 149 : 150 : // SizeClassAwareThreshold imposes a minimum block size restriction for blocks 151 : // to be flushed, that is computed as the percentage of the target block size. 152 : // Note that this threshold takes precedence over BlockSizeThreshold when 153 : // valid AllocatorSizeClasses are specified. 154 : // 155 : // The default value is 60. 156 : SizeClassAwareThreshold int 157 : 158 : // Comparer defines a total ordering over the space of []byte keys: a 'less 159 : // than' relationship. The same comparison algorithm must be used for reads 160 : // and writes over the lifetime of the DB. 161 : // 162 : // The default value uses the same ordering as bytes.Compare. 163 : Comparer *Comparer 164 : 165 : // Compression defines the per-block compression to use. 166 : // 167 : // The default value (DefaultCompression) uses snappy compression. 168 : Compression block.Compression 169 : 170 : // FilterPolicy defines a filter algorithm (such as a Bloom filter) that can 171 : // reduce disk reads for Get calls. 172 : // 173 : // One such implementation is bloom.FilterPolicy(10) from the pebble/bloom 174 : // package. 175 : // 176 : // The default value means to use no filter. 177 : FilterPolicy FilterPolicy 178 : 179 : // FilterType defines whether an existing filter policy is applied at a 180 : // block-level or table-level. Block-level filters use less memory to create, 181 : // but are slower to access as a check for the key in the index must first be 182 : // performed to locate the filter block. A table-level filter will require 183 : // memory proportional to the number of keys in an sstable to create, but 184 : // avoids the index lookup when determining if a key is present. Table-level 185 : // filters should be preferred except under constrained memory situations. 186 : FilterType FilterType 187 : 188 : // IndexBlockSize is the target uncompressed size in bytes of each index 189 : // block. When the index block size is larger than this target, two-level 190 : // indexes are automatically enabled. Setting this option to a large value 191 : // (such as math.MaxInt32) disables the automatic creation of two-level 192 : // indexes. 193 : // 194 : // The default value is the value of BlockSize. 195 : IndexBlockSize int 196 : 197 : // Merger defines the associative merge operation to use for merging values 198 : // written with {Batch,DB}.Merge. The MergerName is checked for consistency 199 : // with the value stored in the sstable when it was written. 200 : MergerName string 201 : 202 : // TableFormat specifies the format version for writing sstables. The default 203 : // is TableFormatMinSupported. 204 : TableFormat TableFormat 205 : 206 : // IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment 207 : // in format.go. Must be false if format < TableFormatPebblev4. 208 : // 209 : // TODO(bilal): set this when writing shared ssts. 210 : IsStrictObsolete bool 211 : 212 : // WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is 213 : // used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the 214 : // youngest for a userkey. 215 : WritingToLowestLevel bool 216 : 217 : // BlockPropertyCollectors is a list of BlockPropertyCollector creation 218 : // functions. A new BlockPropertyCollector is created for each sstable 219 : // built and lives for the lifetime of writing that table. 220 : BlockPropertyCollectors []func() BlockPropertyCollector 221 : 222 : // Checksum specifies which checksum to use. 223 : Checksum block.ChecksumType 224 : 225 : // Parallelism is used to indicate that the sstable Writer is allowed to 226 : // compress data blocks and write datablocks to disk in parallel with the 227 : // Writer client goroutine. 228 : Parallelism bool 229 : 230 : // ShortAttributeExtractor mirrors 231 : // Options.Experimental.ShortAttributeExtractor. 232 : ShortAttributeExtractor base.ShortAttributeExtractor 233 : 234 : // RequiredInPlaceValueBound mirrors 235 : // Options.Experimental.RequiredInPlaceValueBound. 236 : RequiredInPlaceValueBound UserKeyPrefixBound 237 : 238 : // DisableValueBlocks is only used for TableFormat >= TableFormatPebblev3, 239 : // and if set to true, does not write any values to value blocks. This is 240 : // only intended for cases where the in-memory buffering of all value blocks 241 : // while writing a sstable is too expensive and likely to cause an OOM. It 242 : // is never set to true by a Pebble DB, and can be set to true when some 243 : // external code is directly generating huge sstables using Pebble's 244 : // sstable.Writer (for example, CockroachDB backups can sometimes write 245 : // 750MB sstables -- see 246 : // https://github.com/cockroachdb/cockroach/issues/117113). 247 : DisableValueBlocks bool 248 : 249 : // AllocatorSizeClasses provides a sorted list containing the supported size 250 : // classes of the underlying memory allocator. This provides hints to the 251 : // writer's flushing policy to select block sizes that preemptively reduce 252 : // internal fragmentation when loaded into the block cache. 253 : AllocatorSizeClasses []int 254 : 255 : // internal options can only be used from within the pebble package. 256 : internal sstableinternal.WriterOptions 257 : 258 : // NumDeletionsThreshold mirrors Options.Experimental.NumDeletionsThreshold. 259 : NumDeletionsThreshold int 260 : 261 : // DeletionSizeRatioThreshold mirrors 262 : // Options.Experimental.DeletionSizeRatioThreshold. 263 : DeletionSizeRatioThreshold float32 264 : } 265 : 266 : // SetInternal sets the internal writer options. Note that even though this 267 : // method is public, a caller outside the pebble package can't construct a value 268 : // to pass to it. 269 1 : func (o *WriterOptions) SetInternal(internalOpts sstableinternal.WriterOptions) { 270 1 : o.internal = internalOpts 271 1 : } 272 : 273 1 : func (o WriterOptions) ensureDefaults() WriterOptions { 274 1 : if o.BlockRestartInterval <= 0 { 275 0 : o.BlockRestartInterval = base.DefaultBlockRestartInterval 276 0 : } 277 1 : if o.BlockSize <= 0 { 278 0 : o.BlockSize = base.DefaultBlockSize 279 0 : } 280 1 : if o.BlockSizeThreshold <= 0 { 281 0 : o.BlockSizeThreshold = base.DefaultBlockSizeThreshold 282 0 : } 283 1 : if o.SizeClassAwareThreshold <= 0 { 284 1 : o.SizeClassAwareThreshold = base.SizeClassAwareBlockSizeThreshold 285 1 : } 286 1 : if o.Comparer == nil { 287 0 : o.Comparer = base.DefaultComparer 288 0 : } 289 1 : if o.Compression <= block.DefaultCompression || o.Compression >= block.NCompression { 290 0 : o.Compression = block.SnappyCompression 291 0 : } 292 1 : if o.IndexBlockSize <= 0 { 293 0 : o.IndexBlockSize = o.BlockSize 294 0 : } 295 1 : if o.MergerName == "" { 296 0 : o.MergerName = base.DefaultMerger.Name 297 0 : } 298 1 : if o.Checksum == block.ChecksumTypeNone { 299 1 : o.Checksum = block.ChecksumTypeCRC32c 300 1 : } 301 : // By default, if the table format is not specified, fall back to using the 302 : // most compatible format that is supported by Pebble. 303 1 : if o.TableFormat == TableFormatUnspecified { 304 0 : o.TableFormat = TableFormatMinSupported 305 0 : } 306 1 : if o.NumDeletionsThreshold == 0 { 307 0 : o.NumDeletionsThreshold = DefaultNumDeletionsThreshold 308 0 : } 309 1 : if o.DeletionSizeRatioThreshold == 0 { 310 0 : o.DeletionSizeRatioThreshold = DefaultDeletionSizeRatioThreshold 311 0 : } 312 1 : return o 313 : }