// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
)

// Compression is the per-block compression algorithm to use.
type Compression int

// The available compression types.
const (
	DefaultCompression Compression = iota
	NoCompression
	SnappyCompression
	ZstdCompression
	NCompression
)

var ignoredInternalProperties = map[string]struct{}{
	"rocksdb.column.family.id":             {},
	"rocksdb.fixed.key.length":             {},
	"rocksdb.index.key.is.user.key":        {},
	"rocksdb.index.value.is.delta.encoded": {},
	"rocksdb.oldest.key.time":              {},
	"rocksdb.creation.time":                {},
	"rocksdb.file.creation.time":           {},
	"rocksdb.format.version":               {},
}

func (c Compression) String() string {
	switch c {
	case DefaultCompression:
		return "Default"
	case NoCompression:
		return "NoCompression"
	case SnappyCompression:
		return "Snappy"
	case ZstdCompression:
		return "ZSTD"
	default:
		return "Unknown"
	}
}

// CompressionFromString returns an sstable.Compression from its
// string representation. Inverse of c.String() above.
func CompressionFromString(s string) Compression {
	switch s {
	case "Default":
		return DefaultCompression
	case "NoCompression":
		return NoCompression
	case "Snappy":
		return SnappyCompression
	case "ZSTD":
		return ZstdCompression
	default:
		return DefaultCompression
	}
}

// FilterType exports the base.FilterType type.
type FilterType = base.FilterType

// Exported TableFilter constants.
const (
	TableFilter = base.TableFilter
)

// FilterWriter exports the base.FilterWriter type.
type FilterWriter = base.FilterWriter

// FilterPolicy exports the base.FilterPolicy type.
type FilterPolicy = base.FilterPolicy

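// compressionRoundTripSketch is a hypothetical helper, not part of the
// original file, sketching how the constants above map to and from their
// string names via String and CompressionFromString.
func compressionRoundTripSketch() {
	for _, c := range []Compression{NoCompression, SnappyCompression, ZstdCompression} {
		// For the named algorithms the two functions are inverses:
		// "NoCompression", "Snappy", and "ZSTD" round-trip exactly.
		if CompressionFromString(c.String()) != c {
			panic("unexpected round-trip mismatch")
		}
	}
	// Any unrecognized name falls back to DefaultCompression, which the
	// writer later resolves to Snappy in ensureDefaults.
	_ = CompressionFromString("lz4") // == DefaultCompression
}
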
// ReaderOptions holds the parameters needed for reading an sstable.
type ReaderOptions struct {
	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default cache size is a zero-size cache.
	Cache *cache.Cache

	// DeniedUserProperties lists user properties that will not be added to
	// sst.Properties.UserProperties.
	DeniedUserProperties map[string]struct{}

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Merge defines the Merge function in use for this keyspace.
	Merge base.Merge

	// Filters is a map from filter policy name to filter policy. Filters with
	// policies that are not in this map will be ignored.
	Filters map[string]FilterPolicy

	// Merger defines the associative merge operation to use for merging values
	// written with {Batch,DB}.Merge. The MergerName is checked for consistency
	// with the value stored in the sstable when it was written.
	MergerName string

	// LoggerAndTracer is an optional logger and tracer.
	LoggerAndTracer base.LoggerAndTracer
}

func (o ReaderOptions) ensureDefaults() ReaderOptions {
	if o.Comparer == nil {
		o.Comparer = base.DefaultComparer
	}
	if o.Merge == nil {
		o.Merge = base.DefaultMerger.Merge
	}
	if o.MergerName == "" {
		o.MergerName = base.DefaultMerger.Name
	}
	if o.LoggerAndTracer == nil {
		o.LoggerAndTracer = base.NoopLoggerAndTracer{}
	}
	if o.DeniedUserProperties == nil {
		o.DeniedUserProperties = ignoredInternalProperties
	}
	return o
}

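// readerOptionsDefaultsSketch is a hypothetical helper, not part of the
// original file, illustrating what ensureDefaults fills in for a zero-value
// ReaderOptions.
func readerOptionsDefaultsSketch() ReaderOptions {
	var o ReaderOptions
	o = o.ensureDefaults()
	// After ensureDefaults:
	//   o.Comparer             is base.DefaultComparer (bytes.Compare ordering)
	//   o.Merge / o.MergerName are base.DefaultMerger's function and name
	//   o.LoggerAndTracer      is base.NoopLoggerAndTracer{}
	//   o.DeniedUserProperties is ignoredInternalProperties
	// o.Cache is left nil; the documented default is a zero-size cache.
	return o
}
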
// WriterOptions holds the parameters used to control building an sstable.
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16.
	BlockRestartInterval int

	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096.
	BlockSize int

	// BlockSizeThreshold finishes a block if the block size is larger than the
	// specified percentage of the target block size and adding the next entry
	// would cause the block to be larger than the target block size.
	//
	// The default value is 90.
	BlockSizeThreshold int

	// SizeClassAwareThreshold imposes a minimum block size restriction for
	// blocks to be flushed, computed as a percentage of the target block size.
	// Note that this threshold takes precedence over BlockSizeThreshold when
	// valid AllocatorSizeClasses are specified.
	//
	// The default value is 60.
	SizeClassAwareThreshold int

	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default is a nil cache.
	Cache *cache.Cache

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression.
	Compression Compression

	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value means to use no filter.
	FilterPolicy FilterPolicy

	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType

	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize.
	IndexBlockSize int

	// Merger defines the associative merge operation to use for merging values
	// written with {Batch,DB}.Merge. The MergerName is checked for consistency
	// with the value stored in the sstable when it was written.
	MergerName string

	// TableFormat specifies the format version for writing sstables. The default
	// is TableFormatMinSupported.
	TableFormat TableFormat

	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool

	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool

	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector

	// Checksum specifies which checksum to use.
	Checksum ChecksumType

	// Parallelism is used to indicate that the sstable Writer is allowed to
	// compress data blocks and write data blocks to disk in parallel with the
	// Writer client goroutine.
	Parallelism bool

	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor

	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound

	// DisableValueBlocks is only used for TableFormat >= TableFormatPebblev3,
	// and if set to true, does not write any values to value blocks. This is
	// only intended for cases where the in-memory buffering of all value blocks
	// while writing an sstable is too expensive and likely to cause an OOM. It
	// is never set to true by a Pebble DB, and can be set to true when some
	// external code is directly generating huge sstables using Pebble's
	// sstable.Writer (for example, CockroachDB backups can sometimes write
	// 750MB sstables -- see
	// https://github.com/cockroachdb/cockroach/issues/117113).
	DisableValueBlocks bool

	// AllocatorSizeClasses provides a sorted list containing the supported size
	// classes of the underlying memory allocator. This provides hints to the
	// writer's flushing policy to select block sizes that preemptively reduce
	// internal fragmentation when loaded into the block cache.
	AllocatorSizeClasses []int
}

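// writerOptionsSketch is a hypothetical example, not part of the original
// file, showing one way the fields above might be set by hand; the specific
// sizes are illustrative only. Anything left at its zero value is filled in
// by ensureDefaults below before the writer uses it.
func writerOptionsSketch() WriterOptions {
	o := WriterOptions{
		// Larger data blocks than the 4096-byte default; purely illustrative.
		BlockSize: 32 << 10,
		// A large index block target delays the switch to two-level indexes.
		IndexBlockSize: 256 << 10,
		// Pick an explicit algorithm; DefaultCompression resolves to Snappy.
		Compression: ZstdCompression,
		TableFormat: TableFormatMinSupported,
	}
	// Remaining fields (restart interval, thresholds, comparer, merger,
	// checksum, ...) receive their documented defaults here.
	return o.ensureDefaults()
}
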
func (o WriterOptions) ensureDefaults() WriterOptions {
	if o.BlockRestartInterval <= 0 {
		o.BlockRestartInterval = base.DefaultBlockRestartInterval
	}
	if o.BlockSize <= 0 {
		o.BlockSize = base.DefaultBlockSize
	}
	if o.BlockSizeThreshold <= 0 {
		o.BlockSizeThreshold = base.DefaultBlockSizeThreshold
	}
	if o.SizeClassAwareThreshold <= 0 {
		o.SizeClassAwareThreshold = base.SizeClassAwareBlockSizeThreshold
	}
	if o.Comparer == nil {
		o.Comparer = base.DefaultComparer
	}
	if o.Compression <= DefaultCompression || o.Compression >= NCompression {
		o.Compression = SnappyCompression
	}
	if o.IndexBlockSize <= 0 {
		o.IndexBlockSize = o.BlockSize
	}
	if o.MergerName == "" {
		o.MergerName = base.DefaultMerger.Name
	}
	if o.Checksum == ChecksumTypeNone {
		o.Checksum = ChecksumTypeCRC32c
	}
	// By default, if the table format is not specified, fall back to using the
	// most compatible format that is supported by Pebble.
	if o.TableFormat == TableFormatUnspecified {
		o.TableFormat = TableFormatMinSupported
	}
	return o
}
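
// writerDefaultsSketch is a hypothetical helper, not part of the original
// file, spelling out the fallbacks that ensureDefaults above applies to a
// zero-value WriterOptions.
func writerDefaultsSketch() {
	o := WriterOptions{}.ensureDefaults()
	// BlockRestartInterval, BlockSize, BlockSizeThreshold, and
	// SizeClassAwareThreshold take the base package defaults (documented
	// above as 16, 4096, 90, and 60).
	// Compression becomes SnappyCompression: DefaultCompression and any
	// out-of-range value are clamped to Snappy.
	// IndexBlockSize defaults to BlockSize, Checksum to ChecksumTypeCRC32c,
	// and TableFormat to TableFormatMinSupported.
	_ = o
}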