// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
)

// Compression is the per-block compression algorithm to use.
type Compression int

// The available compression types.
const (
	DefaultCompression Compression = iota
	NoCompression
	SnappyCompression
	ZstdCompression
	NCompression
)

var ignoredInternalProperties = map[string]struct{}{
	"rocksdb.column.family.id":             {},
	"rocksdb.fixed.key.length":             {},
	"rocksdb.index.key.is.user.key":        {},
	"rocksdb.index.value.is.delta.encoded": {},
	"rocksdb.oldest.key.time":              {},
	"rocksdb.creation.time":                {},
	"rocksdb.file.creation.time":           {},
	"rocksdb.format.version":               {},
}

func (c Compression) String() string {
	switch c {
	case DefaultCompression:
		return "Default"
	case NoCompression:
		return "NoCompression"
	case SnappyCompression:
		return "Snappy"
	case ZstdCompression:
		return "ZSTD"
	default:
		return "Unknown"
	}
}

// FilterType exports the base.FilterType type.
type FilterType = base.FilterType

// Exported TableFilter constants.
const (
	TableFilter = base.TableFilter
)

// FilterWriter exports the base.FilterWriter type.
type FilterWriter = base.FilterWriter

// FilterPolicy exports the base.FilterPolicy type.
type FilterPolicy = base.FilterPolicy

// ReaderOptions holds the parameters needed for reading an sstable.
type ReaderOptions struct {
	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default cache size is a zero-size cache.
	Cache *cache.Cache

	// User properties specified in this map will not be added to sst.Properties.UserProperties.
	DeniedUserProperties map[string]struct{}

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Merge defines the Merge function in use for this keyspace.
	Merge base.Merge

	// Filters is a map from filter policy name to filter policy. It is used for
	// debugging tools which may be used on multiple databases configured with
	// different filter policies. It is not necessary to populate this filters
	// map during normal usage of a DB.
	Filters map[string]FilterPolicy

	// Merger defines the associative merge operation to use for merging values
	// written with {Batch,DB}.Merge. The MergerName is checked for consistency
	// with the value stored in the sstable when it was written.
	MergerName string

	// Logger is an optional logger and tracer.
	LoggerAndTracer base.LoggerAndTracer
}

// ensureDefaults returns a copy of o with any unset fields set to their
// default values.
func (o ReaderOptions) ensureDefaults() ReaderOptions {
	if o.Comparer == nil {
		o.Comparer = base.DefaultComparer
	}
	if o.Merge == nil {
		o.Merge = base.DefaultMerger.Merge
	}
	if o.MergerName == "" {
		o.MergerName = base.DefaultMerger.Name
	}
	if o.LoggerAndTracer == nil {
		o.LoggerAndTracer = base.NoopLoggerAndTracer{}
	}
	if o.DeniedUserProperties == nil {
		o.DeniedUserProperties = ignoredInternalProperties
	}
	return o
}
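// The sketch below is illustrative only and is not part of the original file;
// the function name is made up for this listing. It shows that a zero-value
// ReaderOptions becomes usable once ensureDefaults has filled in the comparer,
// merger, logger, and denied-property set documented above.
func exampleReaderOptionsDefaults() ReaderOptions {
	var o ReaderOptions
	o = o.ensureDefaults()
	// At this point:
	//   o.Comparer == base.DefaultComparer
	//   o.MergerName == base.DefaultMerger.Name
	//   o.DeniedUserProperties is ignoredInternalProperties
	//   o.LoggerAndTracer is a base.NoopLoggerAndTracer
	return o
}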
// WriterOptions holds the parameters used to control building an sstable.
type WriterOptions struct {
	// BlockRestartInterval is the number of keys between restart points
	// for delta encoding of keys.
	//
	// The default value is 16.
	BlockRestartInterval int

	// BlockSize is the target uncompressed size in bytes of each table block.
	//
	// The default value is 4096.
	BlockSize int

	// BlockSizeThreshold finishes a block if the block size is larger than the
	// specified percentage of the target block size and adding the next entry
	// would cause the block to be larger than the target block size.
	//
	// The default value is 90.
	BlockSizeThreshold int

	// Cache is used to cache uncompressed blocks from sstables.
	//
	// The default is a nil cache.
	Cache *cache.Cache

	// Comparer defines a total ordering over the space of []byte keys: a 'less
	// than' relationship. The same comparison algorithm must be used for reads
	// and writes over the lifetime of the DB.
	//
	// The default value uses the same ordering as bytes.Compare.
	Comparer *Comparer

	// Compression defines the per-block compression to use.
	//
	// The default value (DefaultCompression) uses snappy compression.
	Compression Compression

	// FilterPolicy defines a filter algorithm (such as a Bloom filter) that can
	// reduce disk reads for Get calls.
	//
	// One such implementation is bloom.FilterPolicy(10) from the pebble/bloom
	// package.
	//
	// The default value means to use no filter.
	FilterPolicy FilterPolicy

	// FilterType defines whether an existing filter policy is applied at a
	// block-level or table-level. Block-level filters use less memory to create,
	// but are slower to access as a check for the key in the index must first be
	// performed to locate the filter block. A table-level filter will require
	// memory proportional to the number of keys in an sstable to create, but
	// avoids the index lookup when determining if a key is present. Table-level
	// filters should be preferred except under constrained memory situations.
	FilterType FilterType

	// IndexBlockSize is the target uncompressed size in bytes of each index
	// block. When the index block size is larger than this target, two-level
	// indexes are automatically enabled. Setting this option to a large value
	// (such as math.MaxInt32) disables the automatic creation of two-level
	// indexes.
	//
	// The default value is the value of BlockSize.
	IndexBlockSize int

	// Merger defines the associative merge operation to use for merging values
	// written with {Batch,DB}.Merge. The MergerName is checked for consistency
	// with the value stored in the sstable when it was written.
	MergerName string

	// TableFormat specifies the format version for writing sstables. The default
	// is TableFormatMinSupported.
	TableFormat TableFormat

	// IsStrictObsolete is only relevant for >= TableFormatPebblev4. See comment
	// in format.go. Must be false if format < TableFormatPebblev4.
	//
	// TODO(bilal): set this when writing shared ssts.
	IsStrictObsolete bool

	// WritingToLowestLevel is only relevant for >= TableFormatPebblev4. It is
	// used to set the obsolete bit on DEL/DELSIZED/SINGLEDEL if they are the
	// youngest for a userkey.
	WritingToLowestLevel bool

	// BlockPropertyCollectors is a list of BlockPropertyCollector creation
	// functions. A new BlockPropertyCollector is created for each sstable
	// built and lives for the lifetime of writing that table.
	BlockPropertyCollectors []func() BlockPropertyCollector

	// Checksum specifies which checksum to use.
	Checksum ChecksumType

	// Parallelism indicates that the sstable Writer is allowed to compress data
	// blocks and write them to disk in parallel with the Writer client
	// goroutine.
	Parallelism bool

	// ShortAttributeExtractor mirrors
	// Options.Experimental.ShortAttributeExtractor.
	ShortAttributeExtractor base.ShortAttributeExtractor

	// RequiredInPlaceValueBound mirrors
	// Options.Experimental.RequiredInPlaceValueBound.
	RequiredInPlaceValueBound UserKeyPrefixBound

	// DisableValueBlocks is only used for TableFormat >= TableFormatPebblev3,
	// and if set to true, does not write any values to value blocks. This is
	// only intended for cases where the in-memory buffering of all value blocks
	// while writing an sstable is too expensive and likely to cause an OOM. It
	// is never set to true by a Pebble DB, and can be set to true when some
	// external code is directly generating huge sstables using Pebble's
	// sstable.Writer (for example, CockroachDB backups can sometimes write
	// 750MB sstables -- see
	// https://github.com/cockroachdb/cockroach/issues/117113).
	DisableValueBlocks bool
}

// ensureDefaults returns a copy of o with any unset fields set to their
// default values.
func (o WriterOptions) ensureDefaults() WriterOptions {
	if o.BlockRestartInterval <= 0 {
		o.BlockRestartInterval = base.DefaultBlockRestartInterval
	}
	if o.BlockSize <= 0 {
		o.BlockSize = base.DefaultBlockSize
	}
	if o.BlockSizeThreshold <= 0 {
		o.BlockSizeThreshold = base.DefaultBlockSizeThreshold
	}
	if o.Comparer == nil {
		o.Comparer = base.DefaultComparer
	}
	if o.Compression <= DefaultCompression || o.Compression >= NCompression {
		o.Compression = SnappyCompression
	}
	if o.IndexBlockSize <= 0 {
		o.IndexBlockSize = o.BlockSize
	}
	if o.MergerName == "" {
		o.MergerName = base.DefaultMerger.Name
	}
	if o.Checksum == ChecksumTypeNone {
		o.Checksum = ChecksumTypeCRC32c
	}
	// By default, if the table format is not specified, fall back to using the
	// most compatible format that is supported by Pebble.
	if o.TableFormat == TableFormatUnspecified {
		o.TableFormat = TableFormatMinSupported
	}
	return o
}
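// The sketch below is illustrative only and is not part of the original file;
// the function name is made up for this listing. It shows a typical
// WriterOptions configuration built from the fields documented above; any
// field left unset is filled in by ensureDefaults (4096-byte blocks, a restart
// interval of 16, Snappy compression, CRC32c checksums, and so on). A Bloom
// filter could be enabled with bloom.FilterPolicy(10) from the pebble/bloom
// package, as noted in the FilterPolicy comment; it is omitted here to keep
// the sketch free of additional imports.
func exampleWriterOptionsConfig() WriterOptions {
	o := WriterOptions{
		BlockSize:            32 << 10,  // 32 KiB data blocks instead of the 4096-byte default
		BlockRestartInterval: 16,        // keys between restart points for key delta encoding
		IndexBlockSize:       256 << 10, // larger index blocks delay switching to two-level indexes
		Compression:          SnappyCompression,
		FilterType:           TableFilter, // table-level filter (see FilterType above)
		Parallelism:          true,        // compress and write data blocks off the client goroutine
	}
	return o.ensureDefaults()
}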