LCOV - code coverage report
Current view: top level - pebble - format_major_version.go (source / functions) Coverage Total Hit
Test: 2025-09-11 08:19Z 34f641bd - tests only.lcov Lines: 76.7 % 270 207
Test Date: 2025-09-11 08:20:02 Functions: - 0 0

            Line data    Source code
       1              : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2              : // of this source code is governed by a BSD-style license that can be found in
       3              : // the LICENSE file.
       4              : 
       5              : package pebble
       6              : 
       7              : import (
       8              :         "fmt"
       9              :         "strconv"
      10              : 
      11              :         "github.com/cockroachdb/errors"
      12              :         "github.com/cockroachdb/pebble/internal/manifest"
      13              :         "github.com/cockroachdb/pebble/objstorage/remote"
      14              :         "github.com/cockroachdb/pebble/sstable"
      15              :         "github.com/cockroachdb/pebble/sstable/blob"
      16              :         "github.com/cockroachdb/pebble/vfs"
      17              :         "github.com/cockroachdb/pebble/vfs/atomicfs"
      18              : )
      19              : 
      20              : // FormatMajorVersion is a constant controlling the format of persisted
      21              : // data. Backwards-incompatible changes to durable formats are gated
      22              : // behind new format major versions.
      23              : //
      24              : // At any point, a database's format major version may be bumped.
      25              : // However, once a database's format major version is increased,
      26              : // previous versions of Pebble will refuse to open the database.
      27              : //
      28              : // The zero value is the FormatDefault constant. The exact
      29              : // FormatMajorVersion that the default corresponds to may change with time.
      30              : type FormatMajorVersion uint64
      31              : 
      32              : // SafeValue implements redact.SafeValue; it is a marker method with an empty body.
      33            0 : func (v FormatMajorVersion) SafeValue() {}
      34              : 
      35              : // String implements fmt.Stringer, formatting v as a zero-padded decimal number.
      36            1 : func (v FormatMajorVersion) String() string {
      37            1 :         // NB: This must not change. It's used as the value for the on-disk
      38            1 :         // version marker file.
      39            1 :         //
      40            1 :         // Specifically, this value must always parse as a base 10 integer
      41            1 :         // that fits in a uint64. We format it as a zero-padded, 3-digit
      42            1 :         // number today, but the padding may change.
      43            1 :         return fmt.Sprintf("%03d", v)
      44            1 : }
      45              : 
      46              : const (
      47              :         // FormatDefault leaves the format version unspecified. When used to create a
      48              :         // new store, Pebble will choose the earliest format version it supports.
      49              :         FormatDefault FormatMajorVersion = iota
      50              : 
      51              :         // 21.2 versions.
      52              : 
      53              :         // FormatMostCompatible maintains the most backwards compatibility,
      54              :         // maintaining bi-directional compatibility with RocksDB 6.2.1 in
      55              :         // the particular configuration described in the Pebble README.
      56              :         // Deprecated.
      57              :         _ // FormatMostCompatible
      58              : 
      59              :         // formatVersionedManifestMarker is the first
      60              :         // backwards-incompatible change made to Pebble, introducing the
      61              :         // format-version marker file for handling backwards-incompatible
      62              :         // changes more broadly, and replacing the `CURRENT` file with a
      63              :         // marker file.
      64              :         //
      65              :         // This format version is intended as an intermediary version state.
      66              :         // It is deliberately unexported to discourage direct use of this
      67              :         // format major version. Clients should use FormatVersioned which
      68              :         // also ensures earlier versions of Pebble fail to open a database
      69              :         // written in a future format major version.
      70              :         // Deprecated.
      71              :         _ // formatVersionedManifestMarker
      72              : 
      73              :         // FormatVersioned is a new format major version that replaces the
      74              :         // old `CURRENT` file with a new 'marker' file scheme. Previous
      75              :         // Pebble versions will be unable to open the database unless
      76              :         // they're aware of format versions.
      77              :         // Deprecated.
      78              :         _ // FormatVersioned
      79              : 
      80              :         // FormatSetWithDelete is a format major version that introduces a new key
      81              :         // kind, base.InternalKeyKindSetWithDelete. Previous Pebble versions will be
      82              :         // unable to open this database.
      83              :         // Deprecated.
      84              :         _ // FormatSetWithDelete
      85              : 
      86              :         // 22.1 versions.
      87              : 
      88              :         // FormatBlockPropertyCollector is a format major version that introduces
      89              :         // BlockPropertyCollectors.
      90              :         // Deprecated.
      91              :         _ // FormatBlockPropertyCollector
      92              : 
      93              :         // FormatSplitUserKeysMarked is a format major version that guarantees that
      94              :         // all files that share user keys with neighbors are marked for compaction
      95              :         // in the manifest. Ratcheting to FormatSplitUserKeysMarked will block
      96              :         // (without holding mutexes) until the scan of the LSM is complete and the
      97              :         // manifest has been rotated.
      98              :         // Deprecated.
      99              :         _ // FormatSplitUserKeysMarked
     100              : 
     101              :         // 22.2 versions.
     102              : 
     103              :         // FormatSplitUserKeysMarkedCompacted is a format major version that
     104              :         // guarantees that all files explicitly marked for compaction in the manifest
     105              :         // have been compacted. Combined with the FormatSplitUserKeysMarked format
     106              :         // major version, this version guarantees that there are no user keys split
     107              :         // across multiple files within a level L1+. Ratcheting to this format version
     108              :         // will block (without holding mutexes) until all necessary compactions for
     109              :         // files marked for compaction are complete.
     110              :         // Deprecated.
     111              :         _ // FormatSplitUserKeysMarkedCompacted
     112              : 
     113              :         // FormatRangeKeys is a format major version that introduces range keys.
     114              :         // Deprecated.
     115              :         _ // FormatRangeKeys
     116              : 
     117              :         // FormatMinTableFormatPebblev1 is a format major version that guarantees that
     118              :         // tables created by or ingested into the DB at or above this format major
     119              :         // version will have a table format version of at least Pebblev1 (Block
     120              :         // FileProperties).
     121              :         // Deprecated.
     122              :         _ // FormatMinTableFormatPebblev1
     123              : 
     124              :         // FormatPrePebblev1Marked is a format major version that guarantees that all
     125              :         // sstables with a table format version pre-Pebblev1 (i.e. those that are
     126              :         // guaranteed to not contain block properties) are marked for compaction in
     127              :         // the manifest. Ratcheting to FormatPrePebblev1Marked will block (without
     128              :         // holding mutexes) until the scan of the LSM is complete and the manifest has
     129              :         // been rotated.
     130              :         // Deprecated.
     131              :         _ // FormatPrePebblev1Marked
     132              : 
     133              :         // 23.1 versions.
     134              : 
     135              :         // formatUnusedPrePebblev1MarkedCompacted is an unused format major version.
     136              :         // This format major version was originally intended to ship in the 23.1
     137              :         // release. It was later decided that this should be deferred until a
     138              :         // subsequent release. The original ordering is preserved so as not to
     139              :         // introduce breaking changes in Cockroach.
     140              :         _ // formatUnusedPrePebblev1MarkedCompacted
     141              : 
     142              :         // FormatSSTableValueBlocks is a format major version that adds support for
     143              :         // storing values in value blocks in the sstable. Value block support is not
     144              :         // necessarily enabled when writing sstables, when running with this format
     145              :         // major version.
     146              :         _ // FormatSSTableValueBlocks
     147              : 
     148              :         // FormatFlushableIngest is a format major version that enables lazy
     149              :         // addition of ingested sstables into the LSM structure. When an ingest
     150              :         // overlaps with a memtable, a record of the ingest is written to the WAL
     151              :         // without waiting for a flush. Subsequent reads treat the ingested files as
     152              :         // a level above the overlapping memtable. Once the memtable is flushed, the
     153              :         // ingested files are moved into the lowest possible levels.
     154              :         //
     155              :         // This feature is behind a format major version because it required
     156              :         // breaking changes to the WAL format.
     157              :         FormatFlushableIngest
     158              : 
     159              :         // 23.2 versions.
     160              : 
     161              :         // FormatPrePebblev1MarkedCompacted is a format major version that guarantees
     162              :         // that all sstables explicitly marked for compaction in the manifest (see
     163              :         // FormatPrePebblev1Marked) have been compacted. Ratcheting to this format
     164              :         // version will block (without holding mutexes) until all necessary
     165              :         // compactions for files marked for compaction are complete.
     166              :         FormatPrePebblev1MarkedCompacted
     167              : 
     168              :         // FormatDeleteSizedAndObsolete is a format major version that adds support
     169              :         // for deletion tombstones that encode the size of the value they're
     170              :         // expected to delete. This format major version is required before the
     171              :         // associated key kind may be committed through batch applications or
     172              :         // ingests. It also adds support for keys that are marked obsolete (see
     173              :         // sstable/format.go for details).
     174              :         FormatDeleteSizedAndObsolete
     175              : 
     176              :         // FormatVirtualSSTables is a format major version that adds support for
     177              :         // virtual sstables that can reference a sub-range of keys in an underlying
     178              :         // physical sstable. This information is persisted through new,
     179              :         // backward-incompatible fields in the Manifest, and therefore requires
     180              :         // a format major version.
     181              :         FormatVirtualSSTables
     182              : 
     183              :         // FormatSyntheticPrefixSuffix is a format major version that adds support for
     184              :         // sstables to have their content exposed in a different prefix or suffix of
     185              :         // keyspace than the actual prefix/suffix persisted in the keys in such
     186              :         // sstables. The prefix and suffix replacement information is stored in new
     187              :         // fields in the Manifest and thus requires a format major version.
     188              :         FormatSyntheticPrefixSuffix
     189              : 
     190              :         // FormatFlushableIngestExcises is a format major version that adds support for
     191              :         // having excises unconditionally being written as flushable ingestions. This
     192              :         // is implemented through adding a new key kind that can go in the same batches
     193              :         // as flushable ingested sstables.
     194              :         FormatFlushableIngestExcises
     195              : 
     196              :         // FormatColumnarBlocks is a format major version enabling use of the
     197              :         // TableFormatPebblev5 table format, that encodes sstable data blocks, index
     198              :         // blocks and keyspan blocks by organizing the KVs into columns within the
     199              :         // block.
     200              :         FormatColumnarBlocks
     201              : 
     202              :         // FormatWALSyncChunks is a format major version enabling the writing of
     203              :         // WAL sync chunks. These new chunks are used to disambiguate between corruption
     204              :         // and logical EOF during WAL replay. This is implemented by adding a new
     205              :         // chunk wire format that encodes an additional "Synced Offset" field which acts
     206              :         // as a commitment that the WAL should have been synced up until the offset.
     207              :         FormatWALSyncChunks
     208              : 
     209              :         // FormatTableFormatV6 is a format major version enabling the sstable table
     210              :         // format TableFormatPebblev6.
     211              :         //
     212              :         // The TableFormatPebblev6 sstable format introduces a checksum within the
     213              :         // sstable footer, allows inclusion of blob handle references within the
     214              :         // value column of a sstable block, and supports columnar meta index +
     215              :         // properties blocks.
     216              :         //
     217              :         // This format major version does not yet enable use of value separation.
     218              :         FormatTableFormatV6
     219              : 
     220              :         // formatDeprecatedExperimentalValueSeparation was used to enable an
     221              :         // experimental version of value separation, separating values into external
     222              :         // blob files that do not participate in every compaction.
     223              :         //
     224              :         // Value separation now depends on TableFormatPebblev7 which this format
     225              :         // major version precedes. This format major version is deprecated and
     226              :         // unexported, and value separation now requires FormatValueSeparation.
     227              :         formatDeprecatedExperimentalValueSeparation
     228              : 
     229              :         // formatFooterAttributes is a format major version that adds support for
     230              :         // writing sstable.Attributes in the footer of sstables.
     231              :         formatFooterAttributes
     232              : 
     233              :         // FormatValueSeparation is a format major version that adds support for
     234              :         // value separation, separating values into external blob files that do not
     235              :         // participate in every compaction.
     236              :         FormatValueSeparation
     237              : 
     238              :         // FormatExciseBoundsRecord is a format major version that adds support for
     239              :         // persisting excise bounds records in the manifest (VersionEdit).
     240              :         FormatExciseBoundsRecord
     241              : 
     242              :         // FormatV2BlobFiles is a format major version that adds support for V2 blob
     243              :         // file format (which adds compression statistics).
     244              :         FormatV2BlobFiles
     245              : 
     246              :         // FormatBackingValueSize is a format major version that adds support for
     247              :         // persisting the value size of the backing sst for virtual sstables in the
     248              :         // manifest (VersionEdit).
     249              :         FormatBackingValueSize
     250              : 
     251              :         // FormatMarkForCompactionInVersionEdit is a format major version that adds
     252              :         // support for marking tables for compaction in a version edit (via
     253              :         // tagTableMarkedForCompaction). Previously, this required a manifest rotation.
     254              :         FormatMarkForCompactionInVersionEdit
     255              : 
     256              :         // -- Add new versions here --
     257              : 
     258              :         // FormatNewest is the most recent non-experimental format major version (iota is one past it here, hence -1).
     259              :         FormatNewest FormatMajorVersion = iota - 1
     260              : 
     261              :         // Experimental versions, which are excluded by FormatNewest (but can be used
     262              :         // in tests) can be defined here.
     263              : 
     264              :         // -- Add experimental versions here --
     265              : 
     266              :         // internalFormatNewest is the most recent, possibly experimental format major
     267              :         // version. With no experimental versions defined, it equals FormatNewest.
     268              :         internalFormatNewest FormatMajorVersion = iota - 2
     269              : )
     270              : 
     271              : // FormatMinSupported is the oldest format major version that this Pebble
     272              : // version supports; resolveDefault substitutes it for FormatDefault.
     273              : const FormatMinSupported = FormatFlushableIngest
     274              : 
     275              : // FormatMinForSharedObjects is the minimum format version that supports shared
     276              : // objects (see CreateOnShared option).
     277              : const FormatMinForSharedObjects = FormatVirtualSSTables
     278              : 
     279              : // resolveDefault maps FormatDefault to FormatMinSupported and returns any other
     280              : // version unchanged, panicking if it is outside [FormatMinSupported, internalFormatNewest].
     281            1 : func (v FormatMajorVersion) resolveDefault() FormatMajorVersion {
     282            1 :         if v == FormatDefault {
     283            1 :                 return FormatMinSupported
     284            1 :         }
     285            1 :         if v < FormatMinSupported || v > internalFormatNewest {
     286            1 :                 panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
     287              :         }
     288            1 :         return v
     289              : }
     290              : 
     291              : // MaxTableFormat returns the maximum sstable.TableFormat that can be used at
     292              : // this FormatMajorVersion. It panics (via resolveDefault) if v is unsupported.
     293            1 : func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat {
     294            1 :         v = v.resolveDefault()
     295            1 :         switch {
     296            1 :         case v >= formatFooterAttributes:
     297            1 :                 return sstable.TableFormatPebblev7
     298            1 :         case v >= FormatTableFormatV6:
     299            1 :                 return sstable.TableFormatPebblev6
     300            1 :         case v >= FormatColumnarBlocks:
     301            1 :                 return sstable.TableFormatPebblev5
     302            1 :         case v >= FormatDeleteSizedAndObsolete:
     303            1 :                 return sstable.TableFormatPebblev4
     304            1 :         default:
     305            1 :                 return sstable.TableFormatPebblev3
     306              :         }
     307              : }
     308              : 
     309              : // MinTableFormat returns the minimum sstable.TableFormat that can be used at
     310              : // this FormatMajorVersion; currently always TableFormatPebblev1 for a supported v.
     311            1 : func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat {
     312            1 :         _ = v.resolveDefault()
     313            1 :         return sstable.TableFormatPebblev1
     314            1 : }
     315              : 
     316              : // MaxBlobFileFormat returns the maximum blob.FileFormat that can be used at
     317              : // this FormatMajorVersion. It panics for versions older than
     318              : // FormatValueSeparation, which do not support blob files.
     319            1 : func (v FormatMajorVersion) MaxBlobFileFormat() blob.FileFormat {
     320            1 :         v = v.resolveDefault()
     321            1 :         switch {
     322            1 :         case v >= FormatV2BlobFiles:
     323            1 :                 return blob.FileFormatV2
     324            1 :         case v >= FormatValueSeparation:
     325            1 :                 return blob.FileFormatV1
     326            1 :         default:
     327            1 :                 panic(fmt.Sprintf("pebble: format major version %s does not support blob files", v))
     328              :         }
     329              : }
     330              : 
     331              : // formatMajorVersionMigrations defines the migrations from one format
     332              : // major version to the next. Each migration is defined as a closure
     333              : // which will be invoked on the database before the new format major
     334              : // version is committed. Migrations must be idempotent. Migrations are
     335              : // invoked with d.mu locked.
     336              : //
     337              : // Each migration is responsible for invoking finalizeFormatVersUpgrade
     338              : // to set the new format major version (the FormatFlushableIngest entry, which
     339              : // is FormatMinSupported, just returns nil). RatchetFormatMajorVersion will panic
     340              : // if a migration returns a nil error but fails to finalize the new version.
     341              : var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{
     342            0 :         FormatFlushableIngest: func(d *DB) error { return nil },
     343            1 :         FormatPrePebblev1MarkedCompacted: func(d *DB) error {
     344            1 :                 // Before finalizing the format major version, rewrite any sstables
     345            1 :                 // still marked for compaction. Note all format major versions
     346            1 :                 // migrations are invoked with DB.mu locked.
     347            1 :                 if err := d.compactMarkedFilesLocked(); err != nil {
     348            0 :                         return err
     349            0 :                 }
     350            1 :                 return d.finalizeFormatVersUpgrade(FormatPrePebblev1MarkedCompacted)
     351              :         },
     352            1 :         FormatDeleteSizedAndObsolete: func(d *DB) error {
     353            1 :                 return d.finalizeFormatVersUpgrade(FormatDeleteSizedAndObsolete)
     354            1 :         },
     355            1 :         FormatVirtualSSTables: func(d *DB) error {
     356            1 :                 return d.finalizeFormatVersUpgrade(FormatVirtualSSTables)
     357            1 :         },
     358            1 :         FormatSyntheticPrefixSuffix: func(d *DB) error {
     359            1 :                 return d.finalizeFormatVersUpgrade(FormatSyntheticPrefixSuffix)
     360            1 :         },
     361            1 :         FormatFlushableIngestExcises: func(d *DB) error {
     362            1 :                 return d.finalizeFormatVersUpgrade(FormatFlushableIngestExcises)
     363            1 :         },
     364            1 :         FormatColumnarBlocks: func(d *DB) error {
     365            1 :                 return d.finalizeFormatVersUpgrade(FormatColumnarBlocks)
     366            1 :         },
     367            1 :         FormatWALSyncChunks: func(d *DB) error {
     368            1 :                 return d.finalizeFormatVersUpgrade(FormatWALSyncChunks)
     369            1 :         },
     370            1 :         FormatTableFormatV6: func(d *DB) error {
     371            1 :                 return d.finalizeFormatVersUpgrade(FormatTableFormatV6)
     372            1 :         },
     373            1 :         formatDeprecatedExperimentalValueSeparation: func(d *DB) error {
     374            1 :                 return d.finalizeFormatVersUpgrade(formatDeprecatedExperimentalValueSeparation)
     375            1 :         },
     376            1 :         formatFooterAttributes: func(d *DB) error {
     377            1 :                 return d.finalizeFormatVersUpgrade(formatFooterAttributes)
     378            1 :         },
     379            1 :         FormatValueSeparation: func(d *DB) error {
     380            1 :                 return d.finalizeFormatVersUpgrade(FormatValueSeparation)
     381            1 :         },
     382            1 :         FormatExciseBoundsRecord: func(d *DB) error {
     383            1 :                 return d.finalizeFormatVersUpgrade(FormatExciseBoundsRecord)
     384            1 :         },
     385            1 :         FormatV2BlobFiles: func(d *DB) error {
     386            1 :                 return d.finalizeFormatVersUpgrade(FormatV2BlobFiles)
     387            1 :         },
     388            1 :         FormatBackingValueSize: func(d *DB) error {
     389            1 :                 return d.finalizeFormatVersUpgrade(FormatBackingValueSize)
     390            1 :         },
     391            1 :         FormatMarkForCompactionInVersionEdit: func(d *DB) error {
     392            1 :                 return d.finalizeFormatVersUpgrade(FormatMarkForCompactionInVersionEdit)
     393            1 :         },
     394              : }
     395              : 
     396              : const formatVersionMarkerName = `format-version` // marker name given to atomicfs.LocateMarkerInListing
     397              : 
     398              : // lookupFormatMajorVersion retrieves the format version from the format version
     399              : // marker file, located via the directory listing ls of dirname.
     400              : //
     401              : // If no marker value is found (empty version string), it returns FormatDefault;
     402              : // that is only acceptable when creating a new store. It returns an error if the value
     403              : // does not parse, is FormatDefault, or is outside [FormatMinSupported, internalFormatNewest].
     404              : func lookupFormatMajorVersion(
     405              :         fs vfs.FS, dirname string, ls []string,
     406            1 : ) (FormatMajorVersion, *atomicfs.Marker, error) {
     407            1 :         m, versString, err := atomicfs.LocateMarkerInListing(fs, dirname, formatVersionMarkerName, ls)
     408            1 :         if err != nil {
     409            1 :                 return 0, nil, err
     410            1 :         }
     411            1 :         if versString == "" {
     412            1 :                 return FormatDefault, m, nil
     413            1 :         }
     414            1 :         v, err := strconv.ParseUint(versString, 10, 64)
     415            1 :         if err != nil {
     416            0 :                 return 0, nil, errors.Wrap(err, "parsing format major version")
     417            0 :         }
     418            1 :         vers := FormatMajorVersion(v)
     419            1 :         if vers == FormatDefault {
     420            0 :                 return 0, nil, errors.New("pebble: default format major version should not be persisted")
     421            0 :         }
     422            1 :         if vers > internalFormatNewest {
     423            1 :                 return 0, nil, errors.Newf("pebble: database %q written in unknown format major version %d", dirname, vers)
     424            1 :         }
     425            1 :         if vers < FormatMinSupported {
     426            0 :                 return 0, nil, errors.Newf("pebble: database %q written in format major version %d which is no longer supported", dirname, vers)
     427            0 :         }
     428            1 :         return vers, m, nil
     429              : }
     430              : 
     431              : // FormatMajorVersion returns the database's active format major
     432              : // version, as loaded from d.mu.formatVers.vers. It may be higher than
     433              : // the one provided in Options when the database was opened if the
     434              : // existing database was written with a higher format version.
     435            1 : func (d *DB) FormatMajorVersion() FormatMajorVersion {
     436            1 :         return FormatMajorVersion(d.mu.formatVers.vers.Load())
     437            1 : }
     438              : 
     439              : // TableFormat returns the TableFormat that the database is currently using when
     440              : // writing sstables. The table format is determined by the database's format
     441              : // major version and, when that caps the format at Pebblev3, by the
     442              : // experimental EnableValueBlocks setting.
     443            1 : func (d *DB) TableFormat() sstable.TableFormat {
     444            1 :         // The table is typically written at the maximum allowable format implied by
     445            1 :         // the current format major version of the DB.
     446            1 :         f := d.FormatMajorVersion().MaxTableFormat()
     447            1 :         if f == sstable.TableFormatPebblev3 {
     448            1 :                 // In format major versions with maximum table formats of Pebblev3,
     449            1 :                 // value blocks were conditional on an experimental setting. In format
     450            1 :                 // major versions with maximum table formats of Pebblev4 and higher,
     451            1 :                 // value blocks are always enabled.
     452            1 :                 if d.opts.Experimental.EnableValueBlocks == nil || !d.opts.Experimental.EnableValueBlocks() {
     453            1 :                         f = sstable.TableFormatPebblev2
     454            1 :                 }
     455              :         }
     456            1 :         return f
     457              : }
     458              : 
     459              : // BlobFileFormat returns the blob.FileFormat that the database is currently
     460              : // using when writing blob files.
     461            1 : func (d *DB) BlobFileFormat() blob.FileFormat {
     462            1 :         return d.FormatMajorVersion().MaxBlobFileFormat()
     463            1 : }
     464              : 
     465              : // shouldCreateShared returns true if the database should use shared objects
     466              : // when creating new objects on the given level.
     467            1 : func (d *DB) shouldCreateShared(targetLevel int) bool {
     468            1 :         return remote.ShouldCreateShared(d.opts.Experimental.CreateOnShared, targetLevel) &&
     469            1 :                 d.FormatMajorVersion() >= FormatMinForSharedObjects
     470            1 : }
     471              : 
     472              : // RatchetFormatMajorVersion ratchets the opened database's format major
     473              : // version to the provided version. It errors if the provided format
     474              : // major version is below the database's current version. Once a
     475              : // database's format major version is upgraded, previous Pebble versions
     476              : // that do not know of the format version will be unable to open the
     477              : // database.
     478            1 : func (d *DB) RatchetFormatMajorVersion(fmv FormatMajorVersion) error {
     479            1 :         if err := d.closed.Load(); err != nil {
     480            1 :                 panic(err)
     481              :         }
     482              : 
     483            1 :         d.mu.Lock()
     484            1 :         defer d.mu.Unlock()
     485            1 :         return d.ratchetFormatMajorVersionLocked(fmv)
     486              : }
     487              : 
     488            1 : func (d *DB) ratchetFormatMajorVersionLocked(formatVers FormatMajorVersion) error {
     489            1 :         if d.opts.ReadOnly {
     490            0 :                 return ErrReadOnly
     491            0 :         }
     492            1 :         if formatVers > internalFormatNewest {
     493            0 :                 // Guard against accidentally forgetting to update internalFormatNewest.
     494            0 :                 return errors.Errorf("pebble: unknown format version %d", formatVers)
     495            0 :         }
     496            1 :         if currentVers := d.FormatMajorVersion(); currentVers > formatVers {
     497            0 :                 return errors.Newf("pebble: database already at format major version %d; cannot reduce to %d",
     498            0 :                         currentVers, formatVers)
     499            0 :         }
     500            1 :         if d.mu.formatVers.ratcheting {
     501            0 :                 return errors.Newf("pebble: database format major version upgrade is in-progress")
     502            0 :         }
     503            1 :         d.mu.formatVers.ratcheting = true
     504            1 :         defer func() { d.mu.formatVers.ratcheting = false }()
     505              : 
     506            1 :         for nextVers := d.FormatMajorVersion() + 1; nextVers <= formatVers; nextVers++ {
     507            1 :                 if err := formatMajorVersionMigrations[nextVers](d); err != nil {
     508            0 :                         return errors.Wrapf(err, "migrating to version %d", nextVers)
     509            0 :                 }
     510              : 
     511              :                 // NB: The migration is responsible for calling
     512              :                 // finalizeFormatVersUpgrade to finalize the upgrade. This
     513              :                 // structure is necessary because some migrations may need to
     514              :                 // update in-memory state (without ever dropping locks) after
     515              :                 // the upgrade is finalized. Here we assert that the upgrade
     516              :                 // did occur.
     517            1 :                 if d.FormatMajorVersion() != nextVers {
     518            0 :                         d.opts.Logger.Fatalf("pebble: successful migration to format version %d never finalized the upgrade", nextVers)
     519            0 :                 }
     520              :         }
     521            1 :         return nil
     522              : }
     523              : 
     524              : // finalizeFormatVersUpgrade is typically only be called from within a
     525              : // format major version migration.
     526              : //
     527              : // See formatMajorVersionMigrations.
     528            1 : func (d *DB) finalizeFormatVersUpgrade(formatVers FormatMajorVersion) error {
     529            1 :         if err := d.writeFormatVersionMarker(formatVers); err != nil {
     530            0 :                 return err
     531            0 :         }
     532            1 :         d.mu.formatVers.vers.Store(uint64(formatVers))
     533            1 :         d.opts.EventListener.FormatUpgrade(formatVers)
     534            1 :         return nil
     535              : }
     536              : 
     537            1 : func (d *DB) writeFormatVersionMarker(formatVers FormatMajorVersion) error {
     538            1 :         // We use the marker to encode the active format version in the
     539            1 :         // marker filename. Unlike other uses of the atomic marker, there is
     540            1 :         // no file with the filename `formatVers.String()` on the
     541            1 :         // filesystem.
     542            1 :         return d.mu.formatVers.marker.Move(formatVers.String())
     543            1 : }
     544              : 
     545              : // compactMarkedFilesLocked performs a migration that schedules rewrite
     546              : // compactions to compact away any sstables marked for compaction.
     547              : // compactMarkedFilesLocked is run while ratcheting the database's format major
     548              : // version to FormatSplitUserKeysMarkedCompacted.
     549              : //
     550              : // Note that while this method is called with the DB.mu held, and will not
     551              : // return until all marked files have been compacted, the mutex is dropped while
     552              : // waiting for compactions to complete (or for slots to free up).
     553            1 : func (d *DB) compactMarkedFilesLocked() error {
     554            1 :         curr := d.mu.versions.currentVersion()
     555            1 :         if curr.MarkedForCompaction.Count() == 0 {
     556            1 :                 return nil
     557            1 :         }
     558              :         // Attempt to schedule a compaction to rewrite a file marked for compaction.
     559              :         // We simply call maybeScheduleCompaction since it also picks rewrite
     560              :         // compactions. Note that we don't need to call this repeatedly in the for
     561              :         // loop below since the completion of a compaction either starts a new one
     562              :         // or ensures a compaction is queued for scheduling. By calling
     563              :         // maybeScheduleCompaction here we are simply kicking off this behavior.
     564            0 :         d.maybeScheduleCompaction()
     565            0 : 
     566            0 :         // The above attempt might succeed and schedule a rewrite compaction. Or
     567            0 :         // there might not be available compaction concurrency to schedule the
     568            0 :         // compaction.  Or compaction of the file might have already been in
     569            0 :         // progress. In any scenario, wait until there's some change in the
     570            0 :         // state of active compactions.
     571            0 :         for curr.MarkedForCompaction.Count() > 0 {
     572            0 :                 // Before waiting, check that the database hasn't been closed. Trying to
     573            0 :                 // schedule the compaction may have dropped d.mu while waiting for a
     574            0 :                 // manifest write to complete. In that dropped interim, the database may
     575            0 :                 // have been closed.
     576            0 :                 if err := d.closed.Load(); err != nil {
     577            0 :                         return err.(error)
     578            0 :                 }
     579              : 
     580              :                 // Some flush or compaction may have scheduled or completed while we waited
     581              :                 // for the manifest lock in maybeScheduleCompactionPicker. Get the latest
     582              :                 // Version before waiting on a compaction.
     583            0 :                 curr = d.mu.versions.currentVersion()
     584            0 : 
     585            0 :                 // Only wait on compactions if there are files still marked for compaction.
     586            0 :                 // NB: Waiting on this condition variable drops d.mu while blocked.
     587            0 :                 if curr.MarkedForCompaction.Count() > 0 {
     588            0 :                         // NB: we cannot assert that d.mu.compact.compactingCount > 0, since
     589            0 :                         // with a CompactionScheduler a DB may not have even one ongoing
     590            0 :                         // compaction (if other competing activities are being preferred by the
     591            0 :                         // scheduler).
     592            0 :                         d.mu.compact.cond.Wait()
     593            0 :                         // Refresh the current version again.
     594            0 :                         curr = d.mu.versions.currentVersion()
     595            0 :                 }
     596              :         }
     597            0 :         return nil
     598              : }
     599              : 
     600              : // findFilesFunc scans the LSM for files, returning true if at least one
     601              : // file was found. The returned array contains the matched files, if any, per
     602              : // level.
     603              : type findFilesFunc func(v *manifest.Version) (found bool, files [numLevels][]*manifest.TableMetadata, _ error)
     604              : 
     605              : // markFilesForCompactionLocked durably marks the files that match the given findFilesFunc for
     606              : // compaction.
     607            1 : func (d *DB) markFilesForCompactionLocked(findFn findFilesFunc) error {
     608            1 :         if d.FormatMajorVersion() < FormatMarkForCompactionInVersionEdit {
     609            0 :                 return errors.Newf("pebble: marking files for compaction requires format major version %d or higher", FormatMarkForCompactionInVersionEdit)
     610            0 :         }
     611            1 :         jobID := d.newJobIDLocked()
     612            1 : 
     613            1 :         // Acquire a read state to have a view of the LSM and a guarantee that none
     614            1 :         // of the referenced files will be deleted until we've unreferenced the read
     615            1 :         // state. Some findFilesFuncs may read the files, requiring they not be
     616            1 :         // deleted.
     617            1 :         rs := d.loadReadState()
     618            1 :         var (
     619            1 :                 found bool
     620            1 :                 files [numLevels][]*manifest.TableMetadata
     621            1 :                 err   error
     622            1 :         )
     623            1 :         func() {
     624            1 :                 defer rs.unrefLocked()
     625            1 :                 // Note the unusual locking: unlock, defer Lock(). The scan of the files in
     626            1 :                 // the version does not need to block other operations that require the
     627            1 :                 // DB.mu. Drop it for the scan, before re-acquiring it.
     628            1 :                 d.mu.Unlock()
     629            1 :                 defer d.mu.Lock()
     630            1 :                 found, files, err = findFn(rs.current)
     631            1 :         }()
     632            1 :         if err != nil {
     633            0 :                 return err
     634            0 :         }
     635              : 
     636              :         // The database lock has been acquired again by the defer within the above
     637              :         // anonymous function.
     638            1 :         if !found {
     639            0 :                 // Nothing to do.
     640            0 :                 return nil
     641            0 :         }
     642              : 
     643              :         // After scanning, if we found files to mark, we fetch the current state of
     644              :         // the LSM (which may have changed) and build the list of tables to mark for
     645              :         // compaction.
     646              : 
     647              :         // Lock the manifest for a coherent view of the LSM. The database lock has
     648              :         // been re-acquired by the defer within the above anonymous function.
     649            1 :         _, err = d.mu.versions.UpdateVersionLocked(func() (versionUpdate, error) {
     650            1 :                 var ve manifest.VersionEdit
     651            1 :                 vers := d.mu.versions.currentVersion()
     652            1 :                 for level, filesToMark := range files {
     653            1 :                         for _, f := range filesToMark {
     654            1 :                                 // Ignore files to be marked that have already been compacted or marked.
     655            1 :                                 if f.CompactionState == manifest.CompactionStateCompacted ||
     656            1 :                                         vers.MarkedForCompaction.Contains(f, level) {
     657            0 :                                         continue
     658              :                                 }
     659              :                                 // Else, mark the file for compaction in this version.
     660            1 :                                 ve.TablesMarkedForCompaction = append(ve.TablesMarkedForCompaction, manifest.TableMarkedForCompactionEntry{
     661            1 :                                         TableNum: f.TableNum,
     662            1 :                                         Level:    level,
     663            1 :                                         Meta:     f,
     664            1 :                                 })
     665              :                         }
     666              :                 }
     667            1 :                 return versionUpdate{
     668            1 :                         VE:                      &ve,
     669            1 :                         JobID:                   jobID,
     670            1 :                         InProgressCompactionsFn: func() []compactionInfo { return d.getInProgressCompactionInfoLocked(nil) },
     671              :                 }, nil
     672              :         })
     673            1 :         return err
     674              : }
        

Generated by: LCOV version 2.0-1