LCOV - code coverage report
Current view: top level - pebble - format_major_version.go (source / functions) Hit Total Coverage
Test: 2023-10-31 08:18Z a9906157 - tests only.lcov Lines: 259 311 83.3 %
Date: 2023-10-31 08:19:06 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package pebble
       6             : 
       7             : import (
       8             :         "fmt"
       9             :         "strconv"
      10             : 
      11             :         "github.com/cockroachdb/errors"
      12             :         "github.com/cockroachdb/pebble/internal/base"
      13             :         "github.com/cockroachdb/pebble/internal/manifest"
      14             :         "github.com/cockroachdb/pebble/sstable"
      15             :         "github.com/cockroachdb/pebble/vfs"
      16             :         "github.com/cockroachdb/pebble/vfs/atomicfs"
      17             : )
      18             : 
      19             : // FormatMajorVersion is a constant controlling the format of persisted
      20             : // data. Backwards incompatible changes to durable formats are gated
      21             : // behind new format major versions.
      22             : //
      23             : // At any point, a database's format major version may be bumped.
      24             : // However, once a database's format major version is increased,
      25             : // previous versions of Pebble will refuse to open the database.
      26             : //
      27             : // The zero value format is the FormatDefault constant. The exact
      28             : // FormatVersion that the default corresponds to may change with time.
      29             : type FormatMajorVersion uint64
      30             : 
      31             : // SafeValue implements redact.SafeValue.
      32           0 : func (v FormatMajorVersion) SafeValue() {}
      33             : 
      34             : // String implements fmt.Stringer.
      35           1 : func (v FormatMajorVersion) String() string {
      36           1 :         // NB: This must not change. It's used as the value for the on-disk
      37           1 :         // version marker file.
      38           1 :         //
      39           1 :         // Specifically, this value must always parse as a base 10 integer
      40           1 :         // that fits in a uint64. We format it as a zero-padded, 3-digit
      41           1 :         // number today, but the padding may change.
      42           1 :         return fmt.Sprintf("%03d", v)
      43           1 : }
      44             : 
      45             : const (
      46             :         // 21.2 versions.
      47             : 
      48             :         // FormatDefault leaves the format version unspecified. The
      49             :         // FormatDefault constant may be ratcheted upwards over time.
      50             :         FormatDefault FormatMajorVersion = iota
      51             :         // FormatMostCompatible maintains the most backwards compatibility,
      52             :         // maintaining bi-directional compatibility with RocksDB 6.2.1 in
      53             :         // the particular configuration described in the Pebble README.
      54             :         FormatMostCompatible
      55             :         // formatVersionedManifestMarker is the first
      56             :         // backwards-incompatible change made to Pebble, introducing the
      57             :         // format-version marker file for handling backwards-incompatible
      58             :         // changes more broadly, and replacing the `CURRENT` file with a
      59             :         // marker file.
      60             :         //
      61             :         // This format version is intended as an intermediary version state.
      62             :         // It is deliberately unexported to discourage direct use of this
      63             :         // format major version.  Clients should use FormatVersioned which
      64             :         // also ensures earlier versions of Pebble fail to open a database
      65             :         // written in a future format major version.
      66             :         formatVersionedManifestMarker
      67             :         // FormatVersioned is a new format major version that replaces the
      68             :         // old `CURRENT` file with a new 'marker' file scheme.  Previous
      69             :         // Pebble versions will be unable to open the database unless
      70             :         // they're aware of format versions.
      71             :         FormatVersioned
      72             :         // FormatSetWithDelete is a format major version that introduces a new key
      73             :         // kind, base.InternalKeyKindSetWithDelete. Previous Pebble versions will be
      74             :         // unable to open this database.
      75             :         FormatSetWithDelete
      76             : 
      77             :         // 22.1 versions.
      78             : 
      79             :         // FormatBlockPropertyCollector is a format major version that introduces
      80             :         // BlockPropertyCollectors.
      81             :         FormatBlockPropertyCollector
      82             :         // FormatSplitUserKeysMarked is a format major version that guarantees that
      83             :         // all files that share user keys with neighbors are marked for compaction
      84             :         // in the manifest. Ratcheting to FormatSplitUserKeysMarked will block
      85             :         // (without holding mutexes) until the scan of the LSM is complete and the
      86             :         // manifest has been rotated.
      87             :         FormatSplitUserKeysMarked
      88             : 
      89             :         // 22.2 versions.
      90             : 
      91             :         // FormatSplitUserKeysMarkedCompacted is a format major version that
      92             :         // guarantees that all files explicitly marked for compaction in the manifest
      93             :         // have been compacted. Combined with the FormatSplitUserKeysMarked format
      94             :         // major version, this version guarantees that there are no user keys split
      95             :         // across multiple files within a level L1+. Ratcheting to this format version
      96             :         // will block (without holding mutexes) until all necessary compactions for
      97             :         // files marked for compaction are complete.
      98             :         FormatSplitUserKeysMarkedCompacted
      99             :         // FormatRangeKeys is a format major version that introduces range keys.
     100             :         FormatRangeKeys
     101             :         // FormatMinTableFormatPebblev1 is a format major version that guarantees that
     102             :         // tables created by or ingested into the DB at or above this format major
     103             :         // version will have a table format version of at least Pebblev1 (Block
     104             :         // Properties).
     105             :         FormatMinTableFormatPebblev1
     106             :         // FormatPrePebblev1Marked is a format major version that guarantees that all
     107             :         // sstables with a table format version pre-Pebblev1 (i.e. those that are
     108             :         // guaranteed to not contain block properties) are marked for compaction in
     109             :         // the manifest. Ratcheting to FormatPrePebblev1Marked will block (without
     110             :         // holding mutexes) until the scan of the LSM is complete and the manifest has
     111             :         // been rotated.
     112             :         FormatPrePebblev1Marked
     113             : 
     114             :         // 23.1 versions.
     115             : 
     116             :         // FormatUnusedPrePebblev1MarkedCompacted is an unused format major version.
     117             :         // This format major version was originally intended to ship in the 23.1
     118             :         // release. It was later decided that this should be deferred until a
     119             :         // subsequent release. The original ordering is preserved so as not to
     120             :         // introduce breaking changes in Cockroach.
     121             :         FormatUnusedPrePebblev1MarkedCompacted
     122             : 
     123             :         // FormatSSTableValueBlocks is a format major version that adds support for
     124             :         // storing values in value blocks in the sstable. Value block support is not
     125             :         // necessarily enabled when writing sstables, when running with this format
     126             :         // major version.
     127             :         //
     128             :         // WARNING: In development, so no production code should upgrade to this
     129             :         // format, since a DB with this format major version will not actually
     130             :         // interoperate correctly with another DB with the same format major
     131             :         // version. This format major version is introduced so that tests can start
     132             :         // being executed up to this version. Note that these tests succeed despite
     133             :         // the incomplete support since they do not enable value blocks and use
     134             :         // TableFormatPebblev2.
     135             :         FormatSSTableValueBlocks
     136             : 
     137             :         // FormatFlushableIngest is a format major version that enables lazy
     138             :         // addition of ingested sstables into the LSM structure. When an ingest
     139             :         // overlaps with a memtable, a record of the ingest is written to the WAL
     140             :         // without waiting for a flush. Subsequent reads treat the ingested files as
     141             :         // a level above the overlapping memtable. Once the memtable is flushed, the
     142             :         // ingested files are moved into the lowest possible levels.
     143             :         //
     144             :         // This feature is behind a format major version because it required
     145             :         // breaking changes to the WAL format.
     146             :         FormatFlushableIngest
     147             : 
     148             :         // 23.2 versions.
     149             : 
     150             :         // FormatPrePebblev1MarkedCompacted is a format major version that guarantees
     151             :         // that all sstables explicitly marked for compaction in the manifest (see
     152             :         // FormatPrePebblev1Marked) have been compacted. Ratcheting to this format
     153             :         // version will block (without holding mutexes) until all necessary
     154             :         // compactions for files marked for compaction are complete.
     155             :         FormatPrePebblev1MarkedCompacted
     156             : 
     157             :         // FormatDeleteSizedAndObsolete is a format major version that adds support
     158             :         // for deletion tombstones that encode the size of the value they're
     159             :         // expected to delete. This format major version is required before the
     160             :         // associated key kind may be committed through batch applications or
     161             :         // ingests. It also adds support for keys that are marked obsolete (see
     162             :         // sstable/format.go for details).
     163             :         FormatDeleteSizedAndObsolete
     164             : 
     165             :         // FormatVirtualSSTables is a format major version that adds support for
     166             :         // virtual sstables that can reference a sub-range of keys in an underlying
     167             :         // physical sstable. This information is persisted through new,
     168             :         // backward-incompatible fields in the Manifest, and therefore requires
     169             :         // a format major version.
     170             :         FormatVirtualSSTables
     171             : 
     172             :         // internalFormatNewest holds the newest format major version, including
     173             :         // experimental ones excluded from the exported FormatNewest constant until
     174             :         // they've stabilized. Used in tests.
     175             :         internalFormatNewest FormatMajorVersion = iota - 1
     176             : 
     177             :         // FormatNewest always contains the most recent format major version.
     178             :         FormatNewest FormatMajorVersion = internalFormatNewest
     179             : )
     180             : 
     181             : // MaxTableFormat returns the maximum sstable.TableFormat that can be used at
     182             : // this FormatMajorVersion.
     183           1 : func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat {
     184           1 :         switch v {
     185             :         case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker,
     186           1 :                 FormatVersioned, FormatSetWithDelete:
     187           1 :                 return sstable.TableFormatRocksDBv2
     188             :         case FormatBlockPropertyCollector, FormatSplitUserKeysMarked,
     189           1 :                 FormatSplitUserKeysMarkedCompacted:
     190           1 :                 return sstable.TableFormatPebblev1
     191             :         case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked,
     192           1 :                 FormatUnusedPrePebblev1MarkedCompacted:
     193           1 :                 return sstable.TableFormatPebblev2
     194           1 :         case FormatSSTableValueBlocks, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted:
     195           1 :                 return sstable.TableFormatPebblev3
     196           1 :         case FormatDeleteSizedAndObsolete, FormatVirtualSSTables:
     197           1 :                 return sstable.TableFormatPebblev4
     198           1 :         default:
     199           1 :                 panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
     200             :         }
     201             : }
     202             : 
     203             : // MinTableFormat returns the minimum sstable.TableFormat that can be used at
     204             : // this FormatMajorVersion.
     205           1 : func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat {
     206           1 :         switch v {
     207             :         case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker,
     208             :                 FormatVersioned, FormatSetWithDelete, FormatBlockPropertyCollector,
     209             :                 FormatSplitUserKeysMarked, FormatSplitUserKeysMarkedCompacted,
     210           1 :                 FormatRangeKeys:
     211           1 :                 return sstable.TableFormatLevelDB
     212             :         case FormatMinTableFormatPebblev1, FormatPrePebblev1Marked,
     213             :                 FormatUnusedPrePebblev1MarkedCompacted, FormatSSTableValueBlocks,
     214             :                 FormatFlushableIngest, FormatPrePebblev1MarkedCompacted,
     215           1 :                 FormatDeleteSizedAndObsolete, FormatVirtualSSTables:
     216           1 :                 return sstable.TableFormatPebblev1
     217           1 :         default:
     218           1 :                 panic(fmt.Sprintf("pebble: unsupported format major version: %s", v))
     219             :         }
     220             : }
     221             : 
     222             : // formatMajorVersionMigrations defines the migrations from one format
     223             : // major version to the next. Each migration is defined as a closure
     224             : // which will be invoked on the database before the new format major
     225             : // version is committed. Migrations must be idempotent. Migrations are
     226             : // invoked with d.mu locked.
     227             : //
     228             : // Each migration is responsible for invoking finalizeFormatVersUpgrade
     229             : // to set the new format major version.  RatchetFormatMajorVersion will
     230             : // panic if a migration returns a nil error but fails to finalize the
     231             : // new format major version.
     232             : var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{
     233           0 :         FormatMostCompatible: func(d *DB) error { return nil },
     234           1 :         formatVersionedManifestMarker: func(d *DB) error {
     235           1 :                 // formatVersionedManifestMarker introduces the use of a marker
     236           1 :                 // file for pointing to the current MANIFEST file.
     237           1 : 
     238           1 :                 // Lock the manifest.
     239           1 :                 d.mu.versions.logLock()
     240           1 :                 defer d.mu.versions.logUnlock()
     241           1 : 
     242           1 :                 // Construct the filename of the currently active manifest and
     243           1 :                 // move the manifest marker to that filename. The marker is
     244           1 :                 // guaranteed to exist, because we unconditionally locate it
     245           1 :                 // during Open.
     246           1 :                 manifestFileNum := d.mu.versions.manifestFileNum
     247           1 :                 filename := base.MakeFilename(fileTypeManifest, manifestFileNum)
     248           1 :                 if err := d.mu.versions.manifestMarker.Move(filename); err != nil {
     249           0 :                         return errors.Wrap(err, "moving manifest marker")
     250           0 :                 }
     251             : 
     252             :                 // Now that we have a manifest marker file in place and pointing
     253             :                 // to the current MANIFEST, finalize the upgrade. If we fail for
     254             :                 // some reason, a retry of this migration is guaranteed to again
     255             :                 // move the manifest marker file to the latest manifest. If
     256             :                 // we're unable to finalize the upgrade, a subsequent call to
     257             :                 // Open will ignore the manifest marker.
     258           1 :                 if err := d.finalizeFormatVersUpgrade(formatVersionedManifestMarker); err != nil {
     259           0 :                         return err
     260           0 :                 }
     261             : 
     262             :                 // We've finalized the upgrade. All subsequent Open calls will
     263             :                 // ignore the CURRENT file and instead read the manifest marker.
     264             :                 // Before we unlock the manifest, we need to update versionSet
     265             :                 // to use the manifest marker on future rotations.
     266           1 :                 d.mu.versions.setCurrent = setCurrentFuncMarker(
     267           1 :                         d.mu.versions.manifestMarker,
     268           1 :                         d.mu.versions.fs,
     269           1 :                         d.mu.versions.dirname)
     270           1 :                 return nil
     271             :         },
     272             :         // The FormatVersioned version is split into two, each with their
     273             :         // own migration to ensure the post-migration cleanup happens even
     274             :         // if there's a crash immediately after finalizing the version. Once
     275             :         // a new format major version is finalized, its migration will never
     276             :         // run again. Post-migration cleanup like the one in the migration
     277             :         // below must be performed in a separate migration or every time the
     278             :         // database opens.
     279           1 :         FormatVersioned: func(d *DB) error {
     280           1 :                 // Replace the `CURRENT` file with one that points to the
     281           1 :                 // nonexistent `MANIFEST-000000` file. If an earlier Pebble
     282           1 :                 // version that does not know about format major versions
     283           1 :                 // attempts to open the database, it will error, avoiding
     284           1 :                 // accidental corruption.
     285           1 :                 if err := setCurrentFile(d.mu.versions.dirname, d.mu.versions.fs, base.FileNum(0).DiskFileNum()); err != nil {
     286           0 :                         return err
     287           0 :                 }
     288           1 :                 return d.finalizeFormatVersUpgrade(FormatVersioned)
     289             :         },
     290             :         // As SetWithDelete is a new key kind, there is nothing to migrate. We can
     291             :         // simply finalize the format version and we're done.
     292           1 :         FormatSetWithDelete: func(d *DB) error {
     293           1 :                 return d.finalizeFormatVersUpgrade(FormatSetWithDelete)
     294           1 :         },
     295           1 :         FormatBlockPropertyCollector: func(d *DB) error {
     296           1 :                 return d.finalizeFormatVersUpgrade(FormatBlockPropertyCollector)
     297           1 :         },
     298           1 :         FormatSplitUserKeysMarked: func(d *DB) error {
     299           1 :                 // Mark any unmarked files with split-user keys. Note all format major
     300           1 :                 // version migrations are invoked with DB.mu locked.
     301           1 :                 if err := d.markFilesLocked(markFilesWithSplitUserKeys(d.opts.Comparer.Equal)); err != nil {
     302           0 :                         return err
     303           0 :                 }
     304           1 :                 return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarked)
     305             :         },
     306           1 :         FormatSplitUserKeysMarkedCompacted: func(d *DB) error {
     307           1 :                 // Before finalizing the format major version, rewrite any sstables
     308           1 :                 // still marked for compaction. Note all format major version
     309           1 :                 // migrations are invoked with DB.mu locked.
     310           1 :                 if err := d.compactMarkedFilesLocked(); err != nil {
     311           0 :                         return err
     312           0 :                 }
     313           1 :                 return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarkedCompacted)
     314             :         },
     315           1 :         FormatRangeKeys: func(d *DB) error {
     316           1 :                 return d.finalizeFormatVersUpgrade(FormatRangeKeys)
     317           1 :         },
     318           1 :         FormatMinTableFormatPebblev1: func(d *DB) error {
     319           1 :                 return d.finalizeFormatVersUpgrade(FormatMinTableFormatPebblev1)
     320           1 :         },
     321           1 :         FormatPrePebblev1Marked: func(d *DB) error {
     322           1 :                 // Mark any unmarked files that contain only table properties. Note all
     323           1 :                 // format major version migrations are invoked with DB.mu locked.
     324           1 :                 if err := d.markFilesLocked(markFilesPrePebblev1(d.tableCache)); err != nil {
     325           0 :                         return err
     326           0 :                 }
     327           1 :                 return d.finalizeFormatVersUpgrade(FormatPrePebblev1Marked)
     328             :         },
     329           1 :         FormatUnusedPrePebblev1MarkedCompacted: func(d *DB) error {
     330           1 :                 // Intentional no-op.
     331           1 :                 return d.finalizeFormatVersUpgrade(FormatUnusedPrePebblev1MarkedCompacted)
     332           1 :         },
     333           1 :         FormatSSTableValueBlocks: func(d *DB) error {
     334           1 :                 return d.finalizeFormatVersUpgrade(FormatSSTableValueBlocks)
     335           1 :         },
     336           1 :         FormatFlushableIngest: func(d *DB) error {
     337           1 :                 return d.finalizeFormatVersUpgrade(FormatFlushableIngest)
     338           1 :         },
     339           1 :         FormatPrePebblev1MarkedCompacted: func(d *DB) error {
     340           1 :                 // Before finalizing the format major version, rewrite any sstables
     341           1 :                 // still marked for compaction. Note all format major version
     342           1 :                 // migrations are invoked with DB.mu locked.
     343           1 :                 if err := d.compactMarkedFilesLocked(); err != nil {
     344           0 :                         return err
     345           0 :                 }
     346           1 :                 return d.finalizeFormatVersUpgrade(FormatPrePebblev1MarkedCompacted)
     347             :         },
     348           1 :         FormatDeleteSizedAndObsolete: func(d *DB) error {
     349           1 :                 return d.finalizeFormatVersUpgrade(FormatDeleteSizedAndObsolete)
     350           1 :         },
     351           1 :         FormatVirtualSSTables: func(d *DB) error {
     352           1 :                 return d.finalizeFormatVersUpgrade(FormatVirtualSSTables)
     353           1 :         },
     354             : }
     355             : 
     356             : const formatVersionMarkerName = `format-version`
     357             : 
     358             : func lookupFormatMajorVersion(
     359             :         fs vfs.FS, dirname string,
     360           1 : ) (FormatMajorVersion, *atomicfs.Marker, error) {
     361           1 :         m, versString, err := atomicfs.LocateMarker(fs, dirname, formatVersionMarkerName)
     362           1 :         if err != nil {
     363           1 :                 return 0, nil, err
     364           1 :         }
     365           1 :         if versString == "" {
     366           1 :                 return FormatMostCompatible, m, nil
     367           1 :         }
     368           1 :         v, err := strconv.ParseUint(versString, 10, 64)
     369           1 :         if err != nil {
     370           0 :                 return 0, nil, errors.Wrap(err, "parsing format major version")
     371           0 :         }
     372           1 :         vers := FormatMajorVersion(v)
     373           1 :         if vers == FormatDefault {
     374           0 :                 return 0, nil, errors.Newf("pebble: default format major version should not persisted", vers)
     375           0 :         }
     376           1 :         if vers > internalFormatNewest {
     377           1 :                 return 0, nil, errors.Newf("pebble: database %q written in format major version %d", dirname, vers)
     378           1 :         }
     379           1 :         return vers, m, nil
     380             : }
     381             : 
     382             : // FormatMajorVersion returns the database's active format major
     383             : // version. The format major version may be higher than the one
     384             : // provided in Options when the database was opened if the existing
     385             : // database was written with a higher format version.
     386           1 : func (d *DB) FormatMajorVersion() FormatMajorVersion {
     387           1 :         return FormatMajorVersion(d.mu.formatVers.vers.Load())
     388           1 : }
     389             : 
     390             : // RatchetFormatMajorVersion ratchets the opened database's format major
     391             : // version to the provided version. It errors if the provided format
     392             : // major version is below the database's current version. Once a
     393             : // database's format major version is upgraded, previous Pebble versions
     394             : // that do not know of the format version will be unable to open the
     395             : // database.
     396           1 : func (d *DB) RatchetFormatMajorVersion(fmv FormatMajorVersion) error {
     397           1 :         if err := d.closed.Load(); err != nil {
     398           1 :                 panic(err)
     399             :         }
     400             : 
     401           1 :         d.mu.Lock()
     402           1 :         defer d.mu.Unlock()
     403           1 :         return d.ratchetFormatMajorVersionLocked(fmv)
     404             : }
     405             : 
     406           1 : func (d *DB) ratchetFormatMajorVersionLocked(formatVers FormatMajorVersion) error {
     407           1 :         if d.opts.ReadOnly {
     408           0 :                 return ErrReadOnly
     409           0 :         }
     410           1 :         if formatVers > internalFormatNewest {
     411           0 :                 // Guard against accidentally forgetting to update internalFormatNewest.
     412           0 :                 return errors.Errorf("pebble: unknown format version %d", formatVers)
     413           0 :         }
     414           1 :         if currentVers := d.FormatMajorVersion(); currentVers > formatVers {
     415           0 :                 return errors.Newf("pebble: database already at format major version %d; cannot reduce to %d",
     416           0 :                         currentVers, formatVers)
     417           0 :         }
     418           1 :         if d.mu.formatVers.ratcheting {
     419           0 :                 return errors.Newf("pebble: database format major version upgrade is in-progress")
     420           0 :         }
     421           1 :         d.mu.formatVers.ratcheting = true
     422           1 :         defer func() { d.mu.formatVers.ratcheting = false }()
     423             : 
     424           1 :         for nextVers := d.FormatMajorVersion() + 1; nextVers <= formatVers; nextVers++ {
     425           1 :                 if err := formatMajorVersionMigrations[nextVers](d); err != nil {
     426           0 :                         return errors.Wrapf(err, "migrating to version %d", nextVers)
     427           0 :                 }
     428             : 
     429             :                 // NB: The migration is responsible for calling
     430             :                 // finalizeFormatVersUpgrade to finalize the upgrade. This
     431             :                 // structure is necessary because some migrations may need to
     432             :                 // update in-memory state (without ever dropping locks) after
     433             :                 // the upgrade is finalized. Here we assert that the upgrade
     434             :                 // did occur.
     435           1 :                 if d.FormatMajorVersion() != nextVers {
     436           0 :                         d.opts.Logger.Fatalf("pebble: successful migration to format version %d never finalized the upgrade", nextVers)
     437           0 :                 }
     438             :         }
     439           1 :         return nil
     440             : }
     441             : 
     442             : // finalizeFormatVersUpgrade is typically only be called from within a
     443             : // format major version migration.
     444             : //
     445             : // See formatMajorVersionMigrations.
     446           1 : func (d *DB) finalizeFormatVersUpgrade(formatVers FormatMajorVersion) error {
     447           1 :         // We use the marker to encode the active format version in the
     448           1 :         // marker filename. Unlike other uses of the atomic marker, there is
     449           1 :         // no file with the filename `formatVers.String()` on the
     450           1 :         // filesystem.
     451           1 :         if err := d.mu.formatVers.marker.Move(formatVers.String()); err != nil {
     452           0 :                 return err
     453           0 :         }
     454           1 :         d.mu.formatVers.vers.Store(uint64(formatVers))
     455           1 :         d.opts.EventListener.FormatUpgrade(formatVers)
     456           1 :         return nil
     457             : }
     458             : 
     459             : // compactMarkedFilesLocked performs a migration that schedules rewrite
     460             : // compactions to compact away any sstables marked for compaction.
     461             : // compactMarkedFilesLocked is run while ratcheting the database's format major
     462             : // version to FormatSplitUserKeysMarkedCompacted.
     463             : //
     464             : // Note that while this method is called with the DB.mu held, and will not
     465             : // return until all marked files have been compacted, the mutex is dropped while
     466             : // waiting for compactions to complete (or for slots to free up).
     467           1 : func (d *DB) compactMarkedFilesLocked() error {
     468           1 :         curr := d.mu.versions.currentVersion()
     469           1 :         for curr.Stats.MarkedForCompaction > 0 {
     470           1 :                 // Attempt to schedule a compaction to rewrite a file marked for
     471           1 :                 // compaction.
     472           1 :                 d.maybeScheduleCompactionPicker(func(picker compactionPicker, env compactionEnv) *pickedCompaction {
     473           1 :                         return picker.pickRewriteCompaction(env)
     474           1 :                 })
     475             : 
     476             :                 // The above attempt might succeed and schedule a rewrite compaction. Or
     477             :                 // there might not be available compaction concurrency to schedule the
     478             :                 // compaction.  Or compaction of the file might have already been in
     479             :                 // progress. In any scenario, wait until there's some change in the
     480             :                 // state of active compactions.
     481             : 
     482             :                 // Before waiting, check that the database hasn't been closed. Trying to
     483             :                 // schedule the compaction may have dropped d.mu while waiting for a
     484             :                 // manifest write to complete. In that dropped interim, the database may
     485             :                 // have been closed.
     486           1 :                 if err := d.closed.Load(); err != nil {
     487           0 :                         return err.(error)
     488           0 :                 }
     489             : 
     490             :                 // Some flush or compaction may have scheduled or completed while we waited
     491             :                 // for the manifest lock in maybeScheduleCompactionPicker. Get the latest
     492             :                 // Version before waiting on a compaction.
     493           1 :                 curr = d.mu.versions.currentVersion()
     494           1 : 
     495           1 :                 // Only wait on compactions if there are files still marked for compaction.
     496           1 :                 // NB: Waiting on this condition variable drops d.mu while blocked.
     497           1 :                 if curr.Stats.MarkedForCompaction > 0 {
     498           1 :                         if d.mu.compact.compactingCount == 0 {
     499           0 :                                 panic("expected a compaction of marked files in progress")
     500             :                         }
     501           1 :                         d.mu.compact.cond.Wait()
     502           1 :                         // Refresh the current version again.
     503           1 :                         curr = d.mu.versions.currentVersion()
     504             :                 }
     505             :         }
     506           1 :         return nil
     507             : }
     508             : 
     509             : // findFilesFunc scans the LSM for files, returning true if at least one
     510             : // file was found. The returned array contains the matched files, if any, per
     511             : // level.
     512             : type findFilesFunc func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error)
     513             : 
     514             : // markFilesWithSplitUserKeys scans the LSM's levels 1 through 6 for adjacent
     515             : // files that contain the same user key. Such arrangements of files were
     516             : // permitted in RocksDB and in Pebble up to SHA a860bbad.
     517           1 : var markFilesWithSplitUserKeys = func(equal Equal) findFilesFunc {
     518           1 :         return func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error) {
     519           1 :                 // Files with split user keys are expected to be rare and performing key
     520           1 :                 // comparisons for every file within the LSM is expensive, so drop the
     521           1 :                 // database lock while scanning the file metadata.
     522           1 :                 for l := numLevels - 1; l > 0; l-- {
     523           1 :                         iter := v.Levels[l].Iter()
     524           1 :                         var prevFile *fileMetadata
     525           1 :                         var prevUserKey []byte
     526           1 :                         for f := iter.First(); f != nil; f = iter.Next() {
     527           1 :                                 if prevUserKey != nil && equal(prevUserKey, f.Smallest.UserKey) {
     528           1 :                                         // NB: We may append a file twice, once as prevFile and once
     529           1 :                                         // as f. That's okay, and handled below.
     530           1 :                                         files[l] = append(files[l], prevFile, f)
     531           1 :                                         found = true
     532           1 :                                 }
     533           1 :                                 if f.Largest.IsExclusiveSentinel() {
     534           0 :                                         prevUserKey = nil
     535           0 :                                         prevFile = nil
     536           1 :                                 } else {
     537           1 :                                         prevUserKey = f.Largest.UserKey
     538           1 :                                         prevFile = f
     539           1 :                                 }
     540             :                         }
     541             :                 }
     542           1 :                 return
     543             :         }
     544             : }
     545             : 
     546             : // markFilesPrePebblev1 scans the LSM for files that do not support block
     547             : // properties (i.e. a table format version pre-Pebblev1).
     548           1 : var markFilesPrePebblev1 = func(tc *tableCacheContainer) findFilesFunc {
     549           1 :         return func(v *version) (found bool, files [numLevels][]*fileMetadata, err error) {
     550           1 :                 for l := numLevels - 1; l > 0; l-- {
     551           1 :                         iter := v.Levels[l].Iter()
     552           1 :                         for f := iter.First(); f != nil; f = iter.Next() {
     553           1 :                                 if f.Virtual {
     554           0 :                                         // Any physical sstable which has been virtualized must
     555           0 :                                         // have already undergone this migration, and we don't
     556           0 :                                         // need to worry about the virtual sstable themselves.
     557           0 :                                         panic("pebble: unexpected virtual sstable during migration")
     558             :                                 }
     559           1 :                                 err = tc.withReader(
     560           1 :                                         f.PhysicalMeta(), func(r *sstable.Reader) error {
     561           1 :                                                 tf, err := r.TableFormat()
     562           1 :                                                 if err != nil {
     563           0 :                                                         return err
     564           0 :                                                 }
     565           1 :                                                 if tf < sstable.TableFormatPebblev1 {
     566           1 :                                                         found = true
     567           1 :                                                         files[l] = append(files[l], f)
     568           1 :                                                 }
     569           1 :                                                 return nil
     570             :                                         })
     571           1 :                                 if err != nil {
     572           0 :                                         return
     573           0 :                                 }
     574             :                         }
     575             :                 }
     576           1 :                 return
     577             :         }
     578             : }
     579             : 
     580             : // markFilesLock durably marks the files that match the given findFilesFunc for
     581             : // compaction.
     582           1 : func (d *DB) markFilesLocked(findFn findFilesFunc) error {
     583           1 :         jobID := d.mu.nextJobID
     584           1 :         d.mu.nextJobID++
     585           1 : 
     586           1 :         // Acquire a read state to have a view of the LSM and a guarantee that none
     587           1 :         // of the referenced files will be deleted until we've unreferenced the read
     588           1 :         // state. Some findFilesFuncs may read the files, requiring they not be
     589           1 :         // deleted.
     590           1 :         rs := d.loadReadState()
     591           1 :         var (
     592           1 :                 found bool
     593           1 :                 files [numLevels][]*fileMetadata
     594           1 :                 err   error
     595           1 :         )
     596           1 :         func() {
     597           1 :                 defer rs.unrefLocked()
     598           1 :                 // Note the unusual locking: unlock, defer Lock(). The scan of the files in
     599           1 :                 // the version does not need to block other operations that require the
     600           1 :                 // DB.mu. Drop it for the scan, before re-acquiring it.
     601           1 :                 d.mu.Unlock()
     602           1 :                 defer d.mu.Lock()
     603           1 :                 found, files, err = findFn(rs.current)
     604           1 :         }()
     605           1 :         if err != nil {
     606           0 :                 return err
     607           0 :         }
     608             : 
     609             :         // The database lock has been acquired again by the defer within the above
     610             :         // anonymous function.
     611           1 :         if !found {
     612           1 :                 // Nothing to do.
     613           1 :                 return nil
     614           1 :         }
     615             : 
     616             :         // After scanning, if we found files to mark, we fetch the current state of
     617             :         // the LSM (which may have changed) and set MarkedForCompaction on the files,
     618             :         // and update the version's Stats.MarkedForCompaction count, which are both
     619             :         // protected by d.mu.
     620             : 
     621             :         // Lock the manifest for a coherent view of the LSM. The database lock has
     622             :         // been re-acquired by the defer within the above anonymous function.
     623           1 :         d.mu.versions.logLock()
     624           1 :         vers := d.mu.versions.currentVersion()
     625           1 :         for l, filesToMark := range files {
     626           1 :                 if len(filesToMark) == 0 {
     627           1 :                         continue
     628             :                 }
     629           1 :                 for _, f := range filesToMark {
     630           1 :                         // Ignore files to be marked that have already been compacted or marked.
     631           1 :                         if f.CompactionState == manifest.CompactionStateCompacted ||
     632           1 :                                 f.MarkedForCompaction {
     633           0 :                                 continue
     634             :                         }
     635             :                         // Else, mark the file for compaction in this version.
     636           1 :                         vers.Stats.MarkedForCompaction++
     637           1 :                         f.MarkedForCompaction = true
     638             :                 }
     639             :                 // The compaction picker uses the markedForCompactionAnnotator to
     640             :                 // quickly find files marked for compaction, or to quickly determine
     641             :                 // that there are no such files marked for compaction within a level.
     642             :                 // A b-tree node may be annotated with an annotation recording that
     643             :                 // there are no files marked for compaction within the node's subtree,
     644             :                 // based on the assumption that it's static.
     645             :                 //
     646             :                 // Since we're marking files for compaction, these b-tree nodes'
     647             :                 // annotations will be out of date. Clear the compaction-picking
     648             :                 // annotation, so that it's recomputed the next time the compaction
     649             :                 // picker looks for a file marked for compaction.
     650           1 :                 vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{})
     651             :         }
     652             : 
     653             :         // The 'marked-for-compaction' bit is persisted in the MANIFEST file
     654             :         // metadata. We've already modified the in-memory file metadata, but the
     655             :         // manifest hasn't been updated. Force rotation to a new MANIFEST file,
     656             :         // which will write every file metadata to the new manifest file and ensure
     657             :         // that the now marked-for-compaction file metadata are persisted as marked.
     658             :         // NB: This call to logAndApply will unlockthe MANIFEST, which we locked up
     659             :         // above before obtaining `vers`.
     660           1 :         return d.mu.versions.logAndApply(
     661           1 :                 jobID,
     662           1 :                 &manifest.VersionEdit{},
     663           1 :                 map[int]*LevelMetrics{},
     664           1 :                 true, /* forceRotation */
     665           1 :                 func() []compactionInfo { return d.getInProgressCompactionInfoLocked(nil) })
     666             : }

Generated by: LCOV version 1.14