LCOV - code coverage report
Current view: top level - pebble/internal/base - lazy_value.go (source / functions) Hit Total Coverage
Test: 2024-12-22 08:16Z 78d53457 - tests only.lcov Lines: 50 51 98.0 %
Date: 2024-12-22 08:16:51 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package base
       6             : 
       7             : import (
       8             :         "context"
       9             : 
      10             :         "github.com/cockroachdb/pebble/internal/invariants"
      11             : )
      12             : 
      13             : // A value can have user-defined attributes that are a function of the value
      14             : // byte slice. For now, we only support "short attributes", which can be
      15             : // encoded in 3 bits. We will likely extend this to "long attributes" later
      16             : // for values that are even more expensive to access than those in value
      17             : // blocks in the same sstable.
      18             : //
      19             : // When a sstable writer chooses not to store a value together with the key,
      20             : // it can call the ShortAttributeExtractor to extract the attribute and store
      21             : // it together with the key. This allows for cheap retrieval of
      22             : // AttributeAndLen on the read-path, without doing a more expensive retrieval
      23             : // of the value. In general, the extraction code may want to also look at the
      24             : // key to decide how to treat the value, hence the key* parameters.
      25             : //
      26             : // Write path performance: The ShortAttributeExtractor func cannot be inlined,
      27             : // so we will pay the cost of this function call. However, we will only pay
      28             : // this when (a) the value is not being stored together with the key, and (b)
      29             : // the key-value pair is being initially written to the DB, or a compaction is
      30             : // transitioning the key-value pair from being stored together to being stored
      31             : // separately.
      32             : 
      33             : // ShortAttribute encodes a user-specified attribute of the value.
      34             : type ShortAttribute uint8
      35             : 
      36             : // MaxShortAttribute is the maximum value of the short attribute (3 bits).
      37             : const MaxShortAttribute = 7
      38             : 
      39             : // ShortAttributeExtractor is an extractor that given the value, will return
      40             : // the ShortAttribute.
      41             : type ShortAttributeExtractor func(
      42             :         key []byte, keyPrefixLen int, value []byte) (ShortAttribute, error)
      43             : 
      44             : // AttributeAndLen represents the pair of value length and the short
      45             : // attribute.
      46             : type AttributeAndLen struct {
      47             :         ValueLen       int32
      48             :         ShortAttribute ShortAttribute
      49             : }
      50             : 
      51             : // LazyValue represents a value that may not already have been extracted.
      52             : // Currently, it can represent either an in-place value (stored with the key)
      53             : // or a value stored in the value section. However, the interface is general
      54             : // enough to support values that are stored in separate files.
      55             : //
      56             : // LazyValue is used in the InternalIterator interface, such that all
      57             : // positioning calls return (*InternalKey, LazyValue). It is also exposed via
      58             : // the public Iterator for callers that need to remember a recent but not
      59             : // necessarily latest LazyValue, in case they need the actual value in the
      60             : // future. An example is a caller that is iterating in reverse and looking for
      61             : // the latest MVCC version for a key -- it cannot identify the latest MVCC
      62             : // version without stepping to the previous key-value pair e.g.
      63             : // storage.pebbleMVCCScanner in CockroachDB.
      64             : //
      65             : // Performance note: It is important for this struct to not exceed a sizeof 32
      66             : // bytes, for optimizing the common case of the in-place value. Prior to
      67             : // introducing LazyValue, we were passing around a []byte which is 24 bytes.
      68             : // Passing a 40 byte or larger struct causes performance to drop by 75% on
      69             : // some benchmarks that do tight iteration loops.
      70             : //
      71             : // Memory management:
      72             : // This is subtle, but important for performance.
      73             : //
      74             : // A LazyValue returned by an InternalIterator or Iterator is unstable in that
      75             : // repositioning the iterator will invalidate the memory inside it. A caller
      76             : // wishing to maintain that LazyValue needs to call LazyValue.Clone(). Note
      77             : // that this does not fetch the value if it is not in-place. Clone() should
      78             : // ideally not be called if LazyValue.Value() has been called, since the
      79             : // cloned LazyValue will forget the extracted/fetched value, and calling
      80             : // Value() on this clone will cause the value to be extracted again. That is,
      81             : // Clone() does not make any promise about the memory stability of the
      82             : // underlying value.
      83             : //
      84             : // A user of an iterator that calls LazyValue.Value() wants as much as
      85             : // possible for the returned value []byte to point to iterator owned memory.
      86             : //
      87             : //  1. [P1] The underlying iterator that owns that memory also needs a promise
      88             : //     from that user that at any time there is at most one value []byte slice
      89             : //     that the caller is expecting it to maintain. Otherwise, the underlying
      90             : //     iterator has to maintain multiple such []byte slices which results in
      91             : //     more complicated and inefficient code.
      92             : //
      93             : //  2. [P2] The underlying iterator, in order to make the promise that it is
      94             : //     maintaining the one value []byte slice, also needs a way to know when
      95             : //     it is relieved of that promise. One way it is relieved of that promise
      96             : //     is by being told that it is being repositioned. Typically, the owner of
      97             : //     the value []byte slice is a sstable iterator, and it will know that it
      98             : //     is relieved of the promise when it is repositioned. However, consider
      99             : //     the case where the caller has used LazyValue.Clone() and repositioned
     100             : //     the iterator (which is actually a tree of iterators). In this case the
     101             : //     underlying sstable iterator may not even be open. LazyValue.Value()
     102             : //     will still work (at a higher cost), but since the sstable iterator is
     103             : //     not open, it does not have a mechanism to know when the retrieved value
     104             : //     is no longer in use. We refer to this situation as "not satisfying P2".
     105             : //     To handle this situation, the LazyValue.Value() method accepts a caller
     106             : //     owned buffer, that the callee will use if needed. The callee explicitly
     107             : //     tells the caller whether the []byte slice for the value is now owned by
     108             : //     the caller. This will be true if the callee attempted to use buf and
     109             : //     either successfully used it or allocated a new []byte slice.
     110             : //
     111             : // To ground the above in reality, we consider three examples of callers of
     112             : // LazyValue.Value():
     113             : //
     114             : //   - Iterator: it calls LazyValue.Value for its own use when merging values.
     115             : //     When merging during reverse iteration, it may have cloned the LazyValue.
     116             : //     In this case it calls LazyValue.Value() on the cloned value, merges it,
     117             : //     and then calls LazyValue.Value() on the current iterator position and
     118             : //     merges it. So it is honoring P1.
     119             : //
     120             : //   - Iterator on behalf of Iterator clients: The Iterator.Value() method
     121             : //     needs to call LazyValue.Value(). The client of Iterator is satisfying P1
     122             : //     because of the inherent Iterator interface constraint, i.e., it is calling
     123             : //     Iterator.Value() on the current Iterator position. It is possible that
     124             : //     the Iterator has cloned this LazyValue (for the reverse iteration case),
     125             : //     which the client is unaware of, so the underlying sstable iterator may
     126             : //     not be able to satisfy P2. This is ok because Iterator will call
     127             : //     LazyValue.Value with its (reusable) owned buffer.
     128             : //
     129             : //   - CockroachDB's pebbleMVCCScanner: This will use LazyValues from Iterator
     130             : //     since during reverse iteration in order to find the highest version that
     131             : //     satisfies a read it needs to clone the LazyValue, step back the iterator
     132             : //     and then decide whether it needs the value from the previously cloned
     133             : //     LazyValue. The pebbleMVCCScanner will satisfy P1. The P2 story is
     134             : //     similar to the previous case in that it will call LazyValue.Value with
     135             : //     its (reusable) owned buffer.
     136             : //
     137             : // Corollary: callers that directly use InternalIterator can know that they
     138             : // have done nothing to interfere with promise P2 can pass in a nil buf and be
     139             : // sure that it will not trigger an allocation.
     140             : //
     141             : // Repeated calling of LazyValue.Value:
     142             : // This is ok as long as the caller continues to satisfy P1. The previously
     143             : // fetched value will be remembered inside LazyValue to avoid fetching again.
     144             : // So if the caller's buffer is used the first time the value was fetched, it
     145             : // is still in use.
     146             : //
     147             : // LazyValue fields are visible outside the package for use in
     148             : // InternalIterator implementations and in Iterator, but not meant for direct
     149             : // use by users of Pebble.
     150             : type LazyValue struct {
     151             :         // ValueOrHandle represents a value, or a handle to be passed to ValueFetcher.
     152             :         // - Fetcher == nil: ValueOrHandle is a value.
     153             :         // - Fetcher != nil: ValueOrHandle is a handle and Fetcher.Attribute is
     154             :         //   initialized.
     155             :         // The ValueOrHandle exposed by InternalIterator or Iterator may not be stable
     156             :         // if the iterator is stepped. To make it stable, make a copy using Clone.
     157             :         ValueOrHandle []byte
     158             :         // Fetcher provides support for fetching an actually lazy value.
     159             :         Fetcher *LazyFetcher
     160             : }
     161             : 
     162             : // LazyFetcher supports fetching a lazy value.
     163             : //
     164             : // Fetcher and Attribute are to be initialized at creation time. The fields
     165             : // are arranged to reduce the sizeof this struct.
     166             : type LazyFetcher struct {
     167             :         // Fetcher, given a handle, returns the value.
     168             :         Fetcher ValueFetcher
     169             :         err     error
     170             :         value   []byte
     171             :         // Attribute includes the short attribute and value length.
     172             :         Attribute   AttributeAndLen
     173             :         fetched     bool
     174             :         callerOwned bool
     175             : }
     176             : 
     177             : // ValueFetcher is an interface for fetching a value.
     178             : type ValueFetcher interface {
     179             :         // Fetch returns the value, given the handle. It is acceptable to call the
     180             :         // ValueFetcher.Fetch as long as the DB is open. However, one should assume
     181             :         // there is a fast-path when the iterator tree has not moved off the sstable
     182             :         // iterator that initially provided this LazyValue. Hence, to utilize this
     183             :         // fast-path the caller should try to decide whether it needs the value or
     184             :         // not as soon as possible, with minimal possible stepping of the iterator.
     185             :         //
     186             :         // buf will be used if the fetcher cannot satisfy P2 (see earlier comment).
     187             :         // If the fetcher attempted to use buf *and* len(buf) was insufficient, it
     188             :         // will allocate a new slice for the value. In either case it will set
     189             :         // callerOwned to true.
     190             :         Fetch(
     191             :                 ctx context.Context, handle []byte, valLen int32, buf []byte,
     192             :         ) (val []byte, callerOwned bool, err error)
     193             : }
     194             : 
     195             : // Value returns the underlying value.
     196           1 : func (lv *LazyValue) Value(buf []byte) (val []byte, callerOwned bool, err error) {
     197           1 :         if lv.Fetcher == nil {
     198           1 :                 return lv.ValueOrHandle, false, nil
     199           1 :         }
     200             :         // Do the rest of the work in a separate method to attempt mid-stack
     201             :         // inlining of Value(). Unfortunately, this still does not inline since the
     202             :         // cost of 85 exceeds the budget of 80.
     203             :         //
     204             :         // TODO(sumeer): Packing the return values into a struct{[]byte error bool}
     205             :         // causes it to be below the budget. Consider this if we need to recover
     206             :         // more performance. I suspect that inlining this only matters in
     207             :         // micro-benchmarks, and in actual use cases in CockroachDB it will not
     208             :         // matter because there is substantial work done with a fetched value.
     209           1 :         return lv.fetchValue(context.TODO(), buf)
     210             : }
     211             : 
     212             : // INVARIANT: lv.Fetcher != nil
     213             : func (lv *LazyValue) fetchValue(
     214             :         ctx context.Context, buf []byte,
     215           1 : ) (val []byte, callerOwned bool, err error) {
     216           1 :         f := lv.Fetcher
     217           1 :         if !f.fetched {
     218           1 :                 f.fetched = true
     219           1 :                 f.value, f.callerOwned, f.err = f.Fetcher.Fetch(ctx,
     220           1 :                         lv.ValueOrHandle, lv.Fetcher.Attribute.ValueLen, buf)
     221           1 :         }
     222           1 :         return f.value, f.callerOwned, f.err
     223             : }
     224             : 
     225             : // IsInPlaceValue returns true iff the value was stored in-place and does not
     226             : // need to be fetched externally.
     227           1 : func (lv *LazyValue) IsInPlaceValue() bool {
     228           1 :         return lv.Fetcher == nil
     229           1 : }
     230             : 
     231             : // InPlaceValue returns the value under the assumption that it is in-place.
     232             : // This is for Pebble-internal code.
     233           1 : func (lv *LazyValue) InPlaceValue() []byte {
     234           1 :         if invariants.Enabled && lv.Fetcher != nil {
     235           0 :                 panic("value must be in-place")
     236             :         }
     237           1 :         return lv.ValueOrHandle
     238             : }
     239             : 
     240             : // Len returns the length of the value.
     241           1 : func (lv *LazyValue) Len() int {
     242           1 :         if lv.Fetcher == nil {
     243           1 :                 return len(lv.ValueOrHandle)
     244           1 :         }
     245           1 :         return int(lv.Fetcher.Attribute.ValueLen)
     246             : }
     247             : 
     248             : // TryGetShortAttribute returns the ShortAttribute and a bool indicating
     249             : // whether the ShortAttribute was populated.
     250           1 : func (lv *LazyValue) TryGetShortAttribute() (ShortAttribute, bool) {
     251           1 :         if lv.Fetcher == nil {
     252           1 :                 return 0, false
     253           1 :         }
     254           1 :         return lv.Fetcher.Attribute.ShortAttribute, true
     255             : }
     256             : 
     257             : // Clone creates a stable copy of the LazyValue, by appending bytes to buf.
     258             : // The fetcher parameter must be non-nil and may be over-written and used
     259             : // inside the returned LazyValue -- this is needed to avoid an allocation.
     260             : // Most callers have at most K cloned LazyValues, where K is hard-coded, so
     261             : // they can have a pool of exactly K LazyFetcher structs they can reuse in
     262             : // these calls. The alternative of allocating LazyFetchers from a sync.Pool is
     263             : // not viable since we have no code trigger for returning to the pool
     264             : // (LazyValues are simply GC'd).
     265             : //
     266             : // NB: It is highly preferable that LazyValue.Value() has not been called,
     267             : // since the Clone will forget any previously extracted value, and a future
     268             : // call to Value will cause it to be fetched again. We do this since we don't
     269             : // want to reason about whether or not to clone an already extracted value
     270             : // inside the Fetcher (we don't). Property P1 applies here too: if lv1.Value()
     271             : // has been called, and then lv2 is created as a clone of lv1, then calling
     272             : // lv2.Value() can invalidate any backing memory maintained inside the fetcher
     273             : // for lv1 (even though these are the same values). We initially prohibited
     274             : // calling LazyValue.Clone() if LazyValue.Value() has been called, but there
     275             : // is at least one complex caller (pebbleMVCCScanner inside CockroachDB) where
     276             : // it is not easy to prove this invariant.
     277           1 : func (lv *LazyValue) Clone(buf []byte, fetcher *LazyFetcher) (LazyValue, []byte) {
     278           1 :         var lvCopy LazyValue
     279           1 :         if lv.Fetcher != nil {
     280           1 :                 *fetcher = LazyFetcher{
     281           1 :                         Fetcher:   lv.Fetcher.Fetcher,
     282           1 :                         Attribute: lv.Fetcher.Attribute,
     283           1 :                         // Not copying anything that has been extracted.
     284           1 :                 }
     285           1 :                 lvCopy.Fetcher = fetcher
     286           1 :         }
     287           1 :         vLen := len(lv.ValueOrHandle)
     288           1 :         if vLen == 0 {
     289           1 :                 return lvCopy, buf
     290           1 :         }
     291           1 :         bufLen := len(buf)
     292           1 :         buf = append(buf, lv.ValueOrHandle...)
     293           1 :         lvCopy.ValueOrHandle = buf[bufLen : bufLen+vLen]
     294           1 :         return lvCopy, buf
     295             : }
     296             : 
     297             : // MakeInPlaceValue constructs an in-place value.
     298           1 : func MakeInPlaceValue(val []byte) LazyValue {
     299           1 :         return LazyValue{ValueOrHandle: val}
     300           1 : }

Generated by: LCOV version 1.14