LCOV - code coverage report
Current view: top level - pebble/objstorage/objstorageprovider - readahead.go (source / functions) Hit Total Coverage
Test: 2024-10-14 08:16Z 8079611f - meta test only.lcov Lines: 120 123 97.6 %
Date: 2024-10-14 08:17:37 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package objstorageprovider
       6             : 
       7             : import "github.com/cockroachdb/pebble/internal/invariants"
       8             : 
       9             : const (
      10             :         // Constants for dynamic readahead of data blocks. Note that the size values
      11             :         // make sense as some multiple of the default block size; and they should
      12             :         // both be larger than the default block size.
      13             :         minFileReadsForReadahead = 2
      14             :         // TODO(bilal): Have the initial size value be a factor of the block size,
      15             :         // as opposed to a hardcoded value.
      16             :         initialReadaheadSize = 64 << 10 /* 64KB */
      17             : )
      18             : 
      19             : // readaheadState contains state variables related to readahead. Updated on
      20             : // file reads.
      21             : type readaheadState struct {
      22             :         // Number of sequential reads.
      23             :         numReads         int64
      24             :         maxReadaheadSize int64
      25             :         // Size issued to the next call to Prefetch. Starts at or above
      26             :         // initialReadaheadSize and grows exponentially until maxReadaheadSize.
      27             :         size int64
      28             :         // prevSize is the size used in the last Prefetch call.
      29             :         prevSize int64
      30             :         // The byte offset up to which the OS has been asked to read ahead / cached.
      31             :         // When reading ahead, reads up to this limit should not incur an IO
      32             :         // operation. Reads after this limit can benefit from a new call to
      33             :         // Prefetch.
      34             :         limit int64
      35             : }
      36             : 
      37           1 : func makeReadaheadState(maxReadaheadSize int64) readaheadState {
      38           1 :         return readaheadState{
      39           1 :                 size:             initialReadaheadSize,
      40           1 :                 maxReadaheadSize: maxReadaheadSize,
      41           1 :         }
      42           1 : }
      43             : 
      44           1 : func (rs *readaheadState) recordCacheHit(offset, blockLength int64) {
      45           1 :         _ = rs.maybeReadaheadOrCacheHit(offset, blockLength, false)
      46           1 : }
      47             : 
      48             : // maybeReadahead updates state and determines whether to issue a readahead /
      49             : // prefetch call for a block read at offset for blockLength bytes.
      50             : // Returns a size value (greater than 0) that should be prefetched if readahead
      51             : // would be beneficial.
      52           1 : func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 {
      53           1 :         return rs.maybeReadaheadOrCacheHit(offset, blockLength, true)
      54           1 : }
      55             : 
      56             : // The return value should be ignored if !readahead.
      57             : func (rs *readaheadState) maybeReadaheadOrCacheHit(
      58             :         offset, blockLength int64, readahead bool,
      59           1 : ) int64 {
      60           1 :         if invariants.Enabled && rs.maxReadaheadSize == 0 {
      61           0 :                 panic("readaheadState not initialized")
      62             :         }
      63           1 :         currentReadEnd := offset + blockLength
      64           1 :         if rs.numReads >= minFileReadsForReadahead {
      65           1 :                 // The minimum threshold of sequential reads to justify reading ahead
      66           1 :                 // has been reached.
      67           1 :                 // There are two intervals: the interval being read:
      68           1 :                 // [offset, currentReadEnd]
      69           1 :                 // as well as the interval where a read would benefit from read ahead:
      70           1 :                 // [rs.limit, rs.limit + rs.size]
      71           1 :                 // We increase the latter interval to
      72           1 :                 // [rs.limit, rs.limit + rs.maxReadaheadSize] to account for cases where
      73           1 :                 // readahead may not be beneficial with a small readahead size, but over
      74           1 :                 // time the readahead size would increase exponentially to make it
      75           1 :                 // beneficial.
      76           1 :                 if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
      77           1 :                         // We are doing a read in the interval ahead of
      78           1 :                         // the last readahead range. In the diagrams below, ++++ is the last
      79           1 :                         // readahead range, ==== is the range represented by
      80           1 :                         // [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the range
      81           1 :                         // being read.
      82           1 :                         //
      83           1 :                         //               rs.limit           rs.limit + rs.maxReadaheadSize
      84           1 :                         //         ++++++++++|===========================|
      85           1 :                         //
      86           1 :                         //              |-------------|
      87           1 :                         //            offset       currentReadEnd
      88           1 :                         //
      89           1 :                         // This case is also possible, as are all cases with an overlap
      90           1 :                         // between [rs.limit, rs.limit + rs.maxReadaheadSize] and [offset,
      91           1 :                         // currentReadEnd]:
      92           1 :                         //
      93           1 :                         //               rs.limit           rs.limit + rs.maxReadaheadSize
      94           1 :                         //         ++++++++++|===========================|
      95           1 :                         //
      96           1 :                         //                                            |-------------|
      97           1 :                         //                                         offset       currentReadEnd
      98           1 :                         //
      99           1 :                         //
     100           1 :                         rs.numReads++
     101           1 :                         if readahead {
     102           1 :                                 rs.limit = offset + rs.size
     103           1 :                                 rs.prevSize = rs.size
     104           1 :                                 // Increase rs.size for the next read.
     105           1 :                                 rs.size *= 2
     106           1 :                                 if rs.size > rs.maxReadaheadSize {
     107           0 :                                         rs.size = rs.maxReadaheadSize
     108           0 :                                 }
     109           1 :                         } else {
     110           1 :                                 // This is a read that would have resulted in a readahead, had it
     111           1 :                                 // not been a cache hit.
     112           1 :                                 rs.limit = currentReadEnd
     113           1 :                         }
     114           1 :                         return rs.prevSize
     115             :                 }
     116           1 :                 if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize {
     117           1 :                         // We read too far away from rs.limit to benefit from readahead in
     118           1 :                         // any scenario. Reset all variables.
     119           1 :                         // The case where we read too far ahead:
     120           1 :                         //
     121           1 :                         // (rs.limit - rs.prevSize)    (rs.limit)   (rs.limit + rs.maxReadaheadSize)
     122           1 :                         //                    |+++++++++++++|=============|
     123           1 :                         //
     124           1 :                         //                                                  |-------------|
     125           1 :                         //                                             offset       currentReadEnd
     126           1 :                         //
     127           1 :                         // Or too far behind:
     128           1 :                         //
     129           1 :                         // (rs.limit - rs.prevSize)    (rs.limit)   (rs.limit + rs.maxReadaheadSize)
     130           1 :                         //                    |+++++++++++++|=============|
     131           1 :                         //
     132           1 :                         //    |-------------|
     133           1 :                         // offset       currentReadEnd
     134           1 :                         //
     135           1 :                         rs.numReads = 1
     136           1 :                         rs.limit = currentReadEnd
     137           1 :                         rs.size = initialReadaheadSize
     138           1 :                         rs.prevSize = 0
     139           1 : 
     140           1 :                         return 0
     141           1 :                 }
     142             :                 // The previous if-block predicates were all false. This mechanically implies:
     143             :                 //
     144             :                 // INVARIANT:
     145             :                 //  !(currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize) &&
     146             :                 //  !(currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize)
     147             :                 // Which mechanically simplifies to:
     148             :                 //  currentReadEnd < rs.limit  && currentReadEnd >= rs.limit-rs.prevSize &&
     149             :                 //  offset <= rs.limit+rs.maxReadaheadSize
     150             :                 //
     151             :                 // So reads in the range [rs.limit - rs.prevSize, rs.limit] end up
     152             :                 // here. This is a read that is potentially benefitting from a past
     153             :                 // readahead, but there's no reason to issue a readahead call at the
     154             :                 // moment.
     155             :                 //
     156             :                 // (rs.limit - rs.prevSize)            (rs.limit + rs.maxReadaheadSize)
     157             :                 //                    |+++++++++++++|===============|
     158             :                 //                             (rs.limit)
     159             :                 //
     160             :                 //                        |-------|
     161             :                 //                     offset    currentReadEnd
     162             :                 //
     163           1 :                 rs.numReads++
     164           1 :                 return 0
     165             :         }
     166             :         // Not yet at the numReads threshold to justify readahead. But we want to
     167             :         // capture whether readahead will be beneficial in the future.
     168           1 :         if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
     169           1 :                 // Blocks are being read sequentially and would benefit from readahead
     170           1 :                 // down the line.
     171           1 :                 //
     172           1 :                 //                       (rs.limit)   (rs.limit + rs.maxReadaheadSize)
     173           1 :                 //                         |=============|
     174           1 :                 //
     175           1 :                 //                    |-------|
     176           1 :                 //                offset    currentReadEnd
     177           1 :                 //
     178           1 :                 rs.numReads++
     179           1 :                 // It is possible to fall here when rs.limit has not been initialized. If
     180           1 :                 // we don't initialize, rs.limit, it is possible that the first read
     181           1 :                 // offset was at rs.limit+rs.maxReadaheadSize-delta and the enclosing
     182           1 :                 // if-block predicate was true, and the next read is sequential but has
     183           1 :                 // offset > rs.limit+rs.maxReadaheadSize (if we left rs.limit at 0), and
     184           1 :                 // the enclosing if-block predicate will be false and we will incorrectly
     185           1 :                 // think that readahead is not beneficial. The same issue arises if
     186           1 :                 // rs.limit has been initialized and currentReadEnd is advancing past
     187           1 :                 // rs.limit.
     188           1 :                 rs.limit = currentReadEnd
     189           1 :                 return 0
     190           1 :         }
     191             :         // We read too far ahead of the last read, or before it. This indicates
     192             :         // a random read, where readahead is not desirable. Reset all variables.
     193             :         //
     194             :         // (rs.limit - rs.maxReadaheadSize)  (rs.limit)   (rs.limit + rs.maxReadaheadSize)
     195             :         //                     |+++++++++++++|=============|
     196             :         //
     197             :         //                                                    |-------|
     198             :         //                                                offset    currentReadEnd
     199             :         //
     200           1 :         rs.numReads = 1
     201           1 :         rs.limit = currentReadEnd
     202           1 :         rs.size = initialReadaheadSize
     203           1 :         rs.prevSize = 0
     204           1 :         return 0
     205             : }

Generated by: LCOV version 1.14