Line data Source code
1 : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package objstorageprovider 6 : 7 : import "github.com/cockroachdb/pebble/internal/invariants" 8 : 9 : const ( 10 : // Constants for dynamic readahead of data blocks. Note that the size values 11 : // make sense as some multiple of the default block size; and they should 12 : // both be larger than the default block size. 13 : minFileReadsForReadahead = 2 14 : // TODO(bilal): Have the initial size value be a factor of the block size, 15 : // as opposed to a hardcoded value. 16 : initialReadaheadSize = 64 << 10 /* 64KB */ 17 : ) 18 : 19 : // readaheadState contains state variables related to readahead. Updated on 20 : // file reads. 21 : type readaheadState struct { 22 : // Number of sequential reads. 23 : numReads int64 24 : maxReadaheadSize int64 25 : // Size issued to the next call to Prefetch. Starts at or above 26 : // initialReadaheadSize and grows exponentially until maxReadaheadSize. 27 : size int64 28 : // prevSize is the size used in the last Prefetch call. 29 : prevSize int64 30 : // The byte offset up to which the OS has been asked to read ahead / cached. 31 : // When reading ahead, reads up to this limit should not incur an IO 32 : // operation. Reads after this limit can benefit from a new call to 33 : // Prefetch. 34 : limit int64 35 : } 36 : 37 1 : func makeReadaheadState(maxReadaheadSize int64) readaheadState { 38 1 : return readaheadState{ 39 1 : size: initialReadaheadSize, 40 1 : maxReadaheadSize: maxReadaheadSize, 41 1 : } 42 1 : } 43 : 44 1 : func (rs *readaheadState) recordCacheHit(offset, blockLength int64) { 45 1 : _ = rs.maybeReadaheadOrCacheHit(offset, blockLength, false) 46 1 : } 47 : 48 : // maybeReadahead updates state and determines whether to issue a readahead / 49 : // prefetch call for a block read at offset for blockLength bytes. 50 : // Returns a size value (greater than 0) that should be prefetched if readahead 51 : // would be beneficial. 52 1 : func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { 53 1 : return rs.maybeReadaheadOrCacheHit(offset, blockLength, true) 54 1 : } 55 : 56 : // The return value should be ignored if !readahead. 57 : func (rs *readaheadState) maybeReadaheadOrCacheHit( 58 : offset, blockLength int64, readahead bool, 59 1 : ) int64 { 60 1 : if invariants.Enabled && rs.maxReadaheadSize == 0 { 61 0 : panic("readaheadState not initialized") 62 : } 63 1 : currentReadEnd := offset + blockLength 64 1 : if rs.numReads >= minFileReadsForReadahead { 65 1 : // The minimum threshold of sequential reads to justify reading ahead 66 1 : // has been reached. 67 1 : // There are two intervals: the interval being read: 68 1 : // [offset, currentReadEnd] 69 1 : // as well as the interval where a read would benefit from read ahead: 70 1 : // [rs.limit, rs.limit + rs.size] 71 1 : // We increase the latter interval to 72 1 : // [rs.limit, rs.limit + rs.maxReadaheadSize] to account for cases where 73 1 : // readahead may not be beneficial with a small readahead size, but over 74 1 : // time the readahead size would increase exponentially to make it 75 1 : // beneficial. 76 1 : if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 77 1 : // We are doing a read in the interval ahead of 78 1 : // the last readahead range. In the diagrams below, ++++ is the last 79 1 : // readahead range, ==== is the range represented by 80 1 : // [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the range 81 1 : // being read. 82 1 : // 83 1 : // rs.limit rs.limit + rs.maxReadaheadSize 84 1 : // ++++++++++|===========================| 85 1 : // 86 1 : // |-------------| 87 1 : // offset currentReadEnd 88 1 : // 89 1 : // This case is also possible, as are all cases with an overlap 90 1 : // between [rs.limit, rs.limit + rs.maxReadaheadSize] and [offset, 91 1 : // currentReadEnd]: 92 1 : // 93 1 : // rs.limit rs.limit + rs.maxReadaheadSize 94 1 : // ++++++++++|===========================| 95 1 : // 96 1 : // |-------------| 97 1 : // offset currentReadEnd 98 1 : // 99 1 : // 100 1 : rs.numReads++ 101 1 : if readahead { 102 1 : rs.limit = offset + rs.size 103 1 : rs.prevSize = rs.size 104 1 : // Increase rs.size for the next read. 105 1 : rs.size *= 2 106 1 : if rs.size > rs.maxReadaheadSize { 107 0 : rs.size = rs.maxReadaheadSize 108 0 : } 109 1 : } else { 110 1 : // This is a read that would have resulted in a readahead, had it 111 1 : // not been a cache hit. 112 1 : rs.limit = currentReadEnd 113 1 : } 114 1 : return rs.prevSize 115 : } 116 1 : if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { 117 1 : // We read too far away from rs.limit to benefit from readahead in 118 1 : // any scenario. Reset all variables. 119 1 : // The case where we read too far ahead: 120 1 : // 121 1 : // (rs.limit - rs.prevSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 122 1 : // |+++++++++++++|=============| 123 1 : // 124 1 : // |-------------| 125 1 : // offset currentReadEnd 126 1 : // 127 1 : // Or too far behind: 128 1 : // 129 1 : // (rs.limit - rs.prevSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 130 1 : // |+++++++++++++|=============| 131 1 : // 132 1 : // |-------------| 133 1 : // offset currentReadEnd 134 1 : // 135 1 : rs.numReads = 1 136 1 : rs.limit = currentReadEnd 137 1 : rs.size = initialReadaheadSize 138 1 : rs.prevSize = 0 139 1 : 140 1 : return 0 141 1 : } 142 : // The previous if-block predicates were all false. This mechanically implies: 143 : // 144 : // INVARIANT: 145 : // !(currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize) && 146 : // !(currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize) 147 : // Which mechanically simplifies to: 148 : // currentReadEnd < rs.limit && currentReadEnd >= rs.limit-rs.prevSize && 149 : // offset <= rs.limit+rs.maxReadaheadSize 150 : // 151 : // So reads in the range [rs.limit - rs.prevSize, rs.limit] end up 152 : // here. This is a read that is potentially benefitting from a past 153 : // readahead, but there's no reason to issue a readahead call at the 154 : // moment. 155 : // 156 : // (rs.limit - rs.prevSize) (rs.limit + rs.maxReadaheadSize) 157 : // |+++++++++++++|===============| 158 : // (rs.limit) 159 : // 160 : // |-------| 161 : // offset currentReadEnd 162 : // 163 1 : rs.numReads++ 164 1 : return 0 165 : } 166 : // Not yet at the numReads threshold to justify readahead. But we want to 167 : // capture whether readahead will be beneficial in the future. 168 1 : if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 169 1 : // Blocks are being read sequentially and would benefit from readahead 170 1 : // down the line. 171 1 : // 172 1 : // (rs.limit) (rs.limit + rs.maxReadaheadSize) 173 1 : // |=============| 174 1 : // 175 1 : // |-------| 176 1 : // offset currentReadEnd 177 1 : // 178 1 : rs.numReads++ 179 1 : // It is possible to fall here when rs.limit has not been initialized. If 180 1 : // we don't initialize, rs.limit, it is possible that the first read 181 1 : // offset was at rs.limit+rs.maxReadaheadSize-delta and the enclosing 182 1 : // if-block predicate was true, and the next read is sequential but has 183 1 : // offset > rs.limit+rs.maxReadaheadSize (if we left rs.limit at 0), and 184 1 : // the enclosing if-block predicate will be false and we will incorrectly 185 1 : // think that readahead is not beneficial. The same issue arises if 186 1 : // rs.limit has been initialized and currentReadEnd is advancing past 187 1 : // rs.limit. 188 1 : rs.limit = currentReadEnd 189 1 : return 0 190 1 : } 191 : // We read too far ahead of the last read, or before it. This indicates 192 : // a random read, where readahead is not desirable. Reset all variables. 193 : // 194 : // (rs.limit - rs.maxReadaheadSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 195 : // |+++++++++++++|=============| 196 : // 197 : // |-------| 198 : // offset currentReadEnd 199 : // 200 1 : rs.numReads = 1 201 1 : rs.limit = currentReadEnd 202 1 : rs.size = initialReadaheadSize 203 1 : rs.prevSize = 0 204 1 : return 0 205 : }