Line data Source code
1 : // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : package objstorageprovider 6 : 7 : const ( 8 : // Constants for dynamic readahead of data blocks. Note that the size values 9 : // make sense as some multiple of the default block size; and they should 10 : // both be larger than the default block size. 11 : minFileReadsForReadahead = 2 12 : // TODO(bilal): Have the initial size value be a factor of the block size, 13 : // as opposed to a hardcoded value. 14 : initialReadaheadSize = 64 << 10 /* 64KB */ 15 : ) 16 : 17 : // readaheadState contains state variables related to readahead. Updated on 18 : // file reads. 19 : type readaheadState struct { 20 : // Number of sequential reads. 21 : numReads int64 22 : maxReadaheadSize int64 23 : // Size issued to the next call to Prefetch. Starts at or above 24 : // initialReadaheadSize and grows exponentially until maxReadaheadSize. 25 : size int64 26 : // prevSize is the size used in the last Prefetch call. 27 : prevSize int64 28 : // The byte offset up to which the OS has been asked to read ahead / cached. 29 : // When reading ahead, reads up to this limit should not incur an IO 30 : // operation. Reads after this limit can benefit from a new call to 31 : // Prefetch. 32 : limit int64 33 : } 34 : 35 2 : func makeReadaheadState(maxReadaheadSize int64) readaheadState { 36 2 : return readaheadState{ 37 2 : size: initialReadaheadSize, 38 2 : maxReadaheadSize: maxReadaheadSize, 39 2 : } 40 2 : } 41 : 42 2 : func (rs *readaheadState) recordCacheHit(offset, blockLength int64) { 43 2 : currentReadEnd := offset + blockLength 44 2 : if rs.numReads >= minFileReadsForReadahead { 45 2 : if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 46 2 : // This is a read that would have resulted in a readahead, had it 47 2 : // not been a cache hit. 48 2 : rs.limit = currentReadEnd 49 2 : return 50 2 : } 51 2 : if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { 52 2 : // We read too far away from rs.limit to benefit from readahead in 53 2 : // any scenario. Reset all variables. 54 2 : rs.numReads = 1 55 2 : rs.limit = currentReadEnd 56 2 : rs.size = initialReadaheadSize 57 2 : rs.prevSize = 0 58 2 : return 59 2 : } 60 : // Reads in the range [rs.limit - rs.prevSize, rs.limit] end up 61 : // here. This is a read that is potentially benefitting from a past 62 : // readahead. 63 2 : return 64 : } 65 2 : if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 66 2 : // Blocks are being read sequentially and would benefit from readahead 67 2 : // down the line. 68 2 : rs.numReads++ 69 2 : return 70 2 : } 71 : // We read too far ahead of the last read, or before it. This indicates 72 : // a random read, where readahead is not desirable. Reset all variables. 73 2 : rs.numReads = 1 74 2 : rs.limit = currentReadEnd 75 2 : rs.size = initialReadaheadSize 76 2 : rs.prevSize = 0 77 : } 78 : 79 : // maybeReadahead updates state and determines whether to issue a readahead / 80 : // prefetch call for a block read at offset for blockLength bytes. 81 : // Returns a size value (greater than 0) that should be prefetched if readahead 82 : // would be beneficial. 83 2 : func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { 84 2 : currentReadEnd := offset + blockLength 85 2 : if rs.numReads >= minFileReadsForReadahead { 86 2 : // The minimum threshold of sequential reads to justify reading ahead 87 2 : // has been reached. 88 2 : // There are two intervals: the interval being read: 89 2 : // [offset, currentReadEnd] 90 2 : // as well as the interval where a read would benefit from read ahead: 91 2 : // [rs.limit, rs.limit + rs.size] 92 2 : // We increase the latter interval to 93 2 : // [rs.limit, rs.limit + rs.maxReadaheadSize] to account for cases where 94 2 : // readahead may not be beneficial with a small readahead size, but over 95 2 : // time the readahead size would increase exponentially to make it 96 2 : // beneficial. 97 2 : if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 98 2 : // We are doing a read in the interval ahead of 99 2 : // the last readahead range. In the diagrams below, ++++ is the last 100 2 : // readahead range, ==== is the range represented by 101 2 : // [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the range 102 2 : // being read. 103 2 : // 104 2 : // rs.limit rs.limit + rs.maxReadaheadSize 105 2 : // ++++++++++|===========================| 106 2 : // 107 2 : // |-------------| 108 2 : // offset currentReadEnd 109 2 : // 110 2 : // This case is also possible, as are all cases with an overlap 111 2 : // between [rs.limit, rs.limit + rs.maxReadaheadSize] and [offset, 112 2 : // currentReadEnd]: 113 2 : // 114 2 : // rs.limit rs.limit + rs.maxReadaheadSize 115 2 : // ++++++++++|===========================| 116 2 : // 117 2 : // |-------------| 118 2 : // offset currentReadEnd 119 2 : // 120 2 : // 121 2 : rs.numReads++ 122 2 : rs.limit = offset + rs.size 123 2 : rs.prevSize = rs.size 124 2 : // Increase rs.size for the next read. 125 2 : rs.size *= 2 126 2 : if rs.size > rs.maxReadaheadSize { 127 2 : rs.size = rs.maxReadaheadSize 128 2 : } 129 2 : return rs.prevSize 130 : } 131 2 : if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { 132 2 : // The above conditional has rs.limit > rs.prevSize to confirm that 133 2 : // rs.limit - rs.prevSize would not underflow. 134 2 : // We read too far away from rs.limit to benefit from readahead in 135 2 : // any scenario. Reset all variables. 136 2 : // The case where we read too far ahead: 137 2 : // 138 2 : // (rs.limit - rs.prevSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 139 2 : // |+++++++++++++|=============| 140 2 : // 141 2 : // |-------------| 142 2 : // offset currentReadEnd 143 2 : // 144 2 : // Or too far behind: 145 2 : // 146 2 : // (rs.limit - rs.prevSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 147 2 : // |+++++++++++++|=============| 148 2 : // 149 2 : // |-------------| 150 2 : // offset currentReadEnd 151 2 : // 152 2 : rs.numReads = 1 153 2 : rs.limit = currentReadEnd 154 2 : rs.size = initialReadaheadSize 155 2 : rs.prevSize = 0 156 2 : 157 2 : return 0 158 2 : } 159 : // Reads in the range [rs.limit - rs.prevSize, rs.limit] end up 160 : // here. This is a read that is potentially benefitting from a past 161 : // readahead, but there's no reason to issue a readahead call at the 162 : // moment. 163 : // 164 : // (rs.limit - rs.prevSize) (rs.limit + rs.maxReadaheadSize) 165 : // |+++++++++++++|===============| 166 : // (rs.limit) 167 : // 168 : // |-------| 169 : // offset currentReadEnd 170 : // 171 2 : rs.numReads++ 172 2 : return 0 173 : } 174 2 : if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 175 2 : // Blocks are being read sequentially and would benefit from readahead 176 2 : // down the line. 177 2 : // 178 2 : // (rs.limit) (rs.limit + rs.maxReadaheadSize) 179 2 : // |=============| 180 2 : // 181 2 : // |-------| 182 2 : // offset currentReadEnd 183 2 : // 184 2 : rs.numReads++ 185 2 : return 0 186 2 : } 187 : // We read too far ahead of the last read, or before it. This indicates 188 : // a random read, where readahead is not desirable. Reset all variables. 189 : // 190 : // (rs.limit - rs.maxReadaheadSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 191 : // |+++++++++++++|=============| 192 : // 193 : // |-------| 194 : // offset currentReadEnd 195 : // 196 2 : rs.numReads = 1 197 2 : rs.limit = currentReadEnd 198 2 : rs.size = initialReadaheadSize 199 2 : rs.prevSize = 0 200 2 : return 0 201 : }