Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/smart_open/bytebuffer.py: 93%

41 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:57 +0000

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (C) 2019 Radim Rehurek <me@radimrehurek.com> 

4# 

5# This code is distributed under the terms and conditions 

6# from the MIT License (MIT). 

7# 

8"""Implements ByteBuffer class for amortizing network transfer overhead.""" 

9 

10import io 

11 

12 

13class ByteBuffer(object): 

14 """Implements a byte buffer that allows callers to read data with minimal 

15 copying, and has a fast __len__ method. The buffer is parametrized by its 

16 chunk_size, which is the number of bytes that it will read in from the 

17 supplied reader or iterable when the buffer is being filled. As primary use 

18 case for this buffer is to amortize the overhead costs of transferring data 

19 over the network (rather than capping memory consumption), it leads to more 

20 predictable performance to always read the same amount of bytes each time 

21 the buffer is filled, hence the chunk_size parameter instead of some fixed 

22 capacity. 

23 

24 The bytes are stored in a bytestring, and previously-read bytes are freed 

25 when the buffer is next filled (by slicing the bytestring into a smaller 

26 copy). 

27 

28 Example 

29 ------- 

30 

31 Note that while this example works in both Python 2 and 3, the doctest only 

32 passes in Python 3 due to the bytestring literals in the expected values. 

33 

34 >>> buf = ByteBuffer(chunk_size = 8) 

35 >>> message_bytes = iter([b'Hello, W', b'orld!']) 

36 >>> buf.fill(message_bytes) 

37 8 

38 >>> len(buf) # only chunk_size bytes are filled 

39 8 

40 >>> buf.peek() 

41 b'Hello, W' 

42 >>> len(buf) # peek() does not change read position 

43 8 

44 >>> buf.read(6) 

45 b'Hello,' 

46 >>> len(buf) # read() does change read position 

47 2 

48 >>> buf.fill(message_bytes) 

49 5 

50 >>> buf.read() 

51 b' World!' 

52 >>> len(buf) 

53 0 

54 """ 

55 

56 def __init__(self, chunk_size=io.DEFAULT_BUFFER_SIZE): 

57 """Create a ByteBuffer instance that reads chunk_size bytes when filled. 

58 Note that the buffer has no maximum size. 

59 

60 Parameters 

61 ----------- 

62 chunk_size: int, optional 

63 The the number of bytes that will be read from the supplied reader 

64 or iterable when filling the buffer. 

65 """ 

66 self._chunk_size = chunk_size 

67 self.empty() 

68 

69 def __len__(self): 

70 """Return the number of unread bytes in the buffer as an int""" 

71 return len(self._bytes) - self._pos 

72 

73 def read(self, size=-1): 

74 """Read bytes from the buffer and advance the read position. Returns 

75 the bytes in a bytestring. 

76 

77 Parameters 

78 ---------- 

79 size: int, optional 

80 Maximum number of bytes to read. If negative or not supplied, read 

81 all unread bytes in the buffer. 

82 

83 Returns 

84 ------- 

85 bytes 

86 """ 

87 part = self.peek(size) 

88 self._pos += len(part) 

89 return part 

90 

91 def peek(self, size=-1): 

92 """Get bytes from the buffer without advancing the read position. 

93 Returns the bytes in a bytestring. 

94 

95 Parameters 

96 ---------- 

97 size: int, optional 

98 Maximum number of bytes to return. If negative or not supplied, 

99 return all unread bytes in the buffer. 

100 

101 Returns 

102 ------- 

103 bytes 

104 """ 

105 if size < 0 or size > len(self): 

106 size = len(self) 

107 

108 part = bytes(self._bytes[self._pos:self._pos+size]) 

109 return part 

110 

111 def empty(self): 

112 """Remove all bytes from the buffer""" 

113 self._bytes = bytearray() 

114 self._pos = 0 

115 

116 def fill(self, source, size=-1): 

117 """Fill the buffer with bytes from source until one of these 

118 conditions is met: 

119 * size bytes have been read from source (if size >= 0); 

120 * chunk_size bytes have been read from source; 

121 * no more bytes can be read from source; 

122 Returns the number of new bytes added to the buffer. 

123 Note: all previously-read bytes in the buffer are removed. 

124 

125 Parameters 

126 ---------- 

127 source: a file-like object, or iterable/list that contains bytes 

128 The source of bytes to fill the buffer with. If this argument has 

129 the `read` attribute, it's assumed to be a file-like object and 

130 `read` is called to get the bytes; otherwise it's assumed to be an 

131 iterable or list that contains bytes, and a for loop is used to get 

132 the bytes. 

133 size: int, optional 

134 The number of bytes to try to read from source. If not supplied, 

135 negative, or larger than the buffer's chunk_size, then chunk_size 

136 bytes are read. Note that if source is an iterable or list, then 

137 it's possible that more than size bytes will be read if iterating 

138 over source produces more than one byte at a time. 

139 

140 Returns 

141 ------- 

142 int, the number of new bytes added to the buffer. 

143 """ 

144 size = size if size >= 0 else self._chunk_size 

145 size = min(size, self._chunk_size) 

146 

147 if self._pos != 0: 

148 self._bytes = self._bytes[self._pos:] 

149 self._pos = 0 

150 

151 if hasattr(source, 'read'): 

152 new_bytes = source.read(size) 

153 else: 

154 new_bytes = bytearray() 

155 for more_bytes in source: 

156 new_bytes += more_bytes 

157 if len(new_bytes) >= size: 

158 break 

159 

160 self._bytes += new_bytes 

161 return len(new_bytes) 

162 

163 def readline(self, terminator): 

164 """Read a line from this buffer efficiently. 

165 

166 A line is a contiguous sequence of bytes that ends with either: 

167 

168 1. The ``terminator`` character 

169 2. The end of the buffer itself 

170 

171 :param byte terminator: The line terminator character. 

172 :rtype: bytes 

173 

174 """ 

175 index = self._bytes.find(terminator, self._pos) 

176 if index == -1: 

177 size = len(self) 

178 else: 

179 size = index - self._pos + 1 

180 return self.read(size)