Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/smart_open/bytebuffer.py: 93%
41 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:57 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:57 +0000
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2019 Radim Rehurek <me@radimrehurek.com>
4#
5# This code is distributed under the terms and conditions
6# from the MIT License (MIT).
7#
8"""Implements ByteBuffer class for amortizing network transfer overhead."""
10import io
13class ByteBuffer(object):
14 """Implements a byte buffer that allows callers to read data with minimal
15 copying, and has a fast __len__ method. The buffer is parametrized by its
16 chunk_size, which is the number of bytes that it will read in from the
17 supplied reader or iterable when the buffer is being filled. As primary use
18 case for this buffer is to amortize the overhead costs of transferring data
19 over the network (rather than capping memory consumption), it leads to more
20 predictable performance to always read the same amount of bytes each time
21 the buffer is filled, hence the chunk_size parameter instead of some fixed
22 capacity.
24 The bytes are stored in a bytestring, and previously-read bytes are freed
25 when the buffer is next filled (by slicing the bytestring into a smaller
26 copy).
28 Example
29 -------
31 Note that while this example works in both Python 2 and 3, the doctest only
32 passes in Python 3 due to the bytestring literals in the expected values.
34 >>> buf = ByteBuffer(chunk_size = 8)
35 >>> message_bytes = iter([b'Hello, W', b'orld!'])
36 >>> buf.fill(message_bytes)
37 8
38 >>> len(buf) # only chunk_size bytes are filled
39 8
40 >>> buf.peek()
41 b'Hello, W'
42 >>> len(buf) # peek() does not change read position
43 8
44 >>> buf.read(6)
45 b'Hello,'
46 >>> len(buf) # read() does change read position
47 2
48 >>> buf.fill(message_bytes)
49 5
50 >>> buf.read()
51 b' World!'
52 >>> len(buf)
53 0
54 """
56 def __init__(self, chunk_size=io.DEFAULT_BUFFER_SIZE):
57 """Create a ByteBuffer instance that reads chunk_size bytes when filled.
58 Note that the buffer has no maximum size.
60 Parameters
61 -----------
62 chunk_size: int, optional
63 The the number of bytes that will be read from the supplied reader
64 or iterable when filling the buffer.
65 """
66 self._chunk_size = chunk_size
67 self.empty()
69 def __len__(self):
70 """Return the number of unread bytes in the buffer as an int"""
71 return len(self._bytes) - self._pos
73 def read(self, size=-1):
74 """Read bytes from the buffer and advance the read position. Returns
75 the bytes in a bytestring.
77 Parameters
78 ----------
79 size: int, optional
80 Maximum number of bytes to read. If negative or not supplied, read
81 all unread bytes in the buffer.
83 Returns
84 -------
85 bytes
86 """
87 part = self.peek(size)
88 self._pos += len(part)
89 return part
91 def peek(self, size=-1):
92 """Get bytes from the buffer without advancing the read position.
93 Returns the bytes in a bytestring.
95 Parameters
96 ----------
97 size: int, optional
98 Maximum number of bytes to return. If negative or not supplied,
99 return all unread bytes in the buffer.
101 Returns
102 -------
103 bytes
104 """
105 if size < 0 or size > len(self):
106 size = len(self)
108 part = bytes(self._bytes[self._pos:self._pos+size])
109 return part
111 def empty(self):
112 """Remove all bytes from the buffer"""
113 self._bytes = bytearray()
114 self._pos = 0
116 def fill(self, source, size=-1):
117 """Fill the buffer with bytes from source until one of these
118 conditions is met:
119 * size bytes have been read from source (if size >= 0);
120 * chunk_size bytes have been read from source;
121 * no more bytes can be read from source;
122 Returns the number of new bytes added to the buffer.
123 Note: all previously-read bytes in the buffer are removed.
125 Parameters
126 ----------
127 source: a file-like object, or iterable/list that contains bytes
128 The source of bytes to fill the buffer with. If this argument has
129 the `read` attribute, it's assumed to be a file-like object and
130 `read` is called to get the bytes; otherwise it's assumed to be an
131 iterable or list that contains bytes, and a for loop is used to get
132 the bytes.
133 size: int, optional
134 The number of bytes to try to read from source. If not supplied,
135 negative, or larger than the buffer's chunk_size, then chunk_size
136 bytes are read. Note that if source is an iterable or list, then
137 it's possible that more than size bytes will be read if iterating
138 over source produces more than one byte at a time.
140 Returns
141 -------
142 int, the number of new bytes added to the buffer.
143 """
144 size = size if size >= 0 else self._chunk_size
145 size = min(size, self._chunk_size)
147 if self._pos != 0:
148 self._bytes = self._bytes[self._pos:]
149 self._pos = 0
151 if hasattr(source, 'read'):
152 new_bytes = source.read(size)
153 else:
154 new_bytes = bytearray()
155 for more_bytes in source:
156 new_bytes += more_bytes
157 if len(new_bytes) >= size:
158 break
160 self._bytes += new_bytes
161 return len(new_bytes)
163 def readline(self, terminator):
164 """Read a line from this buffer efficiently.
166 A line is a contiguous sequence of bytes that ends with either:
168 1. The ``terminator`` character
169 2. The end of the buffer itself
171 :param byte terminator: The line terminator character.
172 :rtype: bytes
174 """
175 index = self._bytes.find(terminator, self._pos)
176 if index == -1:
177 size = len(self)
178 else:
179 size = index - self._pos + 1
180 return self.read(size)