Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/requests

1# -*- coding: utf-8 -*-

2"""

4requests_toolbelt.multipart.encoder

5===================================

7This holds all of the implementation details of the MultipartEncoder

9"""

10import contextlib

11import io

12import os

13from uuid import uuid4

15import requests

17from .._compat import fields

class FileNotSupportedError(Exception):
    """Signal that a remote file cannot be streamed by this module.

    ``FileFromURLWrapper`` raises this when the response for the requested
    URL has no ``content-length`` header or when that header is not made up
    of digits only.
    """

class MultipartEncoder(object):

    """

    The ``MultipartEncoder`` object is a generic interface to the engine that
    will create a ``multipart/form-data`` body for you.

    The basic usage is:

    .. code-block:: python

        import requests
        from requests_toolbelt import MultipartEncoder

        encoder = MultipartEncoder({'field': 'value',
                                    'other_field': 'other_value'})
        r = requests.post('https://httpbin.org/post', data=encoder,
                          headers={'Content-Type': encoder.content_type})

    If you do not need to take advantage of streaming the post body, you can
    also do:

    .. code-block:: python

        r = requests.post('https://httpbin.org/post',
                          data=encoder.to_string(),
                          headers={'Content-Type': encoder.content_type})

    If you want the encoder to use a specific order, you can use an
    OrderedDict or more simply, a list of tuples:

    .. code-block:: python

        encoder = MultipartEncoder([('field', 'value'),
                                    ('other_field', 'other_value')])

    .. versionchanged:: 0.4.0

    You can also provide tuples as part values as you would provide them to
    requests' ``files`` parameter.

    .. code-block:: python

        encoder = MultipartEncoder({
            'field': ('file_name', b'{"a": "b"}', 'application/json',
                      {'X-My-Header': 'my-value'})
        })

    .. warning::

        This object will end up directly in :mod:`httplib`. Currently,
        :mod:`httplib` has a hard-coded read size of **8192 bytes**. This
        means that it will loop until the file has been read and your upload
        could take a while. This is **not** a bug in requests. A feature is
        being considered for this object to allow you, the user, to specify
        what size should be returned on a read. If you have opinions on this,
        please weigh in on `this issue`_.

    .. _this issue:
        https://github.com/requests/toolbelt/issues/75

    """

    def __init__(self, fields, boundary=None, encoding='utf-8'):
        """Prepare every part and prime the buffer with the first boundary.

        :param fields: mapping (anything with ``items``) or iterable of
            ``(name, value)`` pairs describing the parts
        :param boundary: (optional), boundary value to use; a random
            ``uuid4`` hex string is generated when it is falsy
        :param str encoding: encoding applied to text that is written to
            the body
        """
        #: Boundary value either passed in by the user or created
        self.boundary_value = boundary or uuid4().hex

        # Computed boundary
        self.boundary = '--{}'.format(self.boundary_value)

        #: Encoding of the data being passed in
        self.encoding = encoding

        # Pre-encoded boundary
        self._encoded_boundary = b''.join([
            encode_with(self.boundary, self.encoding),
            encode_with('\r\n', self.encoding)
        ])

        #: Fields provided by the user
        self.fields = fields

        #: Whether or not the encoder is finished
        self.finished = False

        #: Pre-computed parts of the upload
        self.parts = []

        # Pre-computed parts iterator
        self._iter_parts = iter([])

        # The part we're currently working with
        self._current_part = None

        # Cached computation of the body's length
        self._len = None

        # Our buffer
        self._buffer = CustomBytesIO(encoding=encoding)

        # Pre-compute each part's headers
        self._prepare_parts()

        # Load boundary into buffer
        self._write_boundary()

    @property
    def len(self):
        """Length of the multipart/form-data body.

        requests will first attempt to get the length of the body by calling
        ``len(body)`` and then by checking for the ``len`` attribute.

        On 32-bit systems, the ``__len__`` method cannot return anything
        larger than an integer (in C) can hold. If the total size of the body
        is even slightly larger than 4GB users will see an OverflowError. This
        manifested itself in `bug #80`_.

        As such, we now calculate the length lazily as a property.

        .. _bug #80:
            https://github.com/requests/toolbelt/issues/80
        """
        # If _len isn't already calculated, calculate, return, and set it
        return self._len or self._calculate_length()

    def __repr__(self):
        """Return a debugging representation showing the user's fields."""
        return '<MultipartEncoder: {!r}>'.format(self.fields)

    def _calculate_length(self):
        """
        This uses the parts to calculate the length of the body.

        This returns the calculated length so __len__ can be lazy.
        """
        # Boundary lines are always written from ``_encoded_boundary``
        # (``--{boundary}\r\n`` after encoding), so measure those bytes
        # rather than the text form; the two differ whenever the boundary
        # does not encode to one byte per character.
        boundary_len = len(self._encoded_boundary)
        # Per part: boundary line + headers and body + the b'\r\n' that
        # ``_load`` writes after the part.
        # Closing line: the last two bytes of the final boundary line are
        # overwritten with b'--\r\n', a net growth of two bytes.
        self._len = sum(
            (boundary_len + total_len(p) + 2) for p in self.parts
        ) + boundary_len + 2
        return self._len

    def _calculate_load_amount(self, read_size):
        """This calculates how many bytes need to be added to the buffer.

        When a consumer reads ``x`` from the buffer, there are two cases to
        satisfy:

            1. Enough data in the buffer to return the requested amount
            2. Not enough data

        This function uses the amount of unread bytes in the buffer and
        determines how much the Encoder has to load before it can return the
        requested amount of bytes.

        :param int read_size: the number of bytes the consumer requests
        :returns: int -- the number of bytes that must be loaded into the
            buffer before the read can be satisfied. This will be strictly
            non-negative
        """
        amount = read_size - total_len(self._buffer)
        return amount if amount > 0 else 0

    def _load(self, amount):
        """Load ``amount`` number of bytes into the buffer.

        :param int amount: number of bytes to load; ``-1`` loads every
            remaining part and the closing boundary
        """
        self._buffer.smart_truncate()
        part = self._current_part or self._next_part()
        while amount == -1 or amount > 0:
            written = 0
            if part and not part.bytes_left_to_write():
                # The current part is exhausted: terminate it, start the
                # next boundary line and advance to the following part.
                written += self._write(b'\r\n')
                written += self._write_boundary()
                part = self._next_part()

            if not part:
                # No parts remain, so the boundary line just written is
                # turned into the closing boundary.
                written += self._write_closing_boundary()
                self.finished = True
                break

            written += part.write_to(self._buffer, amount)

            if amount != -1:
                amount -= written

    def _next_part(self):
        """Advance to the next part.

        :returns: the next part, or ``None`` once the parts iterator is
            exhausted (``_current_part`` is left untouched in that case)
        """
        try:
            p = self._current_part = next(self._iter_parts)
        except StopIteration:
            p = None
        return p

    def _iter_fields(self):
        """Yield one multipart ``RequestField`` per user-provided field.

        A list or tuple value is unpacked as ``(file_name, data)``,
        ``(file_name, data, content_type)`` or
        ``(file_name, data, content_type, headers)``; any other value is
        used as the field's data directly.
        """
        _fields = self.fields
        if hasattr(self.fields, 'items'):
            _fields = list(self.fields.items())
        for k, v in _fields:
            file_name = None
            file_type = None
            file_headers = None
            if isinstance(v, (list, tuple)):
                if len(v) == 2:
                    file_name, file_pointer = v
                elif len(v) == 3:
                    file_name, file_pointer, file_type = v
                else:
                    file_name, file_pointer, file_type, file_headers = v
            else:
                file_pointer = v

            field = fields.RequestField(name=k, data=file_pointer,
                                        filename=file_name,
                                        headers=file_headers)
            field.make_multipart(content_type=file_type)
            yield field

    def _prepare_parts(self):
        """This uses the fields provided by the user and creates Part objects.

        It populates the `parts` attribute and uses that to create a
        generator for iteration.
        """
        enc = self.encoding
        self.parts = [Part.from_field(f, enc) for f in self._iter_fields()]
        self._iter_parts = iter(self.parts)

    def _write(self, bytes_to_write):
        """Write the bytes to the end of the buffer.

        :param bytes bytes_to_write: byte-string (or bytearray) to append to
            the buffer
        :returns: int -- the number of bytes written
        """
        return self._buffer.append(bytes_to_write)

    def _write_boundary(self):
        """Write the boundary to the end of the buffer."""
        return self._write(self._encoded_boundary)

    def _write_closing_boundary(self):
        """Write the bytes necessary to finish a multipart/form-data body.

        :returns: int -- the net number of bytes added to the buffer
        """
        with reset(self._buffer):
            # Step back over the last two bytes of the boundary line that
            # was just written and replace them with b'--\r\n'.
            self._buffer.seek(-2, 2)
            self._buffer.write(b'--\r\n')
        return 2

    def _write_headers(self, headers):
        """Write the current part's headers to the buffer."""
        return self._write(encode_with(headers, self.encoding))

    @property
    def content_type(self):
        """Value for the ``Content-Type`` header, including the boundary."""
        return str(
            'multipart/form-data; boundary={}'.format(self.boundary_value)
        )

    def to_string(self):
        """Return the entirety of the data in the encoder.

        .. note::

            This simply reads all of the data it can. If you have started
            streaming or reading data from the encoder, this method will only
            return whatever data is left in the encoder.

        .. note::

            This method affects the internal state of the encoder. Calling
            this method will exhaust the encoder.

        :returns: the multipart message
        :rtype: bytes
        """

        return self.read()

    def read(self, size=-1):
        """Read data from the streaming encoder.

        :param int size: (optional), If provided, ``read`` will return exactly
            that many bytes. If it is not provided (or is ``None`` or
            ``-1``), it will return the remaining bytes.
        :returns: bytes
        """
        if self.finished:
            return self._buffer.read(size)

        # ``None`` means "read everything", exactly like ``-1``. It must be
        # normalised before reaching ``_load``, whose loop compares the
        # amount against integers.
        if size is None or size == -1:
            bytes_to_load = -1
        else:
            bytes_to_load = self._calculate_load_amount(int(size))

        self._load(bytes_to_load)
        return self._buffer.read(size)

316

317

def IDENTITY(monitor):
    """Return ``monitor`` unchanged; used as the do-nothing callback."""
    return monitor

320

321

class MultipartEncoderMonitor(object):

    """
    An object used to monitor the progress of a :class:`MultipartEncoder`.

    The :class:`MultipartEncoder` should only be responsible for preparing and
    streaming the data. Anyone who wishes to monitor it should not use that
    same instance to manage the monitoring as well. With this class you wrap
    an encoder and register a callback; the callback receives the monitor
    instance after every read.

    To use this monitor, you construct your :class:`MultipartEncoder` as you
    normally would.

    .. code-block:: python

        from requests_toolbelt import (MultipartEncoder,
                                       MultipartEncoderMonitor)
        import requests

        def callback(monitor):
            # Do something with this information
            pass

        m = MultipartEncoder(fields={'field0': 'value0'})
        monitor = MultipartEncoderMonitor(m, callback)
        headers = {'Content-Type': monitor.content_type}
        r = requests.post('https://httpbin.org/post', data=monitor,
                          headers=headers)

    Alternatively, if your use case is very simple, you can use the following
    pattern.

    .. code-block:: python

        from requests_toolbelt import MultipartEncoderMonitor
        import requests

        def callback(monitor):
            # Do something with this information
            pass

        monitor = MultipartEncoderMonitor.from_fields(
            fields={'field0': 'value0'}, callback=callback
        )
        headers = {'Content-Type': monitor.content_type}
        r = requests.post('https://httpbin.org/post', data=monitor,
                          headers=headers)

    """

    def __init__(self, encoder, callback=None):
        #: Instance of the :class:`MultipartEncoder` being monitored
        self.encoder = encoder

        #: Optional function to call after a read; defaults to a no-op
        self.callback = callback if callback else IDENTITY

        #: Number of bytes already read from the :class:`MultipartEncoder`
        #: instance
        self.bytes_read = 0

        #: Avoid the same problem in bug #80
        self.len = encoder.len

    @classmethod
    def from_fields(cls, fields, boundary=None, encoding='utf-8',
                    callback=None):
        """Build the encoder from ``fields`` and wrap it in a monitor."""
        return cls(MultipartEncoder(fields, boundary, encoding), callback)

    @property
    def content_type(self):
        """Content type reported by the wrapped encoder."""
        return self.encoder.content_type

    def to_string(self):
        """Read everything that is left; goes through :meth:`read`."""
        return self.read()

    def read(self, size=-1):
        """Read from the wrapped encoder, then notify the callback.

        :param int size: passed through to the encoder's ``read``
        :returns: whatever the encoder's ``read`` returned
        """
        chunk = self.encoder.read(size)
        self.bytes_read += len(chunk)
        self.callback(self)
        return chunk

405

406

def encode_with(string, encoding):
    """Return ``string`` as bytes, encoding it only when that is needed.

    :param str string: value to encode. ``None`` and ``bytes`` objects are
        handed back untouched.
    :param str encoding: The encoding with which to encode string.
    :returns: encoded bytes object (or ``None`` when ``string`` is ``None``)
    """
    # Nothing to do for values that are absent or already encoded.
    if string is None or isinstance(string, bytes):
        return string
    return string.encode(encoding)

418

419

def readable_data(data, encoding):
    """Return ``data`` itself if it can be read, else wrap it in a buffer.

    Objects exposing a ``read`` attribute are returned as-is; anything else
    is placed in a ``CustomBytesIO`` built with ``encoding``.
    """
    return data if hasattr(data, 'read') else CustomBytesIO(data, encoding)

426

427

def total_len(o):
    """Work out how many bytes ``o`` holds, trying several strategies.

    In order: ``len(o)`` for sized objects, the ``len`` attribute, the size
    reported by ``os.fstat`` for objects backed by a file descriptor, and
    finally the length of ``getvalue()``.

    :returns: the length found, or ``None`` when no strategy applies
    """
    if hasattr(o, '__len__'):
        return len(o)

    if hasattr(o, 'len'):
        return o.len

    if hasattr(o, 'fileno'):
        try:
            descriptor = o.fileno()
        except io.UnsupportedOperation:
            # The object has ``fileno`` but no real descriptor behind it;
            # fall through to the remaining strategy.
            pass
        else:
            return os.fstat(descriptor).st_size

    if hasattr(o, 'getvalue'):
        # e.g. BytesIO, cStringIO.StringIO
        return len(o.getvalue())

    return None

446

447

@contextlib.contextmanager
def reset(buffer):
    """Keep track of the buffer's current position and write to the end.

    This is a context manager meant to be used when adding data to the buffer.
    It eliminates the need for every function to be concerned with the
    position of the cursor in the buffer.

    On entry the cursor is moved to the end of ``buffer``; on exit it is put
    back where it was, even if the body of the ``with`` block raises.

    :param buffer: seekable object providing ``tell`` and ``seek``
    """
    original_position = buffer.tell()
    buffer.seek(0, 2)
    try:
        yield
    finally:
        # Restore the read position unconditionally so a failed write does
        # not leave the cursor stranded at the end of the buffer.
        buffer.seek(original_position, 0)

460

461

def coerce_data(data, encoding):
    """Normalise ``data`` so that its remaining length can be queried.

    * a ``CustomBytesIO`` is returned unchanged;
    * an object with ``getvalue`` is copied into a new ``CustomBytesIO``;
    * an object with ``fileno`` is wrapped in a ``FileWrapper``;
    * any other object with ``read`` is returned unchanged;
    * everything else is placed in a new ``CustomBytesIO``.
    """
    if isinstance(data, CustomBytesIO):
        return data

    if hasattr(data, 'getvalue'):
        return CustomBytesIO(data.getvalue(), encoding)

    if hasattr(data, 'fileno'):
        return FileWrapper(data)

    if hasattr(data, 'read'):
        return data

    return CustomBytesIO(data, encoding)

475

476

def to_list(fields):
    """Return ``fields`` as a list, using ``items()`` for mappings."""
    pairs = fields.items() if hasattr(fields, 'items') else fields
    return list(pairs)

481

482

class Part(object):
    """One part of the multipart body: encoded headers plus a body object."""

    def __init__(self, headers, body):
        self.headers = headers
        self.body = body
        # Headers are written once, together with the first body chunk
        self.headers_unread = True
        # Size of the whole part at construction time
        self.len = len(self.headers) + total_len(self.body)

    @classmethod
    def from_field(cls, field, encoding):
        """Create a part from a Request Field generated by urllib3."""
        return cls(
            encode_with(field.render_headers(), encoding),
            coerce_data(field.data, encoding),
        )

    def bytes_left_to_write(self):
        """Determine if there are bytes left to write.

        :returns: bool -- ``True`` if there are bytes left to write, otherwise
            ``False``
        """
        pending_header_bytes = len(self.headers) if self.headers_unread else 0
        return (pending_header_bytes + total_len(self.body)) > 0

    def write_to(self, buffer, size):
        """Write the requested amount of bytes to the buffer provided.

        The number of bytes written may exceed size on the first read since we
        load the headers ambitiously.

        :param CustomBytesIO buffer: buffer we want to write bytes to
        :param int size: number of bytes requested to be written to the buffer
        :returns: int -- number of bytes actually written
        """
        total_written = 0
        if self.headers_unread:
            total_written = buffer.append(self.headers)
            self.headers_unread = False

        # A size of -1 means "no limit": keep going until the body is empty
        unlimited = size == -1
        while total_len(self.body) > 0 and (unlimited or total_written < size):
            chunk_size = size if unlimited else size - total_written
            total_written += buffer.append(self.body.read(chunk_size))

        return total_written

531

532

class CustomBytesIO(io.BytesIO):
    """``BytesIO`` that tracks its unread length and supports appending."""

    def __init__(self, buffer=None, encoding='utf-8'):
        # Text is encoded first so the underlying BytesIO only sees bytes
        super(CustomBytesIO, self).__init__(encode_with(buffer, encoding))

    def _get_end(self):
        """Return the offset of the end of the buffer, keeping the cursor."""
        position = self.tell()
        self.seek(0, 2)
        end = self.tell()
        self.seek(position, 0)
        return end

    @property
    def len(self):
        """Number of bytes between the cursor and the end of the buffer."""
        return self._get_end() - self.tell()

    def append(self, bytes):
        """Write ``bytes`` at the end of the buffer without moving the cursor.

        :returns: the value returned by ``write``
        """
        with reset(self):
            return self.write(bytes)

    def smart_truncate(self):
        """Drop consumed data once it is at least as large as unread data."""
        unread = total_len(self)
        consumed = self._get_end() - unread
        if consumed < unread:
            return

        remainder = self.read()
        self.seek(0, 0)
        self.truncate()
        self.write(remainder)
        self.seek(0, 0)  # We want to be at the beginning

565

566

class FileWrapper(object):
    """Thin wrapper around a file object that reports its unread length."""

    def __init__(self, file_object):
        self.fd = file_object

    @property
    def len(self):
        """Total size of the wrapped file minus its current position."""
        size = total_len(self.fd)
        return size - self.fd.tell()

    def read(self, length=-1):
        """Read ``length`` bytes from the wrapped file (``-1`` by default)."""
        return self.fd.read(length)

577

578

class FileFromURLWrapper(object):
    """File from URL wrapper.

    The :class:`FileFromURLWrapper` object gives you the ability to stream file
    from provided URL in chunks by :class:`MultipartEncoder`.
    Provide a stateless solution for streaming file from one server to another.
    You can use the :class:`FileFromURLWrapper` without a session or with
    a session as demonstrated by the examples below:

    .. code-block:: python

        # no session

        import requests
        from requests_toolbelt import MultipartEncoder, FileFromURLWrapper

        url = 'https://httpbin.org/image/png'
        streaming_encoder = MultipartEncoder(
            fields={
                'file': FileFromURLWrapper(url)
            }
        )
        r = requests.post(
            'https://httpbin.org/post', data=streaming_encoder,
            headers={'Content-Type': streaming_encoder.content_type}
        )

    .. code-block:: python

        # using a session

        import requests
        from requests_toolbelt import MultipartEncoder, FileFromURLWrapper

        session = requests.Session()
        url = 'https://httpbin.org/image/png'
        streaming_encoder = MultipartEncoder(
            fields={
                'file': FileFromURLWrapper(url, session=session)
            }
        )
        r = session.post(
            'https://httpbin.org/post', data=streaming_encoder,
            headers={'Content-Type': streaming_encoder.content_type}
        )

    """

    def __init__(self, file_url, session=None):
        """Request ``file_url`` and remember its length and raw stream.

        :param file_url: URL of the file to stream
        :param session: (optional), session used for the request; a new
            ``requests.Session`` is created when it is falsy
        :raises FileNotSupportedError: if the response has no usable
            ``content-length`` header
        """
        self.session = session or requests.Session()
        requested_file = self._request_for_file(file_url)
        #: Number of bytes that are still left to read
        self.len = int(requested_file.headers['content-length'])
        self.raw_data = requested_file.raw

    def _request_for_file(self, file_url):
        """Make call for file under provided URL.

        :returns: the streamed response
        :raises FileNotSupportedError: if the ``content-length`` header is
            missing or is not made up of digits only
        """
        response = self.session.get(file_url, stream=True)
        content_length = response.headers.get('content-length', None)
        if content_length is None:
            error_msg = (
                "Data from provided URL {url} is not supported. Lack of "
                "content-length Header in requested file response.".format(
                    url=file_url)
            )
            raise FileNotSupportedError(error_msg)
        elif not content_length.isdigit():
            error_msg = (
                "Data from provided URL {url} is not supported. content-length"
                " header value is not a digit.".format(url=file_url)
            )
            raise FileNotSupportedError(error_msg)
        return response

    def read(self, chunk_size=-1):
        """Read file in chunks.

        :param int chunk_size: (optional), maximum number of bytes to read.
            ``None`` or a negative value reads everything that is left.
        :returns: bytes -- the chunk read, ``b''`` when nothing was returned
            by the underlying stream
        """
        if chunk_size is None or chunk_size < 0:
            chunk_size = self.len
        chunk = self.raw_data.read(chunk_size) or b''
        self.len -= len(chunk)  # left to read
        return chunk

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/requests_toolbelt/multipart/encoder.py: 27%

249 statements