Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/multipart.py: 30%
593 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:52 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:52 +0000
1import base64
2import binascii
3import json
4import re
5import uuid
6import warnings
7import zlib
8from collections import deque
9from types import TracebackType
10from typing import (
11 TYPE_CHECKING,
12 Any,
13 AsyncIterator,
14 Deque,
15 Dict,
16 Iterator,
17 List,
18 Mapping,
19 Optional,
20 Sequence,
21 Tuple,
22 Type,
23 Union,
24 cast,
25)
26from urllib.parse import parse_qsl, unquote, urlencode
28from multidict import CIMultiDict, CIMultiDictProxy, MultiMapping
30from .compression_utils import ZLibCompressor, ZLibDecompressor
31from .hdrs import (
32 CONTENT_DISPOSITION,
33 CONTENT_ENCODING,
34 CONTENT_LENGTH,
35 CONTENT_TRANSFER_ENCODING,
36 CONTENT_TYPE,
37)
38from .helpers import CHAR, TOKEN, parse_mimetype, reify
39from .http import HeadersParser
40from .payload import (
41 JsonPayload,
42 LookupError,
43 Order,
44 Payload,
45 StringPayload,
46 get_payload,
47 payload_type,
48)
49from .streams import StreamReader
# Public names exported by this module.
__all__ = (
    "MultipartReader",
    "MultipartWriter",
    "BodyPartReader",
    "BadContentDispositionHeader",
    "BadContentDispositionParam",
    "parse_content_disposition",
    "content_disposition_filename",
)
62if TYPE_CHECKING: # pragma: no cover
63 from .client_reqrep import ClientResponse
class BadContentDispositionHeader(RuntimeWarning):
    """Warning category used when a Content-Disposition header is rejected."""
class BadContentDispositionParam(RuntimeWarning):
    """Warning category used when one Content-Disposition parameter is skipped."""
def parse_content_disposition(
    header: Optional[str],
) -> Tuple[Optional[str], Dict[str, str]]:
    """Parse a Content-Disposition header value.

    Returns a ``(disposition_type, params)`` tuple where the type is
    lower-cased and ``params`` maps lower-cased parameter names to their
    decoded values.

    ``(None, {})`` is returned when *header* is empty or cannot be parsed as
    a whole; in the latter case a ``BadContentDispositionHeader`` warning is
    issued.  A single unusable parameter is skipped with a
    ``BadContentDispositionParam`` warning and parsing continues.
    """
    # The module imports a project-specific ``LookupError`` that shadows the
    # builtin one, so the builtin has to be reached explicitly.
    import builtins

    def is_token(string: str) -> bool:
        return bool(string) and TOKEN >= set(string)

    def is_quoted(string: str) -> bool:
        # An empty value (e.g. ``filename=``) is not quoted; indexing it
        # would raise IndexError.
        return bool(string) and string[0] == string[-1] == '"'

    def is_rfc5987(string: str) -> bool:
        return is_token(string) and string.count("'") == 2

    def is_extended_param(string: str) -> bool:
        return string.endswith("*")

    def is_continuous_param(string: str) -> bool:
        pos = string.find("*") + 1
        if not pos:
            return False
        substring = string[pos:-1] if string.endswith("*") else string[pos:]
        return substring.isdigit()

    def unescape(text: str, *, chars: str = "".join(map(re.escape, CHAR))) -> str:
        return re.sub(f"\\\\([{chars}])", "\\1", text)

    if not header:
        return None, {}

    disptype, *parts = header.split(";")
    if not is_token(disptype):
        warnings.warn(BadContentDispositionHeader(header))
        return None, {}

    params: Dict[str, str] = {}
    while parts:
        item = parts.pop(0)

        if "=" not in item:
            warnings.warn(BadContentDispositionHeader(header))
            return None, {}

        key, value = item.split("=", 1)
        key = key.lower().strip()
        value = value.lstrip()

        if key in params:
            # Duplicate parameters make the whole header invalid.
            warnings.warn(BadContentDispositionHeader(header))
            return None, {}

        if not is_token(key):
            warnings.warn(BadContentDispositionParam(item))
            continue

        elif is_continuous_param(key):
            if is_quoted(value):
                value = unescape(value[1:-1])
            elif not is_token(value):
                warnings.warn(BadContentDispositionParam(item))
                continue

        elif is_extended_param(key):
            if is_rfc5987(value):
                encoding, _, value = value.split("'", 2)
                encoding = encoding or "utf-8"
            else:
                warnings.warn(BadContentDispositionParam(item))
                continue

            try:
                value = unquote(value, encoding, "strict")
            except (UnicodeDecodeError, builtins.LookupError):  # pragma: nocover
                # LookupError: the declared charset is unknown to Python.
                warnings.warn(BadContentDispositionParam(item))
                continue

        else:
            failed = True
            if is_quoted(value):
                failed = False
                value = unescape(value[1:-1].lstrip("\\/"))
            elif is_token(value):
                failed = False
            elif parts:
                # maybe just ; in filename, in any case this is just
                # one case fix, for proper fix we need to redesign parser
                _value = f"{value};{parts[0]}"
                if is_quoted(_value):
                    parts.pop(0)
                    value = unescape(_value[1:-1].lstrip("\\/"))
                    failed = False

            if failed:
                warnings.warn(BadContentDispositionHeader(header))
                return None, {}

        params[key] = value

    return disptype.lower(), params
def content_disposition_filename(
    params: Mapping[str, str], name: str = "filename"
) -> Optional[str]:
    """Return the value of parameter *name* from parsed disposition params.

    Lookup order:

    1. the extended form ``name*``;
    2. the plain form ``name``;
    3. continuations ``name*0``, ``name*1``, ... (each key optionally
       suffixed with ``*``), joined in numeric order up to the first
       missing index.

    For joined continuations, a value holding at least two ``'`` is treated
    as ``charset'language'percent-encoded-text`` and decoded strictly
    (``utf-8`` when the charset is empty).  Returns ``None`` when nothing
    matches.
    """
    name_suf = "%s*" % name
    if not params:
        return None
    if name_suf in params:
        return params[name_suf]
    if name in params:
        return params[name]

    # Probe indices directly rather than sorting keys: a lexicographic sort
    # puts "*10" before "*2" and would cut the value after ten segments.
    parts: List[str] = []
    while True:
        plain_key = f"{name_suf}{len(parts)}"
        extended_key = plain_key + "*"
        if plain_key in params:
            parts.append(params[plain_key])
        elif extended_key in params:
            parts.append(params[extended_key])
        else:
            break
    if not parts:
        return None

    value = "".join(parts)
    # Two quotes are required for the charset'lang'text layout; a lone
    # apostrophe is ordinary text and must not break the unpacking below.
    if value.count("'") >= 2:
        encoding, _, value = value.split("'", 2)
        encoding = encoding or "utf-8"
        return unquote(value, encoding, "strict")
    return value
class MultipartResponseWrapper:
    """Pairs a MultipartReader with the response it was built from.

    The response is released once the wrapped reader reports end of input.
    """

    def __init__(
        self,
        resp: "ClientResponse",
        stream: "MultipartReader",
    ) -> None:
        self.resp = resp
        self.stream = stream

    def __aiter__(self) -> "MultipartResponseWrapper":
        return self

    async def __anext__(
        self,
    ) -> Union["MultipartReader", "BodyPartReader"]:
        part = await self.next()
        if part is not None:
            return part
        raise StopAsyncIteration

    def at_eof(self) -> bool:
        """Tell whether the response content stream is at EOF."""
        content = self.resp.content
        return content.at_eof()

    async def next(
        self,
    ) -> Optional[Union["MultipartReader", "BodyPartReader"]]:
        """Return the next part from the wrapped reader.

        When the reader reports EOF afterwards, the response is released.
        """
        reader = self.stream
        item = await reader.next()
        if reader.at_eof():
            await self.release()
        return item

    async def release(self) -> None:
        """Release the underlying response."""
        await self.resp.release()
class BodyPartReader:
    """Reads the payload of one body part of a multipart message."""

    chunk_size = 8192

    def __init__(
        self,
        boundary: bytes,
        headers: "CIMultiDictProxy[str]",
        content: StreamReader,
        *,
        _newline: bytes = b"\r\n",
    ) -> None:
        self.headers = headers
        self._boundary = boundary
        self._newline = _newline
        self._content = content
        self._at_eof = False
        declared_length = self.headers.get(CONTENT_LENGTH, None)
        self._length = None if declared_length is None else int(declared_length)
        self._read_bytes = 0
        self._unread: Deque[bytes] = deque()
        self._prev_chunk: Optional[bytes] = None
        self._content_eof = 0
        self._cache: Dict[str, Any] = {}

    def __aiter__(self) -> AsyncIterator["BodyPartReader"]:
        return self  # type: ignore[return-value]

    async def __anext__(self) -> bytes:
        part = await self.next()
        if part is not None:
            return part
        raise StopAsyncIteration

    async def next(self) -> Optional[bytes]:
        # An empty read is reported as None so that iteration stops.
        item = await self.read()
        return item if item else None

    async def read(self, *, decode: bool = False) -> bytes:
        """Read the remaining body part data.

        decode: when true, the collected data is passed through
                ``decode()`` before being returned; otherwise it is
                returned untouched.
        """
        if self._at_eof:
            return b""
        collected = bytearray()
        while not self._at_eof:
            collected.extend(await self.read_chunk(self.chunk_size))
        return self.decode(collected) if decode else collected

    async def read_chunk(self, size: int = chunk_size) -> bytes:
        """Read one chunk of body part content.

        size: chunk size
        """
        if self._at_eof:
            return b""

        if self._length:
            chunk = await self._read_chunk_from_length(size)
        else:
            chunk = await self._read_chunk_from_stream(size)

        # base64 payloads must be handed out in pieces whose length, not
        # counting whitespace, is a multiple of 4; pull extra bytes until
        # that holds or the part ends.
        transfer_encoding = self.headers.get(CONTENT_TRANSFER_ENCODING)
        if transfer_encoding and transfer_encoding.lower() == "base64":
            stripped = b"".join(chunk.split())

            while len(stripped) % 4 != 0 and not self.at_eof():
                wanted = 4 - len(stripped) % 4
                extra = b""

                if self._prev_chunk:
                    extra = self._prev_chunk[:wanted]
                    self._prev_chunk = self._prev_chunk[len(extra) :]

                if len(extra) != wanted:
                    extra += await self._content.read(4 - len(extra))

                if not extra:
                    self._at_eof = True

                stripped += b"".join(extra.split())
                chunk += extra

        self._read_bytes += len(chunk)
        if self._read_bytes == self._length:
            self._at_eof = True
        if self._at_eof:
            # Consume the line terminator that follows the part body.
            trailer = await self._content.readline()
            assert (
                trailer == self._newline
            ), "reader did not read all the data or it is malformed"
        return chunk

    async def _read_chunk_from_length(self, size: int) -> bytes:
        # Length-driven read: never ask for more than Content-Length allows.
        assert self._length is not None, "Content-Length required for chunked read"
        remaining = self._length - self._read_bytes
        return await self._content.read(min(size, remaining))

    async def _read_chunk_from_stream(self, size: int) -> bytes:
        # Boundary-driven read for parts without a usable Content-Length.
        # One chunk is always held back in ``_prev_chunk`` so that a boundary
        # straddling two reads can still be found.
        assert (
            size >= len(self._boundary) + 2
        ), "Chunk size must be greater or equal than boundary length + 2"
        is_first = self._prev_chunk is None
        if is_first:
            self._prev_chunk = await self._content.read(size)

        chunk = await self._content.read(size)
        self._content_eof += int(self._content.at_eof())
        assert self._content_eof < 3, "Reading after EOF"
        assert self._prev_chunk is not None
        window = self._prev_chunk + chunk

        delimiter = self._newline + self._boundary

        # Start the search just far enough back to catch a delimiter that
        # begins near the end of the held-back chunk.
        search_from = 0 if is_first else max(0, len(self._prev_chunk) - len(delimiter))

        found_at = window.find(delimiter, search_from)
        if found_at >= 0:
            # pushing boundary back to content
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                self._content.unread_data(window[found_at:])
            if size > found_at:
                self._prev_chunk = self._prev_chunk[:found_at]
            chunk = window[len(self._prev_chunk) : found_at]
            if not chunk:
                self._at_eof = True

        ready = self._prev_chunk
        self._prev_chunk = chunk
        return ready

    async def readline(self) -> bytes:
        """Read the body part one line at a time."""
        if self._at_eof:
            return b""

        if self._unread:
            line = self._unread.popleft()
        else:
            line = await self._content.readline()

        if line.startswith(self._boundary):
            # the very last boundary may not come with \r\n,
            # so set single rules for everyone
            bare = line.rstrip(b"\r\n")
            # ensure that we read exactly the boundary, not something alike
            if bare in (self._boundary, self._boundary + b"--"):
                self._at_eof = True
                self._unread.append(line)
                return b""
        else:
            lookahead = await self._content.readline()
            if lookahead.startswith(self._boundary):
                # strip newline but only once
                line = line[: -len(self._newline)]
            self._unread.append(lookahead)

        return line

    async def release(self) -> None:
        """Like read(), but discards all the data."""
        while not self._at_eof:
            await self.read_chunk(self.chunk_size)

    async def text(self, *, encoding: Optional[str] = None) -> str:
        """Like read(), but returns the decoded data as text."""
        raw = await self.read(decode=True)
        # see https://www.w3.org/TR/html5/forms.html#multipart/form-data-encoding-algorithm # NOQA
        # and https://dvcs.w3.org/hg/xhr/raw-file/tip/Overview.html#dom-xmlhttprequest-send # NOQA
        charset = encoding or self.get_charset(default="utf-8")
        return raw.decode(charset)

    async def json(self, *, encoding: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Like read(), but parses the data as JSON; None for an empty body."""
        raw = await self.read(decode=True)
        if not raw:
            return None
        charset = encoding or self.get_charset(default="utf-8")
        parsed = json.loads(raw.decode(charset))
        return cast(Dict[str, Any], parsed)

    async def form(self, *, encoding: Optional[str] = None) -> List[Tuple[str, str]]:
        """Like read(), but parses the data as form urlencoded pairs."""
        raw = await self.read(decode=True)
        if not raw:
            return []
        real_encoding = (
            encoding if encoding is not None else self.get_charset(default="utf-8")
        )
        try:
            decoded_data = raw.rstrip().decode(real_encoding)
        except UnicodeDecodeError:
            raise ValueError("data cannot be decoded with %s encoding" % real_encoding)

        return parse_qsl(
            decoded_data,
            keep_blank_values=True,
            encoding=real_encoding,
        )

    def at_eof(self) -> bool:
        """Returns True if the boundary was reached or False otherwise."""
        return self._at_eof

    def decode(self, data: bytes) -> bytes:
        """Decode *data* according to the part headers.

        Content-Transfer-Encoding is undone first, then Content-Encoding;
        each step only runs when its header is present.
        """
        if CONTENT_TRANSFER_ENCODING in self.headers:
            data = self._decode_content_transfer(data)
        if CONTENT_ENCODING not in self.headers:
            return data
        return self._decode_content(data)

    def _decode_content(self, data: bytes) -> bytes:
        encoding = self.headers.get(CONTENT_ENCODING, "").lower()
        if encoding == "identity":
            return data
        if encoding not in {"deflate", "gzip"}:
            raise RuntimeError(f"unknown content encoding: {encoding}")
        decompressor = ZLibDecompressor(
            encoding=encoding,
            suppress_deflate_header=True,
        )
        return decompressor.decompress_sync(data)

    def _decode_content_transfer(self, data: bytes) -> bytes:
        encoding = self.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()

        if encoding == "base64":
            return base64.b64decode(data)
        if encoding == "quoted-printable":
            return binascii.a2b_qp(data)
        if encoding in ("binary", "8bit", "7bit"):
            return data
        raise RuntimeError("unknown content transfer encoding: {}".format(encoding))

    def get_charset(self, default: str) -> str:
        """Returns charset parameter from Content-Type header or default."""
        mimetype = parse_mimetype(self.headers.get(CONTENT_TYPE, ""))
        return mimetype.parameters.get("charset", default)

    @reify
    def name(self) -> Optional[str]:
        """Returns name specified in Content-Disposition header.

        If the header is missing or malformed, returns None.
        """
        disposition = self.headers.get(CONTENT_DISPOSITION)
        _, params = parse_content_disposition(disposition)
        return content_disposition_filename(params, "name")

    @reify
    def filename(self) -> Optional[str]:
        """Returns filename specified in Content-Disposition header.

        Returns None if the header is missing or malformed.
        """
        disposition = self.headers.get(CONTENT_DISPOSITION)
        _, params = parse_content_disposition(disposition)
        return content_disposition_filename(params, "filename")
@payload_type(BodyPartReader, order=Order.try_first)
class BodyPartReaderPayload(Payload):
    """Payload that forwards the content of a BodyPartReader."""

    def __init__(self, value: BodyPartReader, *args: Any, **kwargs: Any) -> None:
        super().__init__(value, *args, **kwargs)

        # Carry over whichever of name/filename the source part provides.
        candidates = (("name", value.name), ("filename", value.filename))
        params: Dict[str, str] = {
            key: found for key, found in candidates if found is not None
        }

        if params:
            self.set_content_disposition("attachment", True, **params)

    async def write(self, writer: Any) -> None:
        """Copy the part to *writer* chunk by chunk, decoding each chunk."""
        field = self._value
        while True:
            chunk = await field.read_chunk(size=2**16)
            if not chunk:
                break
            await writer.write(field.decode(chunk))
class MultipartReader:
    """Reader that walks through the parts of a multipart body."""

    #: Response wrapper, used when multipart readers constructs from response.
    response_wrapper_cls = MultipartResponseWrapper
    #: Multipart reader class, used to handle multipart/* body parts.
    #: None points to type(self)
    multipart_reader_cls = None
    #: Body part reader class for non multipart/* content types.
    part_reader_cls = BodyPartReader

    def __init__(
        self,
        headers: Mapping[str, str],
        content: StreamReader,
        *,
        _newline: bytes = b"\r\n",
    ) -> None:
        self.headers = headers
        # Delimiter lines are the boundary parameter prefixed with "--".
        self._boundary = ("--" + self._get_boundary()).encode()
        self._newline = _newline
        self._content = content
        self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
        self._at_eof = False
        self._at_bof = True
        self._unread: List[bytes] = []

    def __aiter__(
        self,
    ) -> AsyncIterator["BodyPartReader"]:
        return self  # type: ignore[return-value]

    async def __anext__(
        self,
    ) -> Optional[Union["MultipartReader", BodyPartReader]]:
        part = await self.next()
        if part is not None:
            return part
        raise StopAsyncIteration

    @classmethod
    def from_response(
        cls,
        response: "ClientResponse",
    ) -> MultipartResponseWrapper:
        """Constructs reader instance from HTTP response.

        :param response: :class:`~aiohttp.client.ClientResponse` instance
        """
        reader = cls(response.headers, response.content)
        return cls.response_wrapper_cls(response, reader)

    def at_eof(self) -> bool:
        """Returns True if the final boundary was reached, false otherwise."""
        return self._at_eof

    async def next(
        self,
    ) -> Optional[Union["MultipartReader", BodyPartReader]]:
        """Emits the next multipart body part, or None after the last one."""
        if self._at_eof:
            return None
        await self._maybe_release_last_part()
        if not self._at_bof:
            await self._read_boundary()
        else:
            # At the very beginning: skip everything up to the first boundary.
            await self._read_until_first_boundary()
            self._at_bof = False
        if self._at_eof:  # we just read the last boundary, nothing to do there
            return None
        part = await self.fetch_next_part()
        self._last_part = part
        return part

    async def release(self) -> None:
        """Reads and discards all body parts till the final boundary."""
        while not self._at_eof:
            part = await self.next()
            if part is None:
                return
            await part.release()

    async def fetch_next_part(
        self,
    ) -> Union["MultipartReader", BodyPartReader]:
        """Returns the next body part reader."""
        part_headers = await self._read_headers()
        return self._get_part_reader(part_headers)

    def _get_part_reader(
        self,
        headers: "CIMultiDictProxy[str]",
    ) -> Union["MultipartReader", BodyPartReader]:
        """Pick a reader for a part based on its `Content-Type` header.

        multipart/* parts get a nested multipart reader, anything else a
        body part reader.

        :param dict headers: Response headers
        """
        mimetype = parse_mimetype(headers.get(CONTENT_TYPE, ""))

        if mimetype.type != "multipart":
            return self.part_reader_cls(
                self._boundary, headers, self._content, _newline=self._newline
            )
        if self.multipart_reader_cls is None:
            return type(self)(headers, self._content)
        return self.multipart_reader_cls(
            headers, self._content, _newline=self._newline
        )

    def _get_boundary(self) -> str:
        content_type = self.headers[CONTENT_TYPE]
        mimetype = parse_mimetype(content_type)

        assert mimetype.type == "multipart", "multipart/* content type expected"

        if "boundary" not in mimetype.parameters:
            raise ValueError(
                "boundary missed for Content-Type: %s" % self.headers[CONTENT_TYPE]
            )

        boundary = mimetype.parameters["boundary"]
        if len(boundary) > 70:
            raise ValueError("boundary %r is too long (70 chars max)" % boundary)

        return boundary

    async def _readline(self) -> bytes:
        # Lines that were pushed back take priority, most recent first.
        if not self._unread:
            return await self._content.readline()
        return self._unread.pop()

    async def _read_until_first_boundary(self) -> None:
        while True:
            line = await self._readline()
            if line == b"":
                raise ValueError(
                    "Could not find starting boundary %r" % (self._boundary)
                )
            end_boundary = self._boundary + b"--"
            newline = None
            if line.startswith(end_boundary):
                _, newline = line.split(end_boundary, 1)
            elif line.startswith(self._boundary):
                _, newline = line.split(self._boundary, 1)
            if newline is not None:
                # Whatever follows the boundary becomes the line terminator
                # used for the rest of the body.
                assert newline in (b"\r\n", b"\n"), (newline, line, self._boundary)
                self._newline = newline

            line = line.rstrip()
            if line == self._boundary:
                return
            if line == end_boundary:
                self._at_eof = True
                return

    async def _read_boundary(self) -> None:
        line = (await self._readline()).rstrip()
        if line == self._boundary:
            return
        if line != self._boundary + b"--":
            raise ValueError(f"Invalid boundary {line!r}, expected {self._boundary!r}")

        self._at_eof = True
        epilogue = await self._readline()
        next_line = await self._readline()

        # the epilogue is expected and then either the end of input or the
        # parent multipart boundary, if the parent boundary is found then
        # it should be marked as unread and handed to the parent for
        # processing
        if next_line[:2] == b"--":
            self._unread.append(next_line)
        # otherwise the request is likely missing an epilogue and both
        # lines should be passed to the parent for processing
        # (this handles the old behavior gracefully)
        else:
            self._unread.extend([next_line, epilogue])

    async def _read_headers(self) -> "CIMultiDictProxy[str]":
        # Collect stripped lines up to and including the first blank one.
        lines = [b""]
        while True:
            line = (await self._content.readline()).strip()
            lines.append(line)
            if not line:
                break
        headers, _ = HeadersParser().parse_headers(lines)
        return headers

    async def _maybe_release_last_part(self) -> None:
        """Ensures that the last read body part is read completely."""
        last = self._last_part
        if last is None:
            return
        if not last.at_eof():
            await last.release()
        # Take over the lines the part read ahead but did not consume.
        self._unread.extend(last._unread)
        self._last_part = None
# One stored body part: (payload, content encoding, content transfer encoding).
_Part = Tuple[Payload, str, str]


class MultipartWriter(Payload):
    """Payload that serialises a sequence of parts as a multipart body."""

    def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> None:
        if boundary is None:
            boundary = uuid.uuid4().hex
        # The underlying Payload API demands a str (utf-8), not bytes,
        # so we need to ensure we don't lose anything during conversion.
        # As a result, require the boundary to be ASCII only.
        # In both situations.

        try:
            self._boundary = boundary.encode("ascii")
        except UnicodeEncodeError:
            raise ValueError("boundary should contain ASCII only chars") from None

        if len(boundary) > 70:
            raise ValueError("boundary %r is too long (70 chars max)" % boundary)

        content_type = f"multipart/{subtype}; boundary={self._boundary_value}"

        super().__init__(None, content_type=content_type)

        self._parts: List[_Part] = []

    def __enter__(self) -> "MultipartWriter":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        # Nothing to clean up.
        return None

    def __iter__(self) -> Iterator[_Part]:
        return iter(self._parts)

    def __len__(self) -> int:
        return len(self._parts)

    def __bool__(self) -> bool:
        # Always truthy, even though __len__ may report zero parts.
        return True

    _valid_tchar_regex = re.compile(rb"\A[!#$%&'*+\-.^_`|~\w]+\Z")
    _invalid_qdtext_char_regex = re.compile(rb"[\x00-\x08\x0A-\x1F\x7F]")

    @property
    def _boundary_value(self) -> str:
        """Return the boundary as it must appear in the Content-Type header.

        The value is returned bare when it is a valid token and wrapped in
        quotes (with escaping) otherwise.
        """
        # Refer to RFCs 7231, 7230, 5234.
        #
        # parameter      = token "=" ( token / quoted-string )
        # token          = 1*tchar
        # quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
        # qdtext         = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
        # obs-text       = %x80-FF
        # quoted-pair    = "\" ( HTAB / SP / VCHAR / obs-text )
        # tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
        #                  / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
        #                  / DIGIT / ALPHA
        #                  ; any VCHAR, except delimiters
        # VCHAR           = %x21-7E
        raw = self._boundary
        if self._valid_tchar_regex.match(raw):
            return raw.decode("ascii")  # cannot fail

        if self._invalid_qdtext_char_regex.search(raw):
            raise ValueError("boundary value contains invalid characters")

        # escape %x5C and %x22
        escaped = raw.replace(b"\\", b"\\\\").replace(b'"', b'\\"')

        return '"' + escaped.decode("ascii") + '"'

    @property
    def boundary(self) -> str:
        return self._boundary.decode("ascii")

    def append(self, obj: Any, headers: Optional[MultiMapping[str]] = None) -> Payload:
        """Add *obj* as a new part, wrapping it in a payload when needed."""
        if headers is None:
            headers = CIMultiDict()

        if isinstance(obj, Payload):
            obj.headers.update(headers)
            return self.append_payload(obj)

        try:
            payload = get_payload(obj, headers=headers)
        except LookupError:
            raise TypeError("Cannot create payload from %r" % obj)
        return self.append_payload(payload)

    def append_payload(self, payload: Payload) -> Payload:
        """Adds a new body part to multipart writer."""
        part_headers = payload.headers

        # compression
        encoding: Optional[str] = part_headers.get(CONTENT_ENCODING, "").lower()
        if encoding and encoding not in ("deflate", "gzip", "identity"):
            raise RuntimeError(f"unknown content encoding: {encoding}")
        if encoding == "identity":
            encoding = None

        # te encoding
        te_encoding: Optional[str] = part_headers.get(
            CONTENT_TRANSFER_ENCODING, ""
        ).lower()
        if te_encoding not in ("", "base64", "quoted-printable", "binary"):
            raise RuntimeError(
                "unknown content transfer encoding: {}".format(te_encoding)
            )
        if te_encoding == "binary":
            te_encoding = None

        # size: only advertised when the payload is written without any
        # compression or transfer encoding applied.
        size = payload.size
        if size is not None and not (encoding or te_encoding):
            payload.headers[CONTENT_LENGTH] = str(size)

        self._parts.append((payload, encoding, te_encoding))  # type: ignore[arg-type]
        return payload

    def append_json(
        self, obj: Any, headers: Optional[MultiMapping[str]] = None
    ) -> Payload:
        """Helper to append JSON part."""
        if headers is None:
            headers = CIMultiDict()

        json_part = JsonPayload(obj, headers=headers)
        return self.append_payload(json_part)

    def append_form(
        self,
        obj: Union[Sequence[Tuple[str, str]], Mapping[str, str]],
        headers: Optional[MultiMapping[str]] = None,
    ) -> Payload:
        """Helper to append form urlencoded part."""
        assert isinstance(obj, (Sequence, Mapping))

        if headers is None:
            headers = CIMultiDict()

        if isinstance(obj, Mapping):
            obj = list(obj.items())
        data = urlencode(obj, doseq=True)

        form_part = StringPayload(
            data, headers=headers, content_type="application/x-www-form-urlencoded"
        )
        return self.append_payload(form_part)

    @property
    def size(self) -> Optional[int]:
        """Size of the payload, or None when it cannot be known in advance."""
        boundary_len = len(self._boundary)
        total = 0
        for part, encoding, te_encoding in self._parts:
            if encoding or te_encoding or part.size is None:
                return None

            total += int(
                2
                + boundary_len
                + 2  # b'--'+self._boundary+b'\r\n'
                + part.size
                + len(part._binary_headers)
                + 2  # b'\r\n'
            )

        total += 2 + boundary_len + 4  # b'--'+self._boundary+b'--\r\n'
        return total

    async def write(self, writer: Any, close_boundary: bool = True) -> None:
        """Write body."""
        opening = b"--" + self._boundary + b"\r\n"
        for part, encoding, te_encoding in self._parts:
            await writer.write(opening)
            await writer.write(part._binary_headers)

            if not (encoding or te_encoding):
                await part.write(writer)
            else:
                # Route the part through a writer that compresses and/or
                # transfer-encodes on the fly.
                encoder = MultipartPayloadWriter(writer)
                if encoding:
                    encoder.enable_compression(encoding)
                if te_encoding:
                    encoder.enable_encoding(te_encoding)
                await part.write(encoder)  # type: ignore[arg-type]
                await encoder.write_eof()

            await writer.write(b"\r\n")

        if close_boundary:
            await writer.write(b"--" + self._boundary + b"--\r\n")
class MultipartPayloadWriter:
    """Writer wrapper that optionally compresses and transfer-encodes data."""

    def __init__(self, writer: Any) -> None:
        self._writer = writer
        self._encoding: Optional[str] = None
        self._compress: Optional[ZLibCompressor] = None
        self._encoding_buffer: Optional[bytearray] = None

    def enable_encoding(self, encoding: str) -> None:
        """Turn on base64 or quoted-printable output; other values are ignored."""
        if encoding not in ("base64", "quoted-printable"):
            return
        self._encoding = encoding
        if encoding == "base64":
            # Holds bytes that do not yet fill a whole 3-byte group.
            self._encoding_buffer = bytearray()

    def enable_compression(
        self, encoding: str = "deflate", strategy: int = zlib.Z_DEFAULT_STRATEGY
    ) -> None:
        """Turn on compression of everything written from now on."""
        self._compress = ZLibCompressor(
            encoding=encoding,
            suppress_deflate_header=True,
            strategy=strategy,
        )

    async def write_eof(self) -> None:
        """Flush the compressor and any buffered base64 remainder."""
        compressor = self._compress
        if compressor is not None:
            tail = compressor.flush()
            if tail:
                # Drop the compressor first so the flushed bytes are not
                # compressed again by write().
                self._compress = None
                await self.write(tail)

        if self._encoding == "base64" and self._encoding_buffer:
            await self._writer.write(base64.b64encode(self._encoding_buffer))

    async def write(self, chunk: bytes) -> None:
        """Compress and encode *chunk* as configured, then pass it on."""
        if self._compress is not None and chunk:
            chunk = await self._compress.compress(chunk)
            if not chunk:
                return

        if self._encoding == "base64":
            pending = self._encoding_buffer
            assert pending is not None
            pending.extend(chunk)

            if pending:
                # Only whole 3-byte groups are encoded now; the rest waits.
                cut = len(pending) // 3 * 3
                ready, self._encoding_buffer = pending[:cut], pending[cut:]
                if ready:
                    await self._writer.write(base64.b64encode(ready))
            return

        if self._encoding == "quoted-printable":
            await self._writer.write(binascii.b2a_qp(chunk))
            return

        await self._writer.write(chunk)