Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/python

1from __future__ import annotations

3import logging

4import os

5import shutil

6import sys

7import tempfile

8from enum import IntEnum

9from io import BufferedRandom, BytesIO

10from numbers import Number

11from typing import TYPE_CHECKING, cast

13from .decoders import Base64Decoder, QuotedPrintableDecoder

14from .exceptions import FileError, FormParserError, MultipartParseError, QuerystringParseError

# Everything in this branch exists only for static type checkers; nothing here
# is imported or defined at runtime.
if TYPE_CHECKING:
    from collections.abc import Callable
    from typing import Any, Literal, Protocol, TypeAlias, TypedDict

    class SupportsRead(Protocol):
        """Structural type for any object exposing ``read(n) -> bytes``."""

        def read(self, __n: int) -> bytes: ...

    class QuerystringCallbacks(TypedDict, total=False):
        """Optional callbacks understood by ``QuerystringParser``.

        Callbacks typed ``Callable[[], None]`` are notifications; callbacks
        typed ``Callable[[bytes, int, int], None]`` receive ``(data, start, end)``.
        """

        on_field_start: Callable[[], None]
        on_field_name: Callable[[bytes, int, int], None]
        on_field_data: Callable[[bytes, int, int], None]
        on_field_end: Callable[[], None]
        on_end: Callable[[], None]

    class OctetStreamCallbacks(TypedDict, total=False):
        """Optional callbacks understood by ``OctetStreamParser``."""

        on_start: Callable[[], None]
        on_data: Callable[[bytes, int, int], None]
        on_end: Callable[[], None]

    class MultipartCallbacks(TypedDict, total=False):
        """Optional callbacks for multipart parsing.

        NOTE(review): presumably consumed by the multipart parser defined
        elsewhere in this module - confirm against that class.
        """

        on_part_begin: Callable[[], None]
        on_part_data: Callable[[bytes, int, int], None]
        on_part_end: Callable[[], None]
        on_header_begin: Callable[[], None]
        on_header_field: Callable[[bytes, int, int], None]
        on_header_value: Callable[[bytes, int, int], None]
        on_header_end: Callable[[], None]
        on_headers_finished: Callable[[], None]
        on_end: Callable[[], None]

    class FileConfig(TypedDict, total=False):
        """Configuration keys read by ``File``; every key is optional."""

        UPLOAD_DIR: str | bytes | None
        UPLOAD_DELETE_TMP: bool
        UPLOAD_KEEP_FILENAME: bool
        UPLOAD_KEEP_EXTENSIONS: bool
        MAX_MEMORY_FILE_SIZE: int

    class FormParserConfig(FileConfig):
        """``FileConfig`` plus additional parser-level limits.

        NOTE(review): the consumer of these extra keys is not shown here;
        the names suggest they bound body size and header count/size - confirm.
        """

        UPLOAD_ERROR_ON_BAD_CTE: bool
        MAX_BODY_SIZE: float
        MAX_HEADER_COUNT: int
        MAX_HEADER_SIZE: int

    # Callback names as passed to ``BaseParser.callback`` / ``set_callback``,
    # i.e. the ``*Callbacks`` keys above without their ``on_`` prefix.
    CallbackName: TypeAlias = Literal[
        "start",
        "data",
        "end",
        "field_start",
        "field_name",
        "field_data",
        "field_end",
        "part_begin",
        "part_data",
        "part_end",
        "header_begin",
        "header_field",
        "header_value",
        "header_end",
        "headers_finished",
    ]

# Unique sentinel meaning "no value cached yet". A dedicated object is needed
# because ``None`` is itself a legitimate field value (see ``Field.set_none``).
_missing = object()

class QuerystringState(IntEnum):
    """Querystring parser states.

    These are used to keep track of the state of the parser, and are used to determine
    what to do when new data is encountered.
    """

    # Between fields: ``&`` separators are skipped; any other byte starts a field.
    BEFORE_FIELD = 0
    # Inside a field name: scanning for ``=`` (or ``&`` when the field has no value).
    FIELD_NAME = 1
    # Inside a field value: scanning for the ``&`` that ends the field.
    FIELD_DATA = 2

class MultipartState(IntEnum):
    """Multipart parser states.

    These are used to keep track of the state of the parser, and are used to determine
    what to do when new data is encountered.
    """

    # NOTE(review): the state machine that consumes these values is not shown
    # here; the member names suggest the order boundary -> headers -> part
    # data -> closing boundary, but confirm against the multipart parser.
    START = 0
    START_BOUNDARY = 1
    HEADER_FIELD_START = 2
    HEADER_FIELD = 3
    HEADER_VALUE_START = 4
    HEADER_VALUE = 5
    HEADER_VALUE_ALMOST_DONE = 6
    HEADERS_ALMOST_DONE = 7
    PART_DATA_START = 8
    PART_DATA = 9
    PART_DATA_END = 10
    END_BOUNDARY = 11
    END = 12

113

114

# Bit flags used by the multipart parser.
FLAG_PART_BOUNDARY = 1 << 0
FLAG_LAST_BOUNDARY = 1 << 1

# Iterating over (or indexing into) a bytes object yields integers, so the
# parsers compare each byte against these pre-computed integer code points.
CR = ord("\r")
LF = ord("\n")
COLON = ord(":")
SPACE = ord(" ")
HYPHEN = ord("-")
AMPERSAND = ord("&")
LOWER_A = ord("a")
LOWER_Z = ord("z")
NULL = ord("\x00")

# fmt: off
# Bytes that may appear in an HTTP token.
# Per RFC7230 - 3.2.6, this is all alpha-numeric characters
# and these: !#$%&'*+-.^_`|~
TOKEN_CHARS = (
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    + b"abcdefghijklmnopqrstuvwxyz"
    + b"0123456789"
    + b"!#$%&'*+-.^_`|~"
)
TOKEN_CHARS_SET = frozenset(TOKEN_CHARS)
# fmt: on

DEFAULT_MAX_HEADER_COUNT = 8
"""Default maximum number of headers allowed per multipart part."""

DEFAULT_MAX_HEADER_SIZE = 4096 + 128
"""Default maximum size of a single multipart header line, including syntax overhead."""

MAX_BOUNDARY_LENGTH = 256
"""Maximum allowed length of a multipart boundary.

[RFC 2046 §5.1.1](https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.1)
recommends boundaries be at most 70 bytes. 256 bytes is generous headroom over
every HTTP client.
"""

157

158

159def _parseparam(s: str) -> list[str]:

160 # Vendored from the standard library's

161 # [`email.message._parseparam`](https://github.com/python/cpython/blob/v3.14.2/Lib/email/message.py#L73-L96)

162 # to split a header into its `;`-separated parts without treating a `;` inside a double-quoted string as a

163 # separator - and without the RFC 2231 decoding that `email.message.Message.get_params` would apply on top.

164 s = ";" + s

165 plist: list[str] = []

166 start = 0

167 while s.find(";", start) == start:

168 start += 1

169 end = s.find(";", start)

170 ind, diff = start, 0

171 while end > 0:

172 diff += s.count('"', ind, end) - s.count('\\"', ind, end)

173 if diff % 2 == 0:

174 break

175 end, ind = ind, s.find(";", end + 1)

176 if end < 0:

177 end = len(s)

178 i = s.find("=", start, end)

179 if i == -1:

180 f = s[start:end]

181 else:

182 f = s[start:i].rstrip().lower() + "=" + s[i + 1 : end].lstrip()

183 plist.append(f.strip())

184 start = end

185 return plist

186

187

188def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]:

189 """Parses a Content-Type header into a value in the following format: (content_type, {parameters})."""

190 if not value:

191 return (b"", {})

192

193 # If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.

194 if isinstance(value, bytes): # pragma: no cover

195 value = value.decode("latin-1")

196

197 # For types

198 assert isinstance(value, str), "Value should be a string by now"

199

200 # If we have no options, return the string as-is.

201 if ";" not in value:

202 return (value.lower().strip().encode("latin-1"), {})

203

204 ctype, *segments = _parseparam(value)

205 options: dict[bytes, bytes] = {}

206 for segment in segments:

207 key, _, val = segment.partition("=")

208 # [RFC 7578 §4.2](https://datatracker.ietf.org/doc/html/rfc7578#section-4.2)

209 # forbids the RFC 5987/2231 extended syntax (`key*=`, `key*0`, ...) in

210 # multipart/form-data, so we ignore those parameters and keep the plain

211 # `key` authoritative.

212 if "*" in key:

213 continue

214 if len(val) >= 2 and val[0] == '"' and val[-1] == '"':

215 val = val[1:-1].replace("\\\\", "\\").replace('\\"', '"')

216 # Work around an IE6 bug where the full file path is sent instead of

217 # just the filename.

218 if key == "filename" and (val[1:3] == ":\\" or val[:2] == "\\\\"):

219 val = val.split("\\")[-1]

220 options[key.encode("latin-1")] = val.encode("latin-1")

221 return ctype.encode("latin-1"), options

222

223

class Field:
    """A Field object represents a (parsed) form field. It represents a single
    field with a corresponding name and value.

    The name that a :class:`Field` will be instantiated with is the same name
    that would be found in the following HTML::

        <input name="name_goes_here" type="text"/>

    This class defines two methods, :meth:`on_data` and :meth:`on_end`, that
    will be called when data is written to the Field, and when the Field is
    finalized, respectively.

    Args:
        name: The name of the form field.
        content_type: The value of the Content-Type header for this field.
    """

    def __init__(self, name: bytes | None, *, content_type: str | None = None) -> None:
        self._name = name
        # Chunks are collected here and only joined when the value is needed.
        self._value: list[bytes] = []
        self._content_type = content_type

        # We cache the joined version of _value for speed. ``_missing`` means
        # "not joined yet"; ``None`` is a real value (see ``set_none``).
        self._cache = _missing

    @classmethod
    def from_value(cls, name: bytes, value: bytes | None) -> Field:
        """Create an instance of a :class:`Field`, and set the corresponding
        value - either None or an actual value. This method will also
        finalize the Field itself.

        Args:
            name: the name of the form field.
            value: the value of the form field - either a bytestring or None.

        Returns:
            A new instance of a [`Field`][python_multipart.Field].
        """
        f = cls(name)
        if value is None:
            f.set_none()
        else:
            f.write(value)
        f.finalize()
        return f

    def write(self, data: bytes) -> int:
        """Write some data into the form field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the Field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        self._value.append(data)
        # Any previously joined value is now stale.
        self._cache = _missing
        return len(data)

    def on_end(self) -> None:
        """This method is called whenever the Field is finalized."""
        if self._cache is _missing:
            self._cache = b"".join(self._value)

    def finalize(self) -> None:
        """Finalize the form field."""
        self.on_end()

    def close(self) -> None:
        """Close the Field object. This frees the list of written chunks
        while keeping the joined value readable through :attr:`value`.

        Calling this method more than once is harmless.
        """
        # Make sure the joined value survives dropping the chunk list.
        if self._cache is _missing:
            self._cache = b"".join(self._value)

        # The chunk list only exists until the first close(); a repeated call
        # used to raise AttributeError here.
        try:
            del self._value
        except AttributeError:
            pass

    def set_none(self) -> None:
        """Some fields in a querystring can possibly have a value of None - for
        example, the string "foo&bar=&baz=asdf" will have a field with the
        name "foo" and value None, one with name "bar" and value "", and one
        with name "baz" and value "asdf". Since the write() interface doesn't
        support writing None, this function will set the field value to None.
        """
        self._cache = None

    @property
    def field_name(self) -> bytes | None:
        """This property returns the name of the field."""
        return self._name

    @property
    def value(self) -> bytes | None:
        """This property returns the value of the form field."""
        if self._cache is _missing:
            self._cache = b"".join(self._value)

        assert isinstance(self._cache, bytes) or self._cache is None
        return self._cache

    @property
    def content_type(self) -> str | None:
        """This property returns the content_type value of the field."""
        return self._content_type

    def __eq__(self, other: object) -> bool:
        """Two fields are equal when both their names and their values match."""
        if isinstance(other, Field):
            return self.field_name == other.field_name and self.value == other.value
        else:
            return NotImplemented

    def __repr__(self) -> str:
        """Return a debug representation, truncating values longer than 97 bytes."""
        value = self.value
        if value is not None and len(value) > 97:
            # Insert three dots before the closing quote. ``repr`` of bytes
            # switches to double quotes when the data contains a single quote,
            # so re-use whichever quote character it actually chose.
            truncated = repr(value[:97])
            v = truncated[:-1] + "..." + truncated[-1]
        else:
            v = repr(value)

        return f"{self.__class__.__name__}(field_name={self.field_name!r}, value={v})"

357

358

class File:
    """This class represents an uploaded file. It handles writing file data to
    either an in-memory file or a temporary file on-disk, if the optional
    threshold is passed.

    There are some options that can be passed to the File to change behavior
    of the class. Valid options are as follows:

    | Name                  | Type          | Default | Description |
    |-----------------------|---------------|---------|-------------|
    | UPLOAD_DIR            | `str`/`bytes` | None    | The directory in which on-disk files are created. If None, a temporary file is created in the `tempfile` module's default location. |
    | UPLOAD_DELETE_TMP     | `bool`        | True    | Passed as `delete=` to `tempfile.NamedTemporaryFile` when a temporary file is created. |
    | UPLOAD_KEEP_FILENAME  | `bool`        | False   | Whether or not to keep the filename of the uploaded file. If True (and UPLOAD_DIR is set), only the basename of the uploaded name is used, and the file is saved under that name inside UPLOAD_DIR. Otherwise, or when the upload has no usable file name, a temporary name is used. |
    | UPLOAD_KEEP_EXTENSIONS| `bool`        | False   | Whether or not to keep the uploaded file's extension. If False, no extension is added to the saved file. Otherwise, the file's extension will be maintained. Note that this will properly combine with the UPLOAD_KEEP_FILENAME setting. |
    | MAX_MEMORY_FILE_SIZE  | `int`         | unset   | The maximum number of bytes of a File to keep in memory. Once more than this many bytes have been written, the contents are moved to a file on disk. When the key is absent this class never moves the data to disk (NOTE(review): higher-level parsers presumably supply their own default, e.g. 1 MiB - confirm). This behavior can also be disabled by setting an appropriately large value (for example infinity, such as `float('inf')`). |

    Args:
        file_name: The name of the file that this [`File`][python_multipart.File] represents.
        field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,
            the file was uploaded with Content-Type application/octet-stream.
        config: The configuration for this File. See above for valid configuration keys and their corresponding values.
            Omitting it (or passing None) is the same as passing an empty configuration.
        content_type: The value of the Content-Type header.
    """  # noqa: E501

    def __init__(
        self,
        file_name: bytes | None,
        field_name: bytes | None = None,
        config: FileConfig | None = None,
        *,
        content_type: str | None = None,
    ) -> None:
        # Save configuration, set other variables default.
        self.logger = logging.getLogger(__name__)
        # Use a fresh dict per instance rather than a shared mutable default.
        self._config: FileConfig = {} if config is None else config
        self._in_memory = True
        self._bytes_written = 0
        self._fileobj: BytesIO | BufferedRandom = BytesIO()

        # Save the provided field/file name and content type.
        self._field_name = field_name
        self._file_name = file_name
        self._content_type = content_type

        # Our actual file name is None by default, since, depending on our
        # config, we may not actually use the provided name.
        self._actual_file_name: bytes | None = None

        # Always define the name parts so that spilling to disk works even
        # for uploads without a file name (they then get a temporary name).
        self._file_base: bytes = b""
        self._ext: bytes = b""
        if file_name is not None:
            # Extract just the basename to avoid directory traversal
            basename = os.path.basename(file_name)
            self._file_base, self._ext = os.path.splitext(basename)

    @property
    def field_name(self) -> bytes | None:
        """The form field associated with this file. May be None if there isn't
        one, for example when we have an application/octet-stream upload.
        """
        return self._field_name

    @property
    def file_name(self) -> bytes | None:
        """The file name given in the upload request."""
        return self._file_name

    @property
    def actual_file_name(self) -> bytes | None:
        """The file name that this file is saved as. Will be None if it's not
        currently saved on disk.
        """
        return self._actual_file_name

    @property
    def file_object(self) -> BytesIO | BufferedRandom:
        """The file object that we're currently writing to. Note that this
        will either be an instance of a :class:`io.BytesIO`, or a regular file
        object.
        """
        return self._fileobj

    @property
    def size(self) -> int:
        """The total size of this file, counted as the number of bytes that
        currently have been written to the file.
        """
        return self._bytes_written

    @property
    def in_memory(self) -> bool:
        """A boolean representing whether or not this file object is currently
        stored in-memory or on-disk.
        """
        return self._in_memory

    @property
    def content_type(self) -> str | None:
        """The Content-Type value for this part, if it was set."""
        return self._content_type

    def flush_to_disk(self) -> None:
        """Copy the in-memory buffer to a disk file, and then reassign our
        internal file object to this new disk file.

        If the file is already on-disk, nothing is copied and a warning is
        logged to this module's logger.

        Raises:
            FileError: If the on-disk file cannot be created.
        """
        if not self._in_memory:
            self.logger.warning("Trying to flush to disk when we're not in memory")
            return

        # Go back to the start of our file.
        self._fileobj.seek(0)

        # Open a new file.
        new_file = self._get_disk_file()

        # Copy the file objects.
        shutil.copyfileobj(self._fileobj, new_file)

        # Seek to the new position in our new file.
        new_file.seek(self._bytes_written)

        # Reassign the fileobject.
        old_fileobj = self._fileobj
        self._fileobj = new_file

        # We're no longer in memory.
        self._in_memory = False

        # Close the old file object.
        old_fileobj.close()

    def _get_disk_file(self) -> BufferedRandom:
        """Open and return the on-disk file object for this upload.

        Depending on the configuration this is either a file named after the
        upload inside UPLOAD_DIR, or a named temporary file. The chosen path
        is stored (as bytes) in ``_actual_file_name``.

        Raises:
            FileError: If the file cannot be opened or created.
        """
        self.logger.info("Opening a file on disk")

        file_dir = self._config.get("UPLOAD_DIR")
        keep_filename = self._config.get("UPLOAD_KEEP_FILENAME", False)
        keep_extensions = self._config.get("UPLOAD_KEEP_EXTENSIONS", False)
        delete_tmp = self._config.get("UPLOAD_DELETE_TMP", True)
        tmp_file: BufferedRandom

        # If we have a directory and are to keep the filename... An upload
        # without a usable file name falls through to a temporary name.
        if file_dir is not None and keep_filename and self._file_base:
            self.logger.info("Saving with filename in: %r", file_dir)

            # Build our filename.
            fname = self._file_base + self._ext if keep_extensions else self._file_base

            # The file name is bytes, so the directory must be bytes as well:
            # os.path.join refuses to mix str and bytes components.
            path = os.path.join(os.fsencode(file_dir), fname)
            try:
                self.logger.info("Opening file: %r", path)
                tmp_file = open(path, "w+b")
            except OSError as exc:
                self.logger.exception("Error opening temporary file")
                raise FileError("Error opening temporary file: %r" % path) from exc
        else:
            # Build options array.
            # Note that on Python 3, tempfile doesn't support byte names. We
            # decode our paths using the default filesystem encoding.
            suffix = self._ext.decode(sys.getfilesystemencoding()) if keep_extensions else None

            if file_dir is None:
                tmp_dir = None
            elif isinstance(file_dir, bytes):
                tmp_dir = file_dir.decode(sys.getfilesystemencoding())
            else:
                tmp_dir = file_dir  # pragma: no cover

            # Create a temporary (named) file with the appropriate settings.
            self.logger.info(
                "Creating a temporary file with options: %r", {"suffix": suffix, "delete": delete_tmp, "dir": tmp_dir}
            )
            try:
                tmp_file = cast(
                    BufferedRandom, tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_tmp, dir=tmp_dir)
                )
            except OSError as exc:
                self.logger.exception("Error creating named temporary file")
                raise FileError("Error creating named temporary file") from exc

        # Encode filename as bytes.
        if isinstance(tmp_file.name, str):
            actual_name = tmp_file.name.encode(sys.getfilesystemencoding())
        else:
            actual_name = cast(bytes, tmp_file.name)  # pragma: no cover

        self._actual_file_name = actual_name
        return tmp_file

    def write(self, data: bytes) -> int:
        """Write some data to the File.

        Args:
            data: The data to write to the file.

        Returns:
            The number of bytes written.
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the File.

        Args:
            data: The data to write to the file.

        Returns:
            The number of bytes written.
        """
        bwritten = self._fileobj.write(data)

        # If the bytes written isn't the same as the length, just return.
        if bwritten != len(data):
            self.logger.warning("bwritten != len(data) (%d != %d)", bwritten, len(data))
            return bwritten

        # Keep track of how many bytes we've written.
        self._bytes_written += bwritten

        # If we're in-memory and are over our limit, we create a file.
        max_memory_file_size = self._config.get("MAX_MEMORY_FILE_SIZE")
        if self._in_memory and max_memory_file_size is not None and (self._bytes_written > max_memory_file_size):
            self.logger.info("Flushing to disk")
            self.flush_to_disk()

        # Return the number of bytes written.
        return bwritten

    def on_end(self) -> None:
        """This method is called whenever the File is finalized."""
        # Flush the underlying file object
        self._fileobj.flush()

    def finalize(self) -> None:
        """Finalize the form file. This will not close the underlying file,
        but simply signal that we are finished writing to the File.
        """
        self.on_end()

    def close(self) -> None:
        """Close the File object. This will actually close the underlying
        file object (whether it's a :class:`io.BytesIO` or an actual file
        object).
        """
        self._fileobj.close()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(file_name={self.file_name!r}, field_name={self.field_name!r})"

611

612

class BaseParser:
    """Common base for all parsers: stores callbacks and dispatches to them.

    Callbacks come in two flavours. *Notification* callbacks signal that
    something happened (for example, a new part of a multipart message was
    encountered) and are invoked without arguments. *Data* callbacks deliver
    bytes (for example, part of a multipart body) and are invoked as::

        data_callback(data, start, end)

    where ``data`` is a bytestring and ``start``/``end`` are integer indexes
    into it, so that ``data[start:end]`` is the portion the callback is
    "interested in". The buffer is passed as-is instead of a sliced copy,
    since copying severely hurts performance.
    """

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)
        self.callbacks: QuerystringCallbacks | OctetStreamCallbacks | MultipartCallbacks = {}

    def callback(
        self, name: CallbackName, data: bytes | None = None, start: int | None = None, end: int | None = None
    ) -> None:
        """Invoke the callback registered under ``"on_" + name``, if any.

        Args:
            name: The name of the callback to call (as a string).
            data: Data to pass to the callback. If None, then it is assumed that the callback is a notification
                callback, and no parameters are given.
            start: An integer that is passed to the data callback.
            end: An integer that is passed to the data callback.
        """
        handler = self.callbacks.get("on_" + name)
        if handler is None:
            return
        handler = cast("Callable[..., Any]", handler)

        if data is None:
            # Notification callback.
            handler()
        elif start is None or start != end:
            # Data callback; an empty range (start == end) is not reported.
            handler(data, start, end)

    def set_callback(self, name: CallbackName, new_func: Callable[..., Any] | None) -> None:
        """Register, replace or remove the function for a callback.

        Args:
            name: The name of the callback to set (as a string).
            new_func: The new function for the callback. If None, then the
                callback will be removed (with no error if it does not exist).
        """
        key = "on_" + name
        if new_func is not None:
            self.callbacks[key] = new_func  # type: ignore[literal-required]
        else:
            self.callbacks.pop(key, None)  # type: ignore[misc]

    def close(self) -> None:
        pass  # pragma: no cover

    def finalize(self) -> None:
        pass  # pragma: no cover

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"

687

688

class OctetStreamParser(BaseParser):
    """This parser parses an octet-stream request body and calls callbacks when
    incoming data is received. Callbacks are as follows:

    | Callback Name  | Parameters      | Description                                         |
    |----------------|-----------------|-----------------------------------------------------|
    | on_start       | None            | Called when the first data is parsed.               |
    | on_data        | data, start, end| Called for each data chunk that is parsed.          |
    | on_end         | None            | Called when the parser is finished parsing all data.|

    Args:
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].
            A dictionary passed here is stored by reference. When omitted, the parser gets its own empty dictionary.
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

    Raises:
        ValueError: If ``max_size`` is not a number or is smaller than 1.
    """

    def __init__(self, callbacks: OctetStreamCallbacks | None = None, max_size: float = float("inf")):
        super().__init__()
        # BaseParser.__init__ already created a fresh, private dict. Only
        # replace it when the caller supplied one: a shared ``{}`` default
        # would be mutated by set_callback() and leak callbacks between
        # parsers.
        if callbacks is not None:
            self.callbacks = callbacks
        self._started = False

        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size: int | float = max_size
        self._current_size = 0

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        and then pass the data to the underlying callback.

        Data beyond ``max_size`` is not reported to the callback; a warning is
        logged and the returned count reflects the truncated length.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.
        """
        if not self._started:
            self.callback("start")
            self._started = True

        # Truncate data length.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        # Increment size, then callback, in case there's an exception.
        self._current_size += data_len
        self.callback("data", data, 0, data_len)
        return data_len

    def finalize(self) -> None:
        """Finalize this parser, which signals that we are finished parsing,
        and sends the on_end callback.
        """
        self.callback("end")

    def __repr__(self) -> str:
        return "%s()" % self.__class__.__name__

755

756

757class QuerystringParser(BaseParser):

758 """This is a streaming querystring parser. It will consume data, and call

759 the callbacks given when it has data.

760

761 | Callback Name | Parameters | Description |

762 |----------------|-----------------|-----------------------------------------------------|

763 | on_field_start | None | Called when a new field is encountered. |

764 | on_field_name | data, start, end| Called when a portion of a field's name is encountered. |

765 | on_field_data | data, start, end| Called when a portion of a field's data is encountered. |

766 | on_field_end | None | Called when the end of a field is encountered. |

767 | on_end | None | Called when the parser is finished parsing all data.|

768

769 Args:

770 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

771 strict_parsing: Whether or not to parse the body strictly. Defaults to False. If this is set to True, then the

772 behavior of the parser changes as the following: if a field has a value with an equal sign

773 (e.g. "foo=bar", or "foo="), it is always included. If a field has no equals sign (e.g. "...&name&..."),

774 it will be treated as an error if 'strict_parsing' is True, otherwise included. If an error is encountered,

775 then a [`QuerystringParseError`][python_multipart.exceptions.QuerystringParseError] will be raised.

776 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

777 """ # noqa: E501

778

779 state: QuerystringState

780

781 def __init__(

782 self, callbacks: QuerystringCallbacks = {}, strict_parsing: bool = False, max_size: float = float("inf")

783 ) -> None:

784 super().__init__()

785 self.state = QuerystringState.BEFORE_FIELD

786 self._found_sep = False

787

788 self.callbacks = callbacks

789

790 # Max-size stuff

791 if not isinstance(max_size, Number) or max_size < 1:

792 raise ValueError("max_size must be a positive number, not %r" % max_size)

793 self.max_size: int | float = max_size

794 self._current_size = 0

795

796 # Should parsing be strict?

797 self.strict_parsing = strict_parsing

798

799 def write(self, data: bytes) -> int:

800 """Write some data to the parser, which will perform size verification,

801 parse into either a field name or value, and then pass the

802 corresponding data to the underlying callback. If an error is

803 encountered while parsing, a QuerystringParseError will be raised. The

804 "offset" attribute of the raised exception will be set to the offset in

805 the input data chunk (NOT the overall stream) that caused the error.

806

807 Args:

808 data: The data to write to the parser.

809

810 Returns:

811 The number of bytes written.

812 """

813 # Handle sizing.

814 data_len = len(data)

815 if (self._current_size + data_len) > self.max_size:

816 # We truncate the length of data that we are to process.

817 new_size = int(self.max_size - self._current_size)

818 self.logger.warning(

819 "Current size is %d (max %d), so truncating data length from %d to %d",

820 self._current_size,

821 self.max_size,

822 data_len,

823 new_size,

824 )

825 data_len = new_size

826

827 l = 0

828 try:

829 l = self._internal_write(data, data_len)

830 finally:

831 self._current_size += l

832

833 return l

834

835 def _internal_write(self, data: bytes, length: int) -> int:

836 state = self.state

837 strict_parsing = self.strict_parsing

838 found_sep = self._found_sep

839

840 i = 0

841 while i < length:

842 ch = data[i]

843

844 # Depending on our state...

845 if state == QuerystringState.BEFORE_FIELD:

846 # If the 'found_sep' flag is set, we've already encountered

847 # and skipped a single separator. If so, we check our strict

848 # parsing flag and decide what to do. Otherwise, we haven't

849 # yet reached a separator, and thus, if we do, we need to skip

850 # it as it will be the boundary between fields that's supposed

851 # to be there.

852 if ch == AMPERSAND:

853 if found_sep:

854 # If we're parsing strictly, we disallow blank chunks.

855 if strict_parsing:

856 raise QuerystringParseError("Skipping duplicate ampersand at %d" % i, offset=i)

857 else:

858 self.logger.debug("Skipping duplicate ampersand at %d", i)

859 else:

860 # This case is when we're skipping the (first)

861 # separator between fields, so we just set our flag

862 # and continue on.

863 found_sep = True

864 else:

865 # Emit a field-start event, and go to that state. Also,

866 # reset the "found_sep" flag, for the next time we get to

867 # this state.

868 self.callback("field_start")

869 i -= 1

870 state = QuerystringState.FIELD_NAME

871 found_sep = False

872

873 elif state == QuerystringState.FIELD_NAME:

874 # Try and find a separator - we ensure that, if we do, we only

875 # look for the equal sign before it.

876 sep_pos = data.find(b"&", i, length)

877

878 # See if we can find an equals sign in the remaining data. If

879 # so, we can immediately emit the field name and jump to the

880 # data state.

881 if sep_pos != -1:

882 equals_pos = data.find(b"=", i, sep_pos)

883 else:

884 equals_pos = data.find(b"=", i, length)

885

886 if equals_pos != -1:

887 # Emit this name.

888 self.callback("field_name", data, i, equals_pos)

889

890 # Jump i to this position. Note that it will then have 1

891 # added to it below, which means the next iteration of this

892 # loop will inspect the character after the equals sign.

893 i = equals_pos

894 state = QuerystringState.FIELD_DATA

895 else:

896 # No equals sign found.

897 if not strict_parsing:

898 # See also comments in the QuerystringState.FIELD_DATA case below.

899 # If we found the separator, we emit the name and just

900 # end - there's no data callback at all (not even with

901 # a blank value).

902 if sep_pos != -1:

903 self.callback("field_name", data, i, sep_pos)

904 self.callback("field_end")

905

906 i = sep_pos - 1

907 state = QuerystringState.BEFORE_FIELD

908 else:

909 # Otherwise, no separator in this block, so the

910 # rest of this chunk must be a name.

911 self.callback("field_name", data, i, length)

912 i = length

913

914 else:

915 # We're parsing strictly. If we find a separator,

916 # this is an error - we require an equals sign.

917 if sep_pos != -1:

918 raise QuerystringParseError(

919 "When strict_parsing is True, we require an "

920 "equals sign in all field chunks. Did not "

921 "find one in the chunk that starts at %d" % (i,),

922 offset=i,

923 )

924

925 # No separator in the rest of this chunk, so it's just

926 # a field name.

927 self.callback("field_name", data, i, length)

928 i = length

929

930 elif state == QuerystringState.FIELD_DATA:

931 # Try finding an ampersand after this position.

932 sep_pos = data.find(b"&", i, length)

933

934 # If we found it, callback this bit as data and then go back

935 # to expecting to find a field.

936 if sep_pos != -1:

937 self.callback("field_data", data, i, sep_pos)

938 self.callback("field_end")

939

940 # Note that we go to the separator, which brings us to the

941 # "before field" state. This allows us to properly emit

942 # "field_start" events only when we actually have data for

943 # a field of some sort.

944 i = sep_pos - 1

945 state = QuerystringState.BEFORE_FIELD

946

947 # Otherwise, emit the rest as data and finish.

948 else:

949 self.callback("field_data", data, i, length)

950 i = length

951

952 else: # pragma: no cover (error case)

953 msg = "Reached an unknown state %d at %d" % (state, i)

954 self.logger.warning(msg)

955 raise QuerystringParseError(msg, offset=i)

956

957 i += 1

958

959 self.state = state

960 self._found_sep = found_sep

961 return length

962

def finalize(self) -> None:
    """Signal that no more data will be written to this parser.

    If the parser stopped while still inside a field (reading either its
    name or its data), that field is closed first by emitting the
    ``field_end`` event (the ``on_field_end`` callback). The ``end`` event
    (the ``on_end`` callback) is then emitted unconditionally.
    """
    # States in which a field has been started but not yet terminated.
    unfinished_states = (QuerystringState.FIELD_NAME, QuerystringState.FIELD_DATA)

    if self.state in unfinished_states:
        # Close the dangling field before announcing the end of input.
        self.callback("field_end")

    self.callback("end")

972

def __repr__(self) -> str:
    """Return a debugging representation of this parser.

    The result has the form ``ClassName(strict_parsing=..., max_size=...)``,
    using the runtime class name and the ``repr()`` of both settings.
    """
    cls_name = self.__class__.__name__
    return f"{cls_name}(strict_parsing={self.strict_parsing!r}, max_size={self.max_size!r})"

977

978

979class MultipartParser(BaseParser):

980 """This class is a streaming multipart/form-data parser.

981

982 | Callback Name | Parameters | Description |

983 |--------------------|-----------------|-------------|

984 | on_part_begin | None | Called when a new part of the multipart message is encountered. |

985 | on_part_data | data, start, end| Called when a portion of a part's data is encountered. |

986 | on_part_end | None | Called when the end of a part is reached. |

987 | on_header_begin | None | Called when we've found a new header in a part of a multipart message |

988 | on_header_field | data, start, end| Called each time an additional portion of a header is read (i.e. the part of the header that is before the colon; the "Foo" in "Foo: Bar"). |

989 | on_header_value | data, start, end| Called when we get data for a header. |

990 | on_header_end | None | Called when the current header is finished - i.e. we've reached the newline at the end of the header. |

991 | on_headers_finished| None | Called when all headers are finished, and before the part data starts. |

992 | on_end | None | Called when the parser is finished parsing all data. |

993

994 Args:

995 boundary: The multipart boundary. This is required, and must match what is given in the HTTP request - usually in the Content-Type header.

996 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

997 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

998 max_header_count: The maximum number of headers allowed per part.

999 max_header_size: The maximum size of a single header line (excluding the trailing CRLF).

1000 """ # noqa: E501