Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/python

1from __future__ import annotations

3import logging

4import os

5import shutil

6import sys

7import tempfile

8from email.message import Message

9from enum import IntEnum

10from io import BufferedRandom, BytesIO

11from numbers import Number

12from typing import TYPE_CHECKING, cast

14from .decoders import Base64Decoder, QuotedPrintableDecoder

15from .exceptions import FileError, FormParserError, MultipartParseError, QuerystringParseError

17if TYPE_CHECKING: # pragma: no cover

18 from collections.abc import Callable

19 from typing import Any, Literal, Protocol, TypeAlias, TypedDict

21 class SupportsRead(Protocol):

22 def read(self, __n: int) -> bytes: ...

24 class QuerystringCallbacks(TypedDict, total=False):

25 on_field_start: Callable[[], None]

26 on_field_name: Callable[[bytes, int, int], None]

27 on_field_data: Callable[[bytes, int, int], None]

28 on_field_end: Callable[[], None]

29 on_end: Callable[[], None]

31 class OctetStreamCallbacks(TypedDict, total=False):

32 on_start: Callable[[], None]

33 on_data: Callable[[bytes, int, int], None]

34 on_end: Callable[[], None]

36 class MultipartCallbacks(TypedDict, total=False):

37 on_part_begin: Callable[[], None]

38 on_part_data: Callable[[bytes, int, int], None]

39 on_part_end: Callable[[], None]

40 on_header_begin: Callable[[], None]

41 on_header_field: Callable[[bytes, int, int], None]

42 on_header_value: Callable[[bytes, int, int], None]

43 on_header_end: Callable[[], None]

44 on_headers_finished: Callable[[], None]

45 on_end: Callable[[], None]

47 class FormParserConfig(TypedDict):

48 UPLOAD_DIR: str | None

49 UPLOAD_KEEP_FILENAME: bool

50 UPLOAD_KEEP_EXTENSIONS: bool

51 UPLOAD_ERROR_ON_BAD_CTE: bool

52 MAX_MEMORY_FILE_SIZE: int

53 MAX_BODY_SIZE: float

55 class FileConfig(TypedDict, total=False):

56 UPLOAD_DIR: str | bytes | None

57 UPLOAD_DELETE_TMP: bool

58 UPLOAD_KEEP_FILENAME: bool

59 UPLOAD_KEEP_EXTENSIONS: bool

60 MAX_MEMORY_FILE_SIZE: int

62 class _FormProtocol(Protocol):

63 def write(self, data: bytes) -> int: ...

65 def finalize(self) -> None: ...

67 def close(self) -> None: ...

69 class FieldProtocol(_FormProtocol, Protocol):

70 def __init__(self, name: bytes | None) -> None: ...

72 def set_none(self) -> None: ...

74 class FileProtocol(_FormProtocol, Protocol):

75 def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig) -> None: ...

77 OnFieldCallback = Callable[[FieldProtocol], None]

78 OnFileCallback = Callable[[FileProtocol], None]

80 CallbackName: TypeAlias = Literal[

81 "start",

82 "data",

83 "end",

84 "field_start",

85 "field_name",

86 "field_data",

87 "field_end",

88 "part_begin",

89 "part_data",

90 "part_end",

91 "header_begin",

92 "header_field",

93 "header_value",

94 "header_end",

95 "headers_finished",

96 ]

98# Unique missing object.

99_missing = object()

100

101

102class QuerystringState(IntEnum):

103 """Querystring parser states.

104

105 These are used to keep track of the state of the parser, and are used to determine

106 what to do when new data is encountered.

107 """

108

109 BEFORE_FIELD = 0

110 FIELD_NAME = 1

111 FIELD_DATA = 2

112

113

114class MultipartState(IntEnum):

115 """Multipart parser states.

116

117 These are used to keep track of the state of the parser, and are used to determine

118 what to do when new data is encountered.

119 """

120

121 START = 0

122 START_BOUNDARY = 1

123 HEADER_FIELD_START = 2

124 HEADER_FIELD = 3

125 HEADER_VALUE_START = 4

126 HEADER_VALUE = 5

127 HEADER_VALUE_ALMOST_DONE = 6

128 HEADERS_ALMOST_DONE = 7

129 PART_DATA_START = 8

130 PART_DATA = 9

131 PART_DATA_END = 10

132 END_BOUNDARY = 11

133 END = 12

134

135

136# Flags for the multipart parser.

137FLAG_PART_BOUNDARY = 1

138FLAG_LAST_BOUNDARY = 2

139

140# Get constants. Since iterating over a str on Python 2 gives you a 1-length

141# string, but iterating over a bytes object on Python 3 gives you an integer,

142# we need to save these constants.

143CR = b"\r"[0]

144LF = b"\n"[0]

145COLON = b":"[0]

146SPACE = b" "[0]

147HYPHEN = b"-"[0]

148AMPERSAND = b"&"[0]

149SEMICOLON = b";"[0]

150LOWER_A = b"a"[0]

151LOWER_Z = b"z"[0]

152NULL = b"\x00"[0]

153

154# fmt: off

155# Mask for ASCII characters that can be http tokens.

156# Per RFC7230 - 3.2.6, this is all alpha-numeric characters

157# and these: !#$%&'*+-.^_`|~

158TOKEN_CHARS_SET = frozenset(

159 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"

160 b"abcdefghijklmnopqrstuvwxyz"

161 b"0123456789"

162 b"!#$%&'*+-.^_`|~")

163# fmt: on

164

165

166def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]:

167 """Parses a Content-Type header into a value in the following format: (content_type, {parameters})."""

168 # Uses email.message.Message to parse the header as described in PEP 594.

169 # Ref: https://peps.python.org/pep-0594/#cgi

170 if not value:

171 return (b"", {})

172

173 # If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.

174 if isinstance(value, bytes): # pragma: no cover

175 value = value.decode("latin-1")

176

177 # For types

178 assert isinstance(value, str), "Value should be a string by now"

179

180 # If we have no options, return the string as-is.

181 if ";" not in value:

182 return (value.lower().strip().encode("latin-1"), {})

183

184 # Split at the first semicolon, to get our value and then options.

185 # ctype, rest = value.split(b';', 1)

186 message = Message()

187 message["content-type"] = value

188 params = message.get_params()

189 # If there were no parameters, this would have already returned above

190 assert params, "At least the content type value should be present"

191 ctype = params.pop(0)[0].encode("latin-1")

192 options: dict[bytes, bytes] = {}

193 for param in params:

194 key, value = param

195 # If the value returned from get_params() is a 3-tuple, the last

196 # element corresponds to the value.

197 # See: https://docs.python.org/3/library/email.compat32-message.html

198 if isinstance(value, tuple):

199 value = value[-1]

200 # If the value is a filename, we need to fix a bug on IE6 that sends

201 # the full file path instead of the filename.

202 if key == "filename":

203 if value[1:3] == ":\\" or value[:2] == "\\\\":

204 value = value.split("\\")[-1]

205 options[key.encode("latin-1")] = value.encode("latin-1")

206 return ctype, options

207

208

209class Field:

210 """A Field object represents a (parsed) form field. It represents a single

211 field with a corresponding name and value.

212

213 The name that a :class:`Field` will be instantiated with is the same name

214 that would be found in the following HTML::

215

216 <input name="name_goes_here" type="text"/>

217

218 This class defines two methods, :meth:`on_data` and :meth:`on_end`, that

219 will be called when data is written to the Field, and when the Field is

220 finalized, respectively.

221

222 Args:

223 name: The name of the form field.

224 """

225

226 def __init__(self, name: bytes | None) -> None:

227 self._name = name

228 self._value: list[bytes] = []

229

230 # We cache the joined version of _value for speed.

231 self._cache = _missing

232

233 @classmethod

234 def from_value(cls, name: bytes, value: bytes | None) -> Field:

235 """Create an instance of a :class:`Field`, and set the corresponding

236 value - either None or an actual value. This method will also

237 finalize the Field itself.

238

239 Args:

240 name: the name of the form field.

241 value: the value of the form field - either a bytestring or None.

242

243 Returns:

244 A new instance of a [`Field`][python_multipart.Field].

245 """

246

247 f = cls(name)

248 if value is None:

249 f.set_none()

250 else:

251 f.write(value)

252 f.finalize()

253 return f

254

255 def write(self, data: bytes) -> int:

256 """Write some data into the form field.

257

258 Args:

259 data: The data to write to the field.

260

261 Returns:

262 The number of bytes written.

263 """

264 return self.on_data(data)

265

266 def on_data(self, data: bytes) -> int:

267 """This method is a callback that will be called whenever data is

268 written to the Field.

269

270 Args:

271 data: The data to write to the field.

272

273 Returns:

274 The number of bytes written.

275 """

276 self._value.append(data)

277 self._cache = _missing

278 return len(data)

279

280 def on_end(self) -> None:

281 """This method is called whenever the Field is finalized."""

282 if self._cache is _missing:

283 self._cache = b"".join(self._value)

284

285 def finalize(self) -> None:

286 """Finalize the form field."""

287 self.on_end()

288

289 def close(self) -> None:

290 """Close the Field object. This will free any underlying cache."""

291 # Free our value array.

292 if self._cache is _missing:

293 self._cache = b"".join(self._value)

294

295 del self._value

296

297 def set_none(self) -> None:

298 """Some fields in a querystring can possibly have a value of None - for

299 example, the string "foo&bar=&baz=asdf" will have a field with the

300 name "foo" and value None, one with name "bar" and value "", and one

301 with name "baz" and value "asdf". Since the write() interface doesn't

302 support writing None, this function will set the field value to None.

303 """

304 self._cache = None

305

306 @property

307 def field_name(self) -> bytes | None:

308 """This property returns the name of the field."""

309 return self._name

310

311 @property

312 def value(self) -> bytes | None:

313 """This property returns the value of the form field."""

314 if self._cache is _missing:

315 self._cache = b"".join(self._value)

316

317 assert isinstance(self._cache, bytes) or self._cache is None

318 return self._cache

319

320 def __eq__(self, other: object) -> bool:

321 if isinstance(other, Field):

322 return self.field_name == other.field_name and self.value == other.value

323 else:

324 return NotImplemented

325

326 def __repr__(self) -> str:

327 if self.value is not None and len(self.value) > 97:

328 # We get the repr, and then insert three dots before the final

329 # quote.

330 v = repr(self.value[:97])[:-1] + "...'"

331 else:

332 v = repr(self.value)

333

334 return f"{self.__class__.__name__}(field_name={self.field_name!r}, value={v})"

335

336

337class File:

338 """This class represents an uploaded file. It handles writing file data to

339 either an in-memory file or a temporary file on-disk, if the optional

340 threshold is passed.

341

342 There are some options that can be passed to the File to change behavior

343 of the class. Valid options are as follows:

344

346 |-----------------------|-------|---------|-------------|

349 | UPLOAD_KEEP_FILENAME | `bool`| False | Whether or not to keep the filename of the uploaded file. If True, then the filename will be converted to a safe representation (e.g. by removing any invalid path segments), and then saved with the same name). Otherwise, a temporary name will be used. |

350 | UPLOAD_KEEP_EXTENSIONS| `bool`| False | Whether or not to keep the uploaded file's extension. If False, the file will be saved with the default temporary extension (usually ".tmp"). Otherwise, the file's extension will be maintained. Note that this will properly combine with the UPLOAD_KEEP_FILENAME setting. |

351 | MAX_MEMORY_FILE_SIZE | `int` | 1 MiB | The maximum number of bytes of a File to keep in memory. By default, the contents of a File are kept into memory until a certain limit is reached, after which the contents of the File are written to a temporary file. This behavior can be disabled by setting this value to an appropriately large value (or, for example, infinity, such as `float('inf')`. |

352

353 Args:

354 file_name: The name of the file that this [`File`][python_multipart.File] represents.

355 field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,

356 the file was uploaded with Content-Type application/octet-stream.

357 config: The configuration for this File. See above for valid configuration keys and their corresponding values.

358 """ # noqa: E501

359

360 def __init__(self, file_name: bytes | None, field_name: bytes | None = None, config: FileConfig = {}) -> None:

361 # Save configuration, set other variables default.

362 self.logger = logging.getLogger(__name__)

363 self._config = config

364 self._in_memory = True

365 self._bytes_written = 0

366 self._fileobj: BytesIO | BufferedRandom = BytesIO()

367

368 # Save the provided field/file name.

369 self._field_name = field_name

370 self._file_name = file_name

371

372 # Our actual file name is None by default, since, depending on our

373 # config, we may not actually use the provided name.

374 self._actual_file_name: bytes | None = None

375

376 # Split the extension from the filename.

377 if file_name is not None:

378 # Extract just the basename to avoid directory traversal

379 basename = os.path.basename(file_name)

380 base, ext = os.path.splitext(basename)

381 self._file_base = base

382 self._ext = ext

383

384 @property

385 def field_name(self) -> bytes | None:

386 """The form field associated with this file. May be None if there isn't

387 one, for example when we have an application/octet-stream upload.

388 """

389 return self._field_name

390

391 @property

392 def file_name(self) -> bytes | None:

393 """The file name given in the upload request."""

394 return self._file_name

395

396 @property

397 def actual_file_name(self) -> bytes | None:

398 """The file name that this file is saved as. Will be None if it's not

399 currently saved on disk.

400 """

401 return self._actual_file_name

402

403 @property

404 def file_object(self) -> BytesIO | BufferedRandom:

405 """The file object that we're currently writing to. Note that this

406 will either be an instance of a :class:`io.BytesIO`, or a regular file

407 object.

408 """

409 return self._fileobj

410

411 @property

412 def size(self) -> int:

413 """The total size of this file, counted as the number of bytes that

414 currently have been written to the file.

415 """

416 return self._bytes_written

417

418 @property

419 def in_memory(self) -> bool:

420 """A boolean representing whether or not this file object is currently

421 stored in-memory or on-disk.

422 """

423 return self._in_memory

424

425 def flush_to_disk(self) -> None:

426 """If the file is already on-disk, do nothing. Otherwise, copy from

427 the in-memory buffer to a disk file, and then reassign our internal

428 file object to this new disk file.

429

430 Note that if you attempt to flush a file that is already on-disk, a

431 warning will be logged to this module's logger.

432 """

433 if not self._in_memory:

434 self.logger.warning("Trying to flush to disk when we're not in memory")

435 return

436

437 # Go back to the start of our file.

438 self._fileobj.seek(0)

439

440 # Open a new file.

441 new_file = self._get_disk_file()

442

443 # Copy the file objects.

444 shutil.copyfileobj(self._fileobj, new_file)

445

446 # Seek to the new position in our new file.

447 new_file.seek(self._bytes_written)

448

449 # Reassign the fileobject.

450 old_fileobj = self._fileobj

451 self._fileobj = new_file

452

453 # We're no longer in memory.

454 self._in_memory = False

455

456 # Close the old file object.

457 old_fileobj.close()

458

459 def _get_disk_file(self) -> BufferedRandom:

460 """This function is responsible for getting a file object on-disk for us."""

461 self.logger.info("Opening a file on disk")

462

463 file_dir = self._config.get("UPLOAD_DIR")

464 keep_filename = self._config.get("UPLOAD_KEEP_FILENAME", False)

465 keep_extensions = self._config.get("UPLOAD_KEEP_EXTENSIONS", False)

466 delete_tmp = self._config.get("UPLOAD_DELETE_TMP", True)

467 tmp_file: None | BufferedRandom = None

468

469 # If we have a directory and are to keep the filename...

470 if file_dir is not None and keep_filename:

471 self.logger.info("Saving with filename in: %r", file_dir)

472

473 # Build our filename.

474 # TODO: what happens if we don't have a filename?

475 fname = self._file_base + self._ext if keep_extensions else self._file_base

476

477 path = os.path.join(file_dir, fname) # type: ignore[arg-type]

478 try:

479 self.logger.info("Opening file: %r", path)

480 tmp_file = open(path, "w+b")

481 except OSError:

482 tmp_file = None

483

484 self.logger.exception("Error opening temporary file")

485 raise FileError("Error opening temporary file: %r" % path)

486 else:

487 # Build options array.

488 # Note that on Python 3, tempfile doesn't support byte names. We

489 # encode our paths using the default filesystem encoding.

490 suffix = self._ext.decode(sys.getfilesystemencoding()) if keep_extensions else None

491

492 if file_dir is None:

493 dir = None

494 elif isinstance(file_dir, bytes):

495 dir = file_dir.decode(sys.getfilesystemencoding())

496 else:

497 dir = file_dir # pragma: no cover

498

499 # Create a temporary (named) file with the appropriate settings.

500 self.logger.info(

501 "Creating a temporary file with options: %r", {"suffix": suffix, "delete": delete_tmp, "dir": dir}

502 )

503 try:

504 tmp_file = cast(BufferedRandom, tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_tmp, dir=dir))

505 except OSError:

506 self.logger.exception("Error creating named temporary file")

507 raise FileError("Error creating named temporary file")

508

509 assert tmp_file is not None

510 # Encode filename as bytes.

511 if isinstance(tmp_file.name, str):

512 fname = tmp_file.name.encode(sys.getfilesystemencoding())

513 else:

514 fname = cast(bytes, tmp_file.name) # pragma: no cover

515

516 self._actual_file_name = fname

517 return tmp_file

518

519 def write(self, data: bytes) -> int:

520 """Write some data to the File.

521

522 :param data: a bytestring

523 """

524 return self.on_data(data)

525

526 def on_data(self, data: bytes) -> int:

527 """This method is a callback that will be called whenever data is

528 written to the File.

529

530 Args:

531 data: The data to write to the file.

532

533 Returns:

534 The number of bytes written.

535 """

536 bwritten = self._fileobj.write(data)

537

538 # If the bytes written isn't the same as the length, just return.

539 if bwritten != len(data):

540 self.logger.warning("bwritten != len(data) (%d != %d)", bwritten, len(data))

541 return bwritten

542

543 # Keep track of how many bytes we've written.

544 self._bytes_written += bwritten

545

546 # If we're in-memory and are over our limit, we create a file.

547 max_memory_file_size = self._config.get("MAX_MEMORY_FILE_SIZE")

548 if self._in_memory and max_memory_file_size is not None and (self._bytes_written > max_memory_file_size):

549 self.logger.info("Flushing to disk")

550 self.flush_to_disk()

551

552 # Return the number of bytes written.

553 return bwritten

554

555 def on_end(self) -> None:

556 """This method is called whenever the Field is finalized."""

557 # Flush the underlying file object

558 self._fileobj.flush()

559

560 def finalize(self) -> None:

561 """Finalize the form file. This will not close the underlying file,

562 but simply signal that we are finished writing to the File.

563 """

564 self.on_end()

565

566 def close(self) -> None:

567 """Close the File object. This will actually close the underlying

568 file object (whether it's a :class:`io.BytesIO` or an actual file

569 object).

570 """

571 self._fileobj.close()

572

573 def __repr__(self) -> str:

574 return f"{self.__class__.__name__}(file_name={self.file_name!r}, field_name={self.field_name!r})"

575

576

577class BaseParser:

578 """This class is the base class for all parsers. It contains the logic for

579 calling and adding callbacks.

580

581 A callback can be one of two different forms. "Notification callbacks" are

582 callbacks that are called when something happens - for example, when a new

583 part of a multipart message is encountered by the parser. "Data callbacks"

584 are called when we get some sort of data - for example, part of the body of

585 a multipart chunk. Notification callbacks are called with no parameters,

586 whereas data callbacks are called with three, as follows::

587

588 data_callback(data, start, end)

589

590 The "data" parameter is a bytestring (i.e. "foo" on Python 2, or b"foo" on

591 Python 3). "start" and "end" are integer indexes into the "data" string

592 that represent the data of interest. Thus, in a data callback, the slice

593 `data[start:end]` represents the data that the callback is "interested in".

594 The callback is not passed a copy of the data, since copying severely hurts

595 performance.

596 """

597

598 def __init__(self) -> None:

599 self.logger = logging.getLogger(__name__)

600 self.callbacks: QuerystringCallbacks | OctetStreamCallbacks | MultipartCallbacks = {}

601

602 def callback(

603 self, name: CallbackName, data: bytes | None = None, start: int | None = None, end: int | None = None

604 ) -> None:

605 """This function calls a provided callback with some data. If the

606 callback is not set, will do nothing.

607

608 Args:

609 name: The name of the callback to call (as a string).

610 data: Data to pass to the callback. If None, then it is assumed that the callback is a notification

611 callback, and no parameters are given.

612 end: An integer that is passed to the data callback.

613 start: An integer that is passed to the data callback.

614 """

615 on_name = "on_" + name

616 func = self.callbacks.get(on_name)

617 if func is None:

618 return

619 func = cast("Callable[..., Any]", func)

620 # Depending on whether we're given a buffer...

621 if data is not None:

622 # Don't do anything if we have start == end.

623 if start is not None and start == end:

624 return

625

626 self.logger.debug("Calling %s with data[%d:%d]", on_name, start, end)

627 func(data, start, end)

628 else:

629 self.logger.debug("Calling %s with no data", on_name)

630 func()

631

632 def set_callback(self, name: CallbackName, new_func: Callable[..., Any] | None) -> None:

633 """Update the function for a callback. Removes from the callbacks dict

634 if new_func is None.

635

636 :param name: The name of the callback to call (as a string).

637

638 :param new_func: The new function for the callback. If None, then the

639 callback will be removed (with no error if it does not

640 exist).

641 """

642 if new_func is None:

643 self.callbacks.pop("on_" + name, None) # type: ignore[misc]

644 else:

645 self.callbacks["on_" + name] = new_func # type: ignore[literal-required]

646

647 def close(self) -> None:

648 pass # pragma: no cover

649

650 def finalize(self) -> None:

651 pass # pragma: no cover

652

653 def __repr__(self) -> str:

654 return "%s()" % self.__class__.__name__

655

656

657class OctetStreamParser(BaseParser):

658 """This parser parses an octet-stream request body and calls callbacks when

659 incoming data is received. Callbacks are as follows:

660

661 | Callback Name | Parameters | Description |

662 |----------------|-----------------|-----------------------------------------------------|

663 | on_start | None | Called when the first data is parsed. |

664 | on_data | data, start, end| Called for each data chunk that is parsed. |

665 | on_end | None | Called when the parser is finished parsing all data.|

666

667 Args:

668 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

669 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

670 """

671

672 def __init__(self, callbacks: OctetStreamCallbacks = {}, max_size: float = float("inf")):

673 super().__init__()

674 self.callbacks = callbacks

675 self._started = False

676

677 if not isinstance(max_size, Number) or max_size < 1:

678 raise ValueError("max_size must be a positive number, not %r" % max_size)

679 self.max_size: int | float = max_size

680 self._current_size = 0

681

682 def write(self, data: bytes) -> int:

683 """Write some data to the parser, which will perform size verification,

684 and then pass the data to the underlying callback.

685

686 Args:

687 data: The data to write to the parser.

688

689 Returns:

690 The number of bytes written.

691 """

692 if not self._started:

693 self.callback("start")

694 self._started = True

695

696 # Truncate data length.

697 data_len = len(data)

698 if (self._current_size + data_len) > self.max_size:

699 # We truncate the length of data that we are to process.

700 new_size = int(self.max_size - self._current_size)

701 self.logger.warning(

702 "Current size is %d (max %d), so truncating data length from %d to %d",

703 self._current_size,

704 self.max_size,

705 data_len,

706 new_size,

707 )

708 data_len = new_size

709

710 # Increment size, then callback, in case there's an exception.

711 self._current_size += data_len

712 self.callback("data", data, 0, data_len)

713 return data_len

714

715 def finalize(self) -> None:

716 """Finalize this parser, which signals to that we are finished parsing,

717 and sends the on_end callback.

718 """

719 self.callback("end")

720

721 def __repr__(self) -> str:

722 return "%s()" % self.__class__.__name__

723

724

725class QuerystringParser(BaseParser):

726 """This is a streaming querystring parser. It will consume data, and call

727 the callbacks given when it has data.

728

729 | Callback Name | Parameters | Description |

730 |----------------|-----------------|-----------------------------------------------------|

731 | on_field_start | None | Called when a new field is encountered. |

732 | on_field_name | data, start, end| Called when a portion of a field's name is encountered. |

733 | on_field_data | data, start, end| Called when a portion of a field's data is encountered. |

734 | on_field_end | None | Called when the end of a field is encountered. |

735 | on_end | None | Called when the parser is finished parsing all data.|

736

737 Args:

738 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

739 strict_parsing: Whether or not to parse the body strictly. Defaults to False. If this is set to True, then the

740 behavior of the parser changes as the following: if a field has a value with an equal sign

741 (e.g. "foo=bar", or "foo="), it is always included. If a field has no equals sign (e.g. "...&name&..."),

742 it will be treated as an error if 'strict_parsing' is True, otherwise included. If an error is encountered,

743 then a [`QuerystringParseError`][python_multipart.exceptions.QuerystringParseError] will be raised.

744 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

745 """ # noqa: E501

746

747 state: QuerystringState

748

749 def __init__(

750 self, callbacks: QuerystringCallbacks = {}, strict_parsing: bool = False, max_size: float = float("inf")

751 ) -> None:

752 super().__init__()

753 self.state = QuerystringState.BEFORE_FIELD

754 self._found_sep = False

755

756 self.callbacks = callbacks

757

758 # Max-size stuff

759 if not isinstance(max_size, Number) or max_size < 1:

760 raise ValueError("max_size must be a positive number, not %r" % max_size)

761 self.max_size: int | float = max_size

762 self._current_size = 0

763

764 # Should parsing be strict?

765 self.strict_parsing = strict_parsing

766

767 def write(self, data: bytes) -> int:

768 """Write some data to the parser, which will perform size verification,

769 parse into either a field name or value, and then pass the

770 corresponding data to the underlying callback. If an error is

771 encountered while parsing, a QuerystringParseError will be raised. The

772 "offset" attribute of the raised exception will be set to the offset in

773 the input data chunk (NOT the overall stream) that caused the error.

774

775 Args:

776 data: The data to write to the parser.

777

778 Returns:

779 The number of bytes written.

780 """

781 # Handle sizing.

782 data_len = len(data)

783 if (self._current_size + data_len) > self.max_size:

784 # We truncate the length of data that we are to process.

785 new_size = int(self.max_size - self._current_size)

786 self.logger.warning(

787 "Current size is %d (max %d), so truncating data length from %d to %d",

788 self._current_size,

789 self.max_size,

790 data_len,

791 new_size,

792 )

793 data_len = new_size

794

795 l = 0

796 try:

797 l = self._internal_write(data, data_len)

798 finally:

799 self._current_size += l

800

801 return l

802

803 def _internal_write(self, data: bytes, length: int) -> int:

804 state = self.state

805 strict_parsing = self.strict_parsing

806 found_sep = self._found_sep

807

808 i = 0

809 while i < length:

810 ch = data[i]

811

812 # Depending on our state...

813 if state == QuerystringState.BEFORE_FIELD:

814 # If the 'found_sep' flag is set, we've already encountered

815 # and skipped a single separator. If so, we check our strict

816 # parsing flag and decide what to do. Otherwise, we haven't

817 # yet reached a separator, and thus, if we do, we need to skip

818 # it as it will be the boundary between fields that's supposed

819 # to be there.

820 if ch == AMPERSAND or ch == SEMICOLON:

821 if found_sep:

822 # If we're parsing strictly, we disallow blank chunks.

823 if strict_parsing:

824 e = QuerystringParseError("Skipping duplicate ampersand/semicolon at %d" % i)

825 e.offset = i

826 raise e

827 else:

828 self.logger.debug("Skipping duplicate ampersand/semicolon at %d", i)

829 else:

830 # This case is when we're skipping the (first)

831 # separator between fields, so we just set our flag

832 # and continue on.

833 found_sep = True

834 else:

835 # Emit a field-start event, and go to that state. Also,

836 # reset the "found_sep" flag, for the next time we get to

837 # this state.

838 self.callback("field_start")

839 i -= 1

840 state = QuerystringState.FIELD_NAME

841 found_sep = False

842

843 elif state == QuerystringState.FIELD_NAME:

844 # Try and find a separator - we ensure that, if we do, we only

845 # look for the equal sign before it.

846 sep_pos = data.find(b"&", i)

847 if sep_pos == -1:

848 sep_pos = data.find(b";", i)

849

850 # See if we can find an equals sign in the remaining data. If

851 # so, we can immediately emit the field name and jump to the

852 # data state.

853 if sep_pos != -1:

854 equals_pos = data.find(b"=", i, sep_pos)

855 else:

856 equals_pos = data.find(b"=", i)

857

858 if equals_pos != -1:

859 # Emit this name.

860 self.callback("field_name", data, i, equals_pos)

861

862 # Jump i to this position. Note that it will then have 1

863 # added to it below, which means the next iteration of this

864 # loop will inspect the character after the equals sign.

865 i = equals_pos

866 state = QuerystringState.FIELD_DATA

867 else:

868 # No equals sign found.

869 if not strict_parsing:

870 # See also comments in the QuerystringState.FIELD_DATA case below.

871 # If we found the separator, we emit the name and just

872 # end - there's no data callback at all (not even with

873 # a blank value).

874 if sep_pos != -1:

875 self.callback("field_name", data, i, sep_pos)

876 self.callback("field_end")

877

878 i = sep_pos - 1

879 state = QuerystringState.BEFORE_FIELD

880 else:

881 # Otherwise, no separator in this block, so the

882 # rest of this chunk must be a name.

883 self.callback("field_name", data, i, length)

884 i = length

885

886 else:

887 # We're parsing strictly. If we find a separator,

888 # this is an error - we require an equals sign.

889 if sep_pos != -1:

890 e = QuerystringParseError(

891 "When strict_parsing is True, we require an "

892 "equals sign in all field chunks. Did not "

893 "find one in the chunk that starts at %d" % (i,)

894 )

895 e.offset = i

896 raise e

897

898 # No separator in the rest of this chunk, so it's just

899 # a field name.

900 self.callback("field_name", data, i, length)

901 i = length

902

903 elif state == QuerystringState.FIELD_DATA:

904 # Try finding either an ampersand or a semicolon after this

905 # position.

906 sep_pos = data.find(b"&", i)

907 if sep_pos == -1:

908 sep_pos = data.find(b";", i)

909

910 # If we found it, callback this bit as data and then go back

911 # to expecting to find a field.

912 if sep_pos != -1:

913 self.callback("field_data", data, i, sep_pos)

914 self.callback("field_end")

915

916 # Note that we go to the separator, which brings us to the

917 # "before field" state. This allows us to properly emit

918 # "field_start" events only when we actually have data for

919 # a field of some sort.

920 i = sep_pos - 1

921 state = QuerystringState.BEFORE_FIELD

922

923 # Otherwise, emit the rest as data and finish.

924 else:

925 self.callback("field_data", data, i, length)

926 i = length

927

928 else: # pragma: no cover (error case)

929 msg = "Reached an unknown state %d at %d" % (state, i)

930 self.logger.warning(msg)

931 e = QuerystringParseError(msg)

932 e.offset = i

933 raise e

934

935 i += 1

936

937 self.state = state

938 self._found_sep = found_sep

939 return len(data)

940

941 def finalize(self) -> None:

942 """Finalize this parser, which signals to that we are finished parsing,

943 if we're still in the middle of a field, an on_field_end callback, and

944 then the on_end callback.

945 """

946 # If we're currently in the middle of a field, we finish it.

947 if self.state == QuerystringState.FIELD_DATA:

948 self.callback("field_end")

949 self.callback("end")

950

951 def __repr__(self) -> str:

952 return "{}(strict_parsing={!r}, max_size={!r})".format(

953 self.__class__.__name__, self.strict_parsing, self.max_size

954 )

955

956

957class MultipartParser(BaseParser):

958 """This class is a streaming multipart/form-data parser.

959

960 | Callback Name | Parameters | Description |

961 |--------------------|-----------------|-------------|

962 | on_part_begin | None | Called when a new part of the multipart message is encountered. |

963 | on_part_data | data, start, end| Called when a portion of a part's data is encountered. |

964 | on_part_end | None | Called when the end of a part is reached. |

965 | on_header_begin | None | Called when we've found a new header in a part of a multipart message |

966 | on_header_field | data, start, end| Called each time an additional portion of a header is read (i.e. the part of the header that is before the colon; the "Foo" in "Foo: Bar"). |

967 | on_header_value | data, start, end| Called when we get data for a header. |

968 | on_header_end | None | Called when the current header is finished - i.e. we've reached the newline at the end of the header. |

969 | on_headers_finished| None | Called when all headers are finished, and before the part data starts. |

970 | on_end | None | Called when the parser is finished parsing all data. |

971

972 Args:

973 boundary: The multipart boundary. This is required, and must match what is given in the HTTP request - usually in the Content-Type header.

974 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

975 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

976 """ # noqa: E501

977

978 def __init__(