1from __future__ import annotations
2
3import logging
4import os
5import shutil
6import sys
7import tempfile
8from email.message import Message
9from enum import IntEnum
10from io import BufferedRandom, BytesIO
11from numbers import Number
12from typing import TYPE_CHECKING, cast
13
14from .decoders import Base64Decoder, QuotedPrintableDecoder
15from .exceptions import FileError, FormParserError, MultipartParseError, QuerystringParseError
16
# Static-typing-only declarations. Nothing in this block exists at runtime,
# since TYPE_CHECKING is False outside of type checkers.
if TYPE_CHECKING:  # pragma: no cover
    from typing import Any, Callable, Literal, Protocol, TypedDict

    from typing_extensions import TypeAlias

    class SupportsRead(Protocol):
        """Structural type for any object we can read bytes from."""

        def read(self, __n: int) -> bytes: ...

    class QuerystringCallbacks(TypedDict, total=False):
        """Optional callbacks accepted by the querystring parser."""

        on_field_start: Callable[[], None]
        on_field_name: Callable[[bytes, int, int], None]
        on_field_data: Callable[[bytes, int, int], None]
        on_field_end: Callable[[], None]
        on_end: Callable[[], None]

    class OctetStreamCallbacks(TypedDict, total=False):
        """Optional callbacks accepted by the octet-stream parser."""

        on_start: Callable[[], None]
        on_data: Callable[[bytes, int, int], None]
        on_end: Callable[[], None]

    class MultipartCallbacks(TypedDict, total=False):
        """Optional callbacks accepted by the multipart parser."""

        on_part_begin: Callable[[], None]
        on_part_data: Callable[[bytes, int, int], None]
        on_part_end: Callable[[], None]
        on_header_begin: Callable[[], None]
        on_header_field: Callable[[bytes, int, int], None]
        on_header_value: Callable[[bytes, int, int], None]
        on_header_end: Callable[[], None]
        on_headers_finished: Callable[[], None]
        on_end: Callable[[], None]

    class FormParserConfig(TypedDict):
        """Configuration keys understood by the form parser (all required)."""

        UPLOAD_DIR: str | None
        UPLOAD_KEEP_FILENAME: bool
        UPLOAD_KEEP_EXTENSIONS: bool
        UPLOAD_ERROR_ON_BAD_CTE: bool
        MAX_MEMORY_FILE_SIZE: int
        MAX_BODY_SIZE: float

    class FileConfig(TypedDict, total=False):
        """Configuration keys understood by the File class (all optional)."""

        UPLOAD_DIR: str | bytes | None
        UPLOAD_DELETE_TMP: bool
        UPLOAD_KEEP_FILENAME: bool
        UPLOAD_KEEP_EXTENSIONS: bool
        MAX_MEMORY_FILE_SIZE: int

    class _FormProtocol(Protocol):
        """Common write/finalize/close interface shared by fields and files."""

        def write(self, data: bytes) -> int: ...

        def finalize(self) -> None: ...

        def close(self) -> None: ...

    class FieldProtocol(_FormProtocol, Protocol):
        """Interface a Field-like object must implement."""

        def __init__(self, name: bytes | None) -> None: ...

        def set_none(self) -> None: ...

    class FileProtocol(_FormProtocol, Protocol):
        """Interface a File-like object must implement."""

        def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig) -> None: ...

    OnFieldCallback = Callable[[FieldProtocol], None]
    OnFileCallback = Callable[[FileProtocol], None]

    # Names accepted by BaseParser.callback()/set_callback(); the "on_"
    # prefix is added internally.
    CallbackName: TypeAlias = Literal[
        "start",
        "data",
        "end",
        "field_start",
        "field_name",
        "field_data",
        "field_end",
        "part_begin",
        "part_data",
        "part_end",
        "header_begin",
        "header_field",
        "header_value",
        "header_end",
        "headers_finished",
    ]
98
99# Unique missing object.
100_missing = object()
101
102
class QuerystringState(IntEnum):
    """Querystring parser states.

    These are used to keep track of the state of the parser, and are used to determine
    what to do when new data is encountered.
    """

    BEFORE_FIELD = 0  # Between fields: skipping '&'/';' separators until real data starts.
    FIELD_NAME = 1  # Reading the field name, up to '=' or a separator.
    FIELD_DATA = 2  # Reading the field value, up to the next separator.
113
114
class MultipartState(IntEnum):
    """Multipart parser states.

    These are used to keep track of the state of the parser, and are used to determine
    what to do when new data is encountered.
    """

    # The states are ordered to follow the wire format of a multipart body:
    # opening boundary, then per-part headers, then part data, then either
    # another boundary or the closing ("last") boundary.
    START = 0
    START_BOUNDARY = 1
    HEADER_FIELD_START = 2
    HEADER_FIELD = 3
    HEADER_VALUE_START = 4
    HEADER_VALUE = 5
    HEADER_VALUE_ALMOST_DONE = 6
    HEADERS_ALMOST_DONE = 7
    PART_DATA_START = 8
    PART_DATA = 9
    PART_DATA_END = 10
    END_BOUNDARY = 11
    END = 12
135
136
# Flags for the multipart parser.
FLAG_PART_BOUNDARY = 1
FLAG_LAST_BOUNDARY = 2

# Byte-value constants used in comparisons while scanning input.
# Indexing a bytes object yields an int (e.g. b"\r"[0] == 13), so we
# precompute the integer values of the characters we test against.
CR = b"\r"[0]
LF = b"\n"[0]
COLON = b":"[0]
SPACE = b" "[0]
HYPHEN = b"-"[0]
AMPERSAND = b"&"[0]
SEMICOLON = b";"[0]
LOWER_A = b"a"[0]
LOWER_Z = b"z"[0]
NULL = b"\x00"[0]

# fmt: off
# Mask for ASCII characters that can be http tokens.
# Per RFC7230 - 3.2.6, this is all alpha-numeric characters
# and these: !#$%&'*+-.^_`|~
TOKEN_CHARS_SET = frozenset(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b"abcdefghijklmnopqrstuvwxyz"
    b"0123456789"
    b"!#$%&'*+-.^_`|~")
# fmt: on
165
166
def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]:
    """Parses a Content-Type header into a value in the following format: (content_type, {parameters})."""
    # An empty or missing header parses to an empty content type.
    if not value:
        return (b"", {})

    # Bytes input is assumed to follow WSGI conventions, i.e. latin-1 encoded.
    if isinstance(value, bytes):  # pragma: no cover
        value = value.decode("latin-1")

    # For types
    assert isinstance(value, str), "Value should be a string by now"

    # Fast path: no ';' means there are no parameters at all.
    if ";" not in value:
        return (value.lower().strip().encode("latin-1"), {})

    # Delegate splitting of the value and its parameters to the stdlib email
    # machinery (the documented replacement for the removed cgi module, see
    # PEP 594: https://peps.python.org/pep-0594/#cgi).
    message = Message()
    message["content-type"] = value
    params = message.get_params()
    # We only get here when a ';' was present, so the content type itself
    # must have been parsed out.
    assert params, "At least the content type value should be present"

    ctype = params.pop(0)[0].encode("latin-1")
    options: dict[bytes, bytes] = {}
    for key, param_value in params:
        # get_params() may return an RFC 2231 (charset, language, value)
        # 3-tuple; the actual value is the last element.
        # See: https://docs.python.org/3/library/email.compat32-message.html
        if isinstance(param_value, tuple):
            param_value = param_value[-1]
        # Work around IE6 sending the client's full file path as the
        # filename: keep only the final path component.
        if key == "filename" and (param_value[1:3] == ":\\" or param_value[:2] == "\\\\"):
            param_value = param_value.rsplit("\\", 1)[-1]
        options[key.encode("latin-1")] = param_value.encode("latin-1")
    return ctype, options
208
209
class Field:
    """A Field object represents a (parsed) form field. It represents a single
    field with a corresponding name and value.

    The name that a :class:`Field` will be instantiated with is the same name
    that would be found in the following HTML::

        <input name="name_goes_here" type="text"/>

    Incoming data is delivered through :meth:`on_data`, and :meth:`on_end` is
    invoked once the field is complete.

    Args:
        name: The name of the form field.
    """

    def __init__(self, name: bytes | None) -> None:
        self._name = name
        self._value: list[bytes] = []

        # Lazily-computed joined form of _value. The module-level _missing
        # sentinel distinguishes "not computed yet" from a legitimate cached
        # value of None (set via set_none()).
        self._cache = _missing

    @classmethod
    def from_value(cls, name: bytes, value: bytes | None) -> Field:
        """Create an instance of a :class:`Field`, and set the corresponding
        value - either None or an actual value. This method will also
        finalize the Field itself.

        Args:
            name: the name of the form field.
            value: the value of the form field - either a bytestring or None.

        Returns:
            A new instance of a [`Field`][python_multipart.Field].
        """
        field = cls(name)
        if value is None:
            field.set_none()
        else:
            field.write(value)
        field.finalize()
        return field

    def _materialize(self) -> None:
        # Join the accumulated chunks into the cache, unless already done.
        if self._cache is _missing:
            self._cache = b"".join(self._value)

    def write(self, data: bytes) -> int:
        """Write some data into the form field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the Field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        self._value.append(data)
        # New data invalidates any previously joined value.
        self._cache = _missing
        return len(data)

    def on_end(self) -> None:
        """This method is called whenever the Field is finalized."""
        self._materialize()

    def finalize(self) -> None:
        """Finalize the form field."""
        self.on_end()

    def close(self) -> None:
        """Close the Field object. This will free any underlying cache."""
        self._materialize()
        # Release the accumulated chunk list; only the joined cache remains.
        del self._value

    def set_none(self) -> None:
        """Some fields in a querystring can possibly have a value of None - for
        example, the string "foo&bar=&baz=asdf" will have a field with the
        name "foo" and value None, one with name "bar" and value "", and one
        with name "baz" and value "asdf". Since the write() interface doesn't
        support writing None, this function will set the field value to None.
        """
        self._cache = None

    @property
    def field_name(self) -> bytes | None:
        """This property returns the name of the field."""
        return self._name

    @property
    def value(self) -> bytes | None:
        """This property returns the value of the form field."""
        self._materialize()
        assert isinstance(self._cache, bytes) or self._cache is None
        return self._cache

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Field):
            return NotImplemented
        return self.field_name == other.field_name and self.value == other.value

    def __repr__(self) -> str:
        val = self.value
        if val is not None and len(val) > 97:
            # Truncate the repr and splice an ellipsis in before the final quote.
            shown = repr(val[:97])[:-1] + "...'"
        else:
            shown = repr(val)

        return "{}(field_name={!r}, value={})".format(self.__class__.__name__, self.field_name, shown)
336
337
class File:
    """This class represents an uploaded file. It handles writing file data to
    either an in-memory file or a temporary file on-disk, if the optional
    threshold is passed.

    There are some options that can be passed to the File to change behavior
    of the class. Valid options are as follows:

    | Name | Type | Default | Description |
    |-----------------------|-------|---------|-------------|
    | UPLOAD_DIR | `str` | None | The directory to store uploaded files in. If this is None, a temporary file will be created in the system's standard location. |
    | UPLOAD_DELETE_TMP | `bool`| True | Delete automatically created TMP file |
    | UPLOAD_KEEP_FILENAME | `bool`| False | Whether or not to keep the filename of the uploaded file. If True, then the filename will be converted to a safe representation (e.g. by removing any invalid path segments), and then saved with the same name). Otherwise, a temporary name will be used. |
    | UPLOAD_KEEP_EXTENSIONS| `bool`| False | Whether or not to keep the uploaded file's extension. If False, the file will be saved with the default temporary extension (usually ".tmp"). Otherwise, the file's extension will be maintained. Note that this will properly combine with the UPLOAD_KEEP_FILENAME setting. |
    | MAX_MEMORY_FILE_SIZE | `int` | 1 MiB | The maximum number of bytes of a File to keep in memory. By default, the contents of a File are kept into memory until a certain limit is reached, after which the contents of the File are written to a temporary file. This behavior can be disabled by setting this value to an appropriately large value (or, for example, infinity, such as `float('inf')`. |

    Args:
        file_name: The name of the file that this [`File`][python_multipart.File] represents.
        field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,
            the file was uploaded with Content-Type application/octet-stream.
        config: The configuration for this File. See above for valid configuration keys and their corresponding values.
    """  # noqa: E501

    def __init__(self, file_name: bytes | None, field_name: bytes | None = None, config: FileConfig = {}) -> None:
        # NOTE: the shared `{}` default is safe here because the config is
        # only ever read (via .get()), never mutated.
        # Save configuration, set other variables default.
        self.logger = logging.getLogger(__name__)
        self._config = config
        self._in_memory = True
        self._bytes_written = 0
        self._fileobj: BytesIO | BufferedRandom = BytesIO()

        # Save the provided field/file name.
        self._field_name = field_name
        self._file_name = file_name

        # Our actual file name is None by default, since, depending on our
        # config, we may not actually use the provided name.
        self._actual_file_name: bytes | None = None

        # Split the extension from the filename. Default both pieces to b""
        # so that _get_disk_file() works even when no filename was provided
        # (e.g. an application/octet-stream upload with UPLOAD_KEEP_EXTENSIONS
        # set) instead of raising AttributeError.
        self._file_base: bytes = b""
        self._ext: bytes = b""
        if file_name is not None:
            self._file_base, self._ext = os.path.splitext(file_name)

    @property
    def field_name(self) -> bytes | None:
        """The form field associated with this file. May be None if there isn't
        one, for example when we have an application/octet-stream upload.
        """
        return self._field_name

    @property
    def file_name(self) -> bytes | None:
        """The file name given in the upload request."""
        return self._file_name

    @property
    def actual_file_name(self) -> bytes | None:
        """The file name that this file is saved as. Will be None if it's not
        currently saved on disk.
        """
        return self._actual_file_name

    @property
    def file_object(self) -> BytesIO | BufferedRandom:
        """The file object that we're currently writing to. Note that this
        will either be an instance of a :class:`io.BytesIO`, or a regular file
        object.
        """
        return self._fileobj

    @property
    def size(self) -> int:
        """The total size of this file, counted as the number of bytes that
        currently have been written to the file.
        """
        return self._bytes_written

    @property
    def in_memory(self) -> bool:
        """A boolean representing whether or not this file object is currently
        stored in-memory or on-disk.
        """
        return self._in_memory

    def flush_to_disk(self) -> None:
        """If the file is already on-disk, do nothing. Otherwise, copy from
        the in-memory buffer to a disk file, and then reassign our internal
        file object to this new disk file.

        Note that if you attempt to flush a file that is already on-disk, a
        warning will be logged to this module's logger.
        """
        if not self._in_memory:
            self.logger.warning("Trying to flush to disk when we're not in memory")
            return

        # Go back to the start of our file.
        self._fileobj.seek(0)

        # Open a new file.
        new_file = self._get_disk_file()

        # Copy the file objects.
        shutil.copyfileobj(self._fileobj, new_file)

        # Seek to the new position in our new file.
        new_file.seek(self._bytes_written)

        # Reassign the fileobject.
        old_fileobj = self._fileobj
        self._fileobj = new_file

        # We're no longer in memory.
        self._in_memory = False

        # Close the old file object.
        old_fileobj.close()

    def _get_disk_file(self) -> BufferedRandom:
        """This function is responsible for getting a file object on-disk for us.

        Raises:
            FileError: If the target file (or temporary file) cannot be opened.
        """
        self.logger.info("Opening a file on disk")

        file_dir = self._config.get("UPLOAD_DIR")
        keep_filename = self._config.get("UPLOAD_KEEP_FILENAME", False)
        keep_extensions = self._config.get("UPLOAD_KEEP_EXTENSIONS", False)
        delete_tmp = self._config.get("UPLOAD_DELETE_TMP", True)
        tmp_file: None | BufferedRandom = None

        # If we have a directory and are to keep the filename...
        if file_dir is not None and keep_filename:
            self.logger.info("Saving with filename in: %r", file_dir)

            # Build our filename (empty _file_base/_ext if no name was given).
            fname = self._file_base + self._ext if keep_extensions else self._file_base

            path = os.path.join(file_dir, fname)  # type: ignore[arg-type]
            try:
                self.logger.info("Opening file: %r", path)
                tmp_file = open(path, "w+b")
            except OSError as e:
                tmp_file = None

                self.logger.exception("Error opening temporary file")
                raise FileError("Error opening temporary file: %r" % path) from e
        else:
            # Build options array.
            # Note that on Python 3, tempfile doesn't support byte names. We
            # encode our paths using the default filesystem encoding.
            suffix = self._ext.decode(sys.getfilesystemencoding()) if keep_extensions else None

            if file_dir is None:
                dir = None
            elif isinstance(file_dir, bytes):
                dir = file_dir.decode(sys.getfilesystemencoding())
            else:
                dir = file_dir  # pragma: no cover

            # Create a temporary (named) file with the appropriate settings.
            self.logger.info(
                "Creating a temporary file with options: %r", {"suffix": suffix, "delete": delete_tmp, "dir": dir}
            )
            try:
                tmp_file = cast(BufferedRandom, tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_tmp, dir=dir))
            except OSError as e:
                self.logger.exception("Error creating named temporary file")
                raise FileError("Error creating named temporary file") from e

        assert tmp_file is not None
        # Encode filename as bytes.
        if isinstance(tmp_file.name, str):
            fname = tmp_file.name.encode(sys.getfilesystemencoding())
        else:
            fname = cast(bytes, tmp_file.name)  # pragma: no cover

        self._actual_file_name = fname
        return tmp_file

    def write(self, data: bytes) -> int:
        """Write some data to the File.

        :param data: a bytestring
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the File.

        Args:
            data: The data to write to the file.

        Returns:
            The number of bytes written.
        """
        bwritten = self._fileobj.write(data)

        # If the bytes written isn't the same as the length, just return.
        if bwritten != len(data):
            self.logger.warning("bwritten != len(data) (%d != %d)", bwritten, len(data))
            return bwritten

        # Keep track of how many bytes we've written.
        self._bytes_written += bwritten

        # If we're in-memory and are over our limit, we create a file.
        max_memory_file_size = self._config.get("MAX_MEMORY_FILE_SIZE")
        if self._in_memory and max_memory_file_size is not None and (self._bytes_written > max_memory_file_size):
            self.logger.info("Flushing to disk")
            self.flush_to_disk()

        # Return the number of bytes written.
        return bwritten

    def on_end(self) -> None:
        """This method is called whenever the Field is finalized."""
        # Flush the underlying file object
        self._fileobj.flush()

    def finalize(self) -> None:
        """Finalize the form file. This will not close the underlying file,
        but simply signal that we are finished writing to the File.
        """
        self.on_end()

    def close(self) -> None:
        """Close the File object. This will actually close the underlying
        file object (whether it's a :class:`io.BytesIO` or an actual file
        object).
        """
        self._fileobj.close()

    def __repr__(self) -> str:
        return "{}(file_name={!r}, field_name={!r})".format(self.__class__.__name__, self.file_name, self.field_name)
574
575
class BaseParser:
    """This class is the base class for all parsers. It contains the logic for
    calling and adding callbacks.

    A callback can be one of two different forms. "Notification callbacks" are
    callbacks that are called when something happens - for example, when a new
    part of a multipart message is encountered by the parser. "Data callbacks"
    are called when we get some sort of data - for example, part of the body of
    a multipart chunk. Notification callbacks are called with no parameters,
    whereas data callbacks are called with three, as follows::

        data_callback(data, start, end)

    The "data" parameter is a bytestring. "start" and "end" are integer
    indexes into "data" that delimit the bytes of interest: the slice
    `data[start:end]` is what the callback should look at. The data is not
    copied before being handed to the callback, since copying severely hurts
    performance.
    """

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)
        # Registered callbacks, keyed by "on_"-prefixed name.
        self.callbacks: QuerystringCallbacks | OctetStreamCallbacks | MultipartCallbacks = {}

    def callback(
        self, name: CallbackName, data: bytes | None = None, start: int | None = None, end: int | None = None
    ) -> None:
        """This function calls a provided callback with some data. If the
        callback is not set, will do nothing.

        Args:
            name: The name of the callback to call (as a string).
            data: Data to pass to the callback. If None, then it is assumed that the callback is a notification
                callback, and no parameters are given.
            end: An integer that is passed to the data callback.
            start: An integer that is passed to the data callback.
        """
        callback_name = "on_" + name
        handler = self.callbacks.get(callback_name)
        if handler is None:
            return
        handler = cast("Callable[..., Any]", handler)

        if data is None:
            # Notification callback: no arguments.
            self.logger.debug("Calling %s with no data", callback_name)
            handler()
            return

        # Data callback: skip entirely for an empty slice.
        if start is not None and start == end:
            return

        self.logger.debug("Calling %s with data[%d:%d]", callback_name, start, end)
        handler(data, start, end)

    def set_callback(self, name: CallbackName, new_func: Callable[..., Any] | None) -> None:
        """Update the function for a callback. Removes from the callbacks dict
        if new_func is None.

        :param name: The name of the callback to call (as a string).

        :param new_func: The new function for the callback. If None, then the
                         callback will be removed (with no error if it does not
                         exist).
        """
        key = "on_" + name
        if new_func is not None:
            self.callbacks[key] = new_func  # type: ignore[literal-required]
        else:
            self.callbacks.pop(key, None)  # type: ignore[misc]

    def close(self) -> None:
        pass  # pragma: no cover

    def finalize(self) -> None:
        pass  # pragma: no cover

    def __repr__(self) -> str:
        return "%s()" % self.__class__.__name__
654
655
class OctetStreamParser(BaseParser):
    """This parser parses an octet-stream request body and calls callbacks when
    incoming data is received. Callbacks are as follows:

    | Callback Name | Parameters | Description |
    |----------------|-----------------|-----------------------------------------------------|
    | on_start | None | Called when the first data is parsed. |
    | on_data | data, start, end| Called for each data chunk that is parsed. |
    | on_end | None | Called when the parser is finished parsing all data.|

    Args:
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].
            Defaults to no callbacks.
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

    Raises:
        ValueError: If max_size is not a number, or is smaller than 1.
    """

    def __init__(self, callbacks: OctetStreamCallbacks | None = None, max_size: float = float("inf")):
        super().__init__()
        # Use None as the default and create a fresh dict per instance: a
        # shared `{}` default argument would be mutated by set_callback()
        # and leak callbacks across unrelated parser instances.
        self.callbacks = callbacks if callbacks is not None else {}
        # Whether the on_start callback has been emitted yet.
        self._started = False

        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size: int | float = max_size
        # Total bytes accepted so far; used to enforce max_size.
        self._current_size = 0

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        and then pass the data to the underlying callback.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.
        """
        if not self._started:
            self.callback("start")
            self._started = True

        # Truncate data length so we never exceed max_size in total.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        # Increment size, then callback, in case there's an exception.
        self._current_size += data_len
        self.callback("data", data, 0, data_len)
        return data_len

    def finalize(self) -> None:
        """Finalize this parser, which signals to that we are finished parsing,
        and sends the on_end callback.
        """
        self.callback("end")

    def __repr__(self) -> str:
        return "%s()" % self.__class__.__name__
722
723
class QuerystringParser(BaseParser):
    """This is a streaming querystring parser. It will consume data, and call
    the callbacks given when it has data.

    | Callback Name | Parameters | Description |
    |----------------|-----------------|-----------------------------------------------------|
    | on_field_start | None | Called when a new field is encountered. |
    | on_field_name | data, start, end| Called when a portion of a field's name is encountered. |
    | on_field_data | data, start, end| Called when a portion of a field's data is encountered. |
    | on_field_end | None | Called when the end of a field is encountered. |
    | on_end | None | Called when the parser is finished parsing all data.|

    Args:
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].
            Defaults to no callbacks.
        strict_parsing: Whether or not to parse the body strictly. Defaults to False. If this is set to True, then the
            behavior of the parser changes as the following: if a field has a value with an equal sign
            (e.g. "foo=bar", or "foo="), it is always included. If a field has no equals sign (e.g. "...&name&..."),
            it will be treated as an error if 'strict_parsing' is True, otherwise included. If an error is encountered,
            then a [`QuerystringParseError`][python_multipart.exceptions.QuerystringParseError] will be raised.
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

    Raises:
        ValueError: If max_size is not a number, or is smaller than 1.
    """  # noqa: E501

    # Current parser state; persists across write() calls so that fields may
    # span chunk boundaries.
    state: QuerystringState

    def __init__(
        self, callbacks: QuerystringCallbacks | None = None, strict_parsing: bool = False, max_size: float = float("inf")
    ) -> None:
        super().__init__()
        self.state = QuerystringState.BEFORE_FIELD
        # True once we've skipped the single separator expected between fields.
        self._found_sep = False

        # Use None as the default and create a fresh dict per instance: a
        # shared `{}` default argument would be mutated by set_callback()
        # and leak callbacks across unrelated parser instances.
        self.callbacks = callbacks if callbacks is not None else {}

        # Max-size stuff
        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size: int | float = max_size
        self._current_size = 0

        # Should parsing be strict?
        self.strict_parsing = strict_parsing

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        parse into either a field name or value, and then pass the
        corresponding data to the underlying callback. If an error is
        encountered while parsing, a QuerystringParseError will be raised. The
        "offset" attribute of the raised exception will be set to the offset in
        the input data chunk (NOT the overall stream) that caused the error.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.
        """
        # Handle sizing.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        bytes_processed = 0
        try:
            bytes_processed = self._internal_write(data, data_len)
        finally:
            # Account for consumed bytes even if parsing raised part-way.
            self._current_size += bytes_processed

        return bytes_processed

    def _internal_write(self, data: bytes, length: int) -> int:
        """Run the state machine over data[:length], emitting callbacks.

        Raises:
            QuerystringParseError: On malformed input when strict_parsing is on.
        """
        state = self.state
        strict_parsing = self.strict_parsing
        found_sep = self._found_sep

        i = 0
        while i < length:
            ch = data[i]

            # Depending on our state...
            if state == QuerystringState.BEFORE_FIELD:
                # If the 'found_sep' flag is set, we've already encountered
                # and skipped a single separator. If so, we check our strict
                # parsing flag and decide what to do. Otherwise, we haven't
                # yet reached a separator, and thus, if we do, we need to skip
                # it as it will be the boundary between fields that's supposed
                # to be there.
                if ch == AMPERSAND or ch == SEMICOLON:
                    if found_sep:
                        # If we're parsing strictly, we disallow blank chunks.
                        if strict_parsing:
                            e = QuerystringParseError("Skipping duplicate ampersand/semicolon at %d" % i)
                            e.offset = i
                            raise e
                        else:
                            self.logger.debug("Skipping duplicate ampersand/semicolon at %d", i)
                    else:
                        # This case is when we're skipping the (first)
                        # separator between fields, so we just set our flag
                        # and continue on.
                        found_sep = True
                else:
                    # Emit a field-start event, and go to that state. Also,
                    # reset the "found_sep" flag, for the next time we get to
                    # this state.
                    self.callback("field_start")
                    i -= 1
                    state = QuerystringState.FIELD_NAME
                    found_sep = False

            elif state == QuerystringState.FIELD_NAME:
                # Try and find a separator - we ensure that, if we do, we only
                # look for the equal sign before it.
                sep_pos = data.find(b"&", i)
                if sep_pos == -1:
                    sep_pos = data.find(b";", i)

                # See if we can find an equals sign in the remaining data. If
                # so, we can immediately emit the field name and jump to the
                # data state.
                if sep_pos != -1:
                    equals_pos = data.find(b"=", i, sep_pos)
                else:
                    equals_pos = data.find(b"=", i)

                if equals_pos != -1:
                    # Emit this name.
                    self.callback("field_name", data, i, equals_pos)

                    # Jump i to this position. Note that it will then have 1
                    # added to it below, which means the next iteration of this
                    # loop will inspect the character after the equals sign.
                    i = equals_pos
                    state = QuerystringState.FIELD_DATA
                else:
                    # No equals sign found.
                    if not strict_parsing:
                        # See also comments in the QuerystringState.FIELD_DATA case below.
                        # If we found the separator, we emit the name and just
                        # end - there's no data callback at all (not even with
                        # a blank value).
                        if sep_pos != -1:
                            self.callback("field_name", data, i, sep_pos)
                            self.callback("field_end")

                            i = sep_pos - 1
                            state = QuerystringState.BEFORE_FIELD
                        else:
                            # Otherwise, no separator in this block, so the
                            # rest of this chunk must be a name.
                            self.callback("field_name", data, i, length)
                            i = length

                    else:
                        # We're parsing strictly. If we find a separator,
                        # this is an error - we require an equals sign.
                        if sep_pos != -1:
                            e = QuerystringParseError(
                                "When strict_parsing is True, we require an "
                                "equals sign in all field chunks. Did not "
                                "find one in the chunk that starts at %d" % (i,)
                            )
                            e.offset = i
                            raise e

                        # No separator in the rest of this chunk, so it's just
                        # a field name.
                        self.callback("field_name", data, i, length)
                        i = length

            elif state == QuerystringState.FIELD_DATA:
                # Try finding either an ampersand or a semicolon after this
                # position.
                sep_pos = data.find(b"&", i)
                if sep_pos == -1:
                    sep_pos = data.find(b";", i)

                # If we found it, callback this bit as data and then go back
                # to expecting to find a field.
                if sep_pos != -1:
                    self.callback("field_data", data, i, sep_pos)
                    self.callback("field_end")

                    # Note that we go to the separator, which brings us to the
                    # "before field" state. This allows us to properly emit
                    # "field_start" events only when we actually have data for
                    # a field of some sort.
                    i = sep_pos - 1
                    state = QuerystringState.BEFORE_FIELD

                # Otherwise, emit the rest as data and finish.
                else:
                    self.callback("field_data", data, i, length)
                    i = length

            else:  # pragma: no cover (error case)
                msg = "Reached an unknown state %d at %d" % (state, i)
                self.logger.warning(msg)
                e = QuerystringParseError(msg)
                e.offset = i
                raise e

            i += 1

        self.state = state
        self._found_sep = found_sep
        # NOTE(review): this returns len(data), not `length`, so bytes dropped
        # by write()'s max_size truncation still count toward the caller's
        # tally - confirm this matches the intended max_size semantics.
        return len(data)

    def finalize(self) -> None:
        """Finalize this parser, which signals to that we are finished parsing,
        if we're still in the middle of a field, an on_field_end callback, and
        then the on_end callback.
        """
        # If we're currently in the middle of a field, we finish it.
        if self.state == QuerystringState.FIELD_DATA:
            self.callback("field_end")
        self.callback("end")

    def __repr__(self) -> str:
        return "{}(strict_parsing={!r}, max_size={!r})".format(
            self.__class__.__name__, self.strict_parsing, self.max_size
        )
954
955
class MultipartParser(BaseParser):
    """This class is a streaming multipart/form-data parser.

    | Callback Name      | Parameters      | Description |
    |--------------------|-----------------|-------------|
    | on_part_begin      | None            | Called when a new part of the multipart message is encountered. |
    | on_part_data       | data, start, end| Called when a portion of a part's data is encountered. |
    | on_part_end        | None            | Called when the end of a part is reached. |
    | on_header_begin    | None            | Called when we've found a new header in a part of a multipart message |
    | on_header_field    | data, start, end| Called each time an additional portion of a header is read (i.e. the part of the header that is before the colon; the "Foo" in "Foo: Bar"). |
    | on_header_value    | data, start, end| Called when we get data for a header. |
    | on_header_end      | None            | Called when the current header is finished - i.e. we've reached the newline at the end of the header. |
    | on_headers_finished| None            | Called when all headers are finished, and before the part data starts. |
    | on_end             | None            | Called when the parser is finished parsing all data. |

    Args:
        boundary: The multipart boundary. This is required, and must match what is given in the HTTP request - usually in the Content-Type header.
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.
    """  # noqa: E501

    def __init__(
        self, boundary: bytes | str, callbacks: MultipartCallbacks = {}, max_size: float = float("inf")
    ) -> None:
        """Initialize the parser with the request's multipart boundary.

        Note: the ``callbacks`` default is a mutable dict, but it is never
        mutated here, so sharing the default across instances is harmless.
        """
        # Initialize parser state.
        super().__init__()
        self.state = MultipartState.START
        self.index = self.flags = 0

        self.callbacks = callbacks

        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size = max_size
        self._current_size = 0

        # Setup marks. These are used to track the state of data received.
        self.marks: dict[str, int] = {}

        # Save our boundary.
        if isinstance(boundary, str):  # pragma: no cover
            boundary = boundary.encode("latin-1")
        # The stored boundary includes the leading CRLF and "--" so that a
        # single `find` can match a full boundary line inside part data.
        self.boundary = b"\r\n--" + boundary

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        and then parse the data into the appropriate location (e.g. header,
        data, etc.), and pass this on to the underlying callback. If an error
        is encountered, a MultipartParseError will be raised. The "offset"
        attribute on the raised exception will be set to the offset of the byte
        in the input chunk that caused the error.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.
        """
        # Handle sizing.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        l = 0
        try:
            l = self._internal_write(data, data_len)
        finally:
            # Even if parsing raised, account for the bytes we did consume.
            self._current_size += l

        return l

    def _internal_write(self, data: bytes, length: int) -> int:
        """Run the multipart state machine over ``data[:length]``.

        State (``self.state``), the boundary-match index (``self.index``),
        boundary flags (``self.flags``), and data marks (``self.marks``) are
        persisted across calls so that boundaries and headers split across
        chunks are handled correctly. Raises MultipartParseError on malformed
        input; returns the number of bytes processed.
        """
        # Get values from locals.
        boundary = self.boundary

        # Get our state, flags and index. These are persisted between calls to
        # this function.
        state = self.state
        index = self.index
        flags = self.flags

        # Our index defaults to 0.
        i = 0

        # Set a mark.
        def set_mark(name: str) -> None:
            self.marks[name] = i

        # Remove a mark.
        def delete_mark(name: str, reset: bool = False) -> None:
            self.marks.pop(name, None)

        # Helper function that makes calling a callback with data easier. The
        # 'remaining' parameter will callback from the marked value until the
        # end of the buffer, and reset the mark, instead of deleting it. This
        # is used at the end of the function to call our callbacks with any
        # remaining data in this chunk.
        def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> None:
            marked_index = self.marks.get(name)
            if marked_index is None:
                return

            # Otherwise, we call it from the mark to the current byte we're
            # processing.
            if end_i <= marked_index:
                # There is no additional data to send.
                pass
            elif marked_index >= 0:
                # We are emitting data from the local buffer.
                self.callback(name, data, marked_index, end_i)
            else:
                # Some of the data comes from a partial boundary match.
                # and requires look-behind.
                # We need to use self.flags (and not flags) because we care about
                # the state when we entered the loop.
                lookbehind_len = -marked_index
                if lookbehind_len <= len(boundary):
                    self.callback(name, boundary, 0, lookbehind_len)
                elif self.flags & FLAG_PART_BOUNDARY:
                    lookback = boundary + b"\r\n"
                    self.callback(name, lookback, 0, lookbehind_len)
                elif self.flags & FLAG_LAST_BOUNDARY:
                    lookback = boundary + b"--\r\n"
                    self.callback(name, lookback, 0, lookbehind_len)
                else:  # pragma: no cover (error case)
                    self.logger.warning("Look-back buffer error")

                if end_i > 0:
                    self.callback(name, data, 0, end_i)
            # If we're getting remaining data, we have got all the data we
            # can be certain is not a boundary, leaving only a partial boundary match.
            if remaining:
                # Negative mark: offset relative to the end of this chunk,
                # so the look-behind branch above fires on the next call.
                self.marks[name] = end_i - length
            else:
                self.marks.pop(name, None)

        # For each byte...
        while i < length:
            c = data[i]

            if state == MultipartState.START:
                # Skip leading newlines
                if c == CR or c == LF:
                    i += 1
                    continue

                # index is used as in index into our boundary. Set to 0.
                index = 0

                # Move to the next state, but decrement i so that we re-process
                # this character.
                state = MultipartState.START_BOUNDARY
                i -= 1

            elif state == MultipartState.START_BOUNDARY:
                # Check to ensure that the last 2 characters in our boundary
                # are CRLF.
                if index == len(boundary) - 2:
                    if c == HYPHEN:
                        # Potential empty message.
                        state = MultipartState.END_BOUNDARY
                    elif c != CR:
                        # Error!
                        msg = "Did not find CR at end of boundary (%d)" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    index += 1

                elif index == len(boundary) - 2 + 1:
                    if c != LF:
                        msg = "Did not find LF at end of boundary (%d)" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    # The index is now used for indexing into our boundary.
                    index = 0

                    # Callback for the start of a part.
                    self.callback("part_begin")

                    # Move to the next character and state.
                    state = MultipartState.HEADER_FIELD_START

                else:
                    # Check to ensure our boundary matches
                    # (offset +2 skips the leading CRLF stored in self.boundary,
                    # which is not present in the very first boundary line).
                    if c != boundary[index + 2]:
                        msg = "Expected boundary character %r, got %r at index %d" % (boundary[index + 2], c, index + 2)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    # Increment index into boundary and continue.
                    index += 1

            elif state == MultipartState.HEADER_FIELD_START:
                # Mark the start of a header field here, reset the index, and
                # continue parsing our header field.
                index = 0

                # Set a mark of our header field.
                set_mark("header_field")

                # Notify that we're starting a header if the next character is
                # not a CR; a CR at the beginning of the header will cause us
                # to stop parsing headers in the MultipartState.HEADER_FIELD state,
                # below.
                if c != CR:
                    self.callback("header_begin")

                # Move to parsing header fields.
                state = MultipartState.HEADER_FIELD
                i -= 1

            elif state == MultipartState.HEADER_FIELD:
                # If we've reached a CR at the beginning of a header, it means
                # that we've reached the second of 2 newlines, and so there are
                # no more headers to parse.
                if c == CR and index == 0:
                    delete_mark("header_field")
                    state = MultipartState.HEADERS_ALMOST_DONE
                    i += 1
                    continue

                # Increment our index in the header.
                index += 1

                # If we've reached a colon, we're done with this header.
                if c == COLON:
                    # A 0-length header is an error.
                    if index == 1:
                        msg = "Found 0-length header at %d" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    # Call our callback with the header field.
                    data_callback("header_field", i)

                    # Move to parsing the header value.
                    state = MultipartState.HEADER_VALUE_START

                elif c not in TOKEN_CHARS_SET:
                    msg = "Found invalid character %r in header at %d" % (c, i)
                    self.logger.warning(msg)
                    e = MultipartParseError(msg)
                    e.offset = i
                    raise e

            elif state == MultipartState.HEADER_VALUE_START:
                # Skip leading spaces.
                if c == SPACE:
                    i += 1
                    continue

                # Mark the start of the header value.
                set_mark("header_value")

                # Move to the header-value state, reprocessing this character.
                state = MultipartState.HEADER_VALUE
                i -= 1

            elif state == MultipartState.HEADER_VALUE:
                # If we've got a CR, we're nearly done our headers. Otherwise,
                # we do nothing and just move past this character.
                if c == CR:
                    data_callback("header_value", i)
                    self.callback("header_end")
                    state = MultipartState.HEADER_VALUE_ALMOST_DONE

            elif state == MultipartState.HEADER_VALUE_ALMOST_DONE:
                # The last character should be a LF. If not, it's an error.
                if c != LF:
                    msg = "Did not find LF character at end of header " "(found %r)" % (c,)
                    self.logger.warning(msg)
                    e = MultipartParseError(msg)
                    e.offset = i
                    raise e

                # Move back to the start of another header. Note that if that
                # state detects ANOTHER newline, it'll trigger the end of our
                # headers.
                state = MultipartState.HEADER_FIELD_START

            elif state == MultipartState.HEADERS_ALMOST_DONE:
                # We're almost done our headers. This is reached when we parse
                # a CR at the beginning of a header, so our next character
                # should be a LF, or it's an error.
                if c != LF:
                    msg = f"Did not find LF at end of headers (found {c!r})"
                    self.logger.warning(msg)
                    e = MultipartParseError(msg)
                    e.offset = i
                    raise e

                self.callback("headers_finished")
                state = MultipartState.PART_DATA_START

            elif state == MultipartState.PART_DATA_START:
                # Mark the start of our part data.
                set_mark("part_data")

                # Start processing part data, including this character.
                state = MultipartState.PART_DATA
                i -= 1

            elif state == MultipartState.PART_DATA:
                # We're processing our part data right now. During this, we
                # need to efficiently search for our boundary, since any data
                # on any number of lines can be a part of the current data.

                # Save the current value of our index. We use this in case we
                # find part of a boundary, but it doesn't match fully.
                prev_index = index

                # Set up variables.
                boundary_length = len(boundary)
                data_length = length

                # If our index is 0, we're starting a new part, so start our
                # search.
                if index == 0:
                    # The most common case is likely to be that the whole
                    # boundary is present in the buffer.
                    # Calling `find` is much faster than iterating here.
                    i0 = data.find(boundary, i, data_length)
                    if i0 >= 0:
                        # We matched the whole boundary string.
                        index = boundary_length - 1
                        i = i0 + boundary_length - 1
                    else:
                        # No match found for whole string.
                        # There may be a partial boundary at the end of the
                        # data, which the find will not match.
                        # Since the length should to be searched is limited to
                        # the boundary length, just perform a naive search.
                        i = max(i, data_length - boundary_length)

                        # Search forward until we either hit the end of our buffer,
                        # or reach a potential start of the boundary.
                        while i < data_length - 1 and data[i] != boundary[0]:
                            i += 1

                    c = data[i]

                # Now, we have a couple of cases here. If our index is before
                # the end of the boundary...
                if index < boundary_length:
                    # If the character matches...
                    if boundary[index] == c:
                        # The current character matches, so continue!
                        index += 1
                    else:
                        index = 0

                # Our index is equal to the length of our boundary!
                elif index == boundary_length:
                    # First we increment it.
                    index += 1

                    # Now, if we've reached a newline, we need to set this as
                    # the potential end of our boundary.
                    if c == CR:
                        flags |= FLAG_PART_BOUNDARY

                    # Otherwise, if this is a hyphen, we might be at the last
                    # of all boundaries.
                    elif c == HYPHEN:
                        flags |= FLAG_LAST_BOUNDARY

                    # Otherwise, we reset our index, since this isn't either a
                    # newline or a hyphen.
                    else:
                        index = 0

                # Our index is right after the part boundary, which should be
                # a LF.
                elif index == boundary_length + 1:
                    # If we're at a part boundary (i.e. we've seen a CR
                    # character already)...
                    if flags & FLAG_PART_BOUNDARY:
                        # We need a LF character next.
                        if c == LF:
                            # Unset the part boundary flag.
                            flags &= ~FLAG_PART_BOUNDARY

                            # We have identified a boundary, callback for any data before it.
                            data_callback("part_data", i - index)
                            # Callback indicating that we've reached the end of
                            # a part, and are starting a new one.
                            self.callback("part_end")
                            self.callback("part_begin")

                            # Move to parsing new headers.
                            index = 0
                            state = MultipartState.HEADER_FIELD_START
                            i += 1
                            continue

                        # We didn't find an LF character, so no match. Reset
                        # our index and clear our flag.
                        index = 0
                        flags &= ~FLAG_PART_BOUNDARY

                    # Otherwise, if we're at the last boundary (i.e. we've
                    # seen a hyphen already)...
                    elif flags & FLAG_LAST_BOUNDARY:
                        # We need a second hyphen here.
                        if c == HYPHEN:
                            # We have identified a boundary, callback for any data before it.
                            data_callback("part_data", i - index)
                            # Callback to end the current part, and then the
                            # message.
                            self.callback("part_end")
                            self.callback("end")
                            state = MultipartState.END
                        else:
                            # No match, so reset index.
                            index = 0

                # Otherwise, our index is 0. If the previous index is not, it
                # means we reset something, and we need to take the data we
                # thought was part of our boundary and send it along as actual
                # data.
                if index == 0 and prev_index > 0:
                    # Overwrite our previous index.
                    prev_index = 0

                    # Re-consider the current character, since this could be
                    # the start of the boundary itself.
                    i -= 1

            elif state == MultipartState.END_BOUNDARY:
                if index == len(boundary) - 2 + 1:
                    if c != HYPHEN:
                        msg = "Did not find - at end of boundary (%d)" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e
                    index += 1
                    self.callback("end")
                    state = MultipartState.END

            elif state == MultipartState.END:
                # Don't do anything if chunk ends with CRLF.
                if c == CR and i + 1 < length and data[i + 1] == LF:
                    i += 2
                    continue
                # Skip data after the last boundary.
                self.logger.warning("Skipping data after last boundary")
                i = length
                break

            else:  # pragma: no cover (error case)
                # We got into a strange state somehow! Just stop processing.
                msg = "Reached an unknown state %d at %d" % (state, i)
                self.logger.warning(msg)
                e = MultipartParseError(msg)
                e.offset = i
                raise e

            # Move to the next byte.
            i += 1

        # We call our callbacks with any remaining data. Note that we pass
        # the 'remaining' flag, which sets the mark back to 0 instead of
        # deleting it, if it's found. This is because, if the mark is found
        # at this point, we assume that there's data for one of these things
        # that has been parsed, but not yet emitted. And, as such, it implies
        # that we haven't yet reached the end of this 'thing'. So, by setting
        # the mark to 0, we cause any data callbacks that take place in future
        # calls to this function to start from the beginning of that buffer.
        data_callback("header_field", length, True)
        data_callback("header_value", length, True)
        data_callback("part_data", length - index, True)

        # Save values to locals.
        self.state = state
        self.index = index
        self.flags = flags

        # Return our data length to indicate no errors, and that we processed
        # all of it.
        return length

    def finalize(self) -> None:
        """Finalize this parser, which signals to that we are finished parsing.

        Note: It does not currently, but in the future, it will verify that we
        are in the final state of the parser (i.e. the end of the multipart
        message is well-formed), and, if not, throw an error.
        """
        # TODO: verify that we're in the state MultipartState.END, otherwise throw an
        # error or otherwise state that we're not finished parsing.
        pass

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(boundary={self.boundary!r})"
1470
1471
class FormParser:
    """This class is the all-in-one form parser. Given all the information
    necessary to parse a form, it will instantiate the correct parser, create
    the proper :class:`Field` and :class:`File` classes to store the data that
    is parsed, and call the two given callbacks with each field and file as
    they become available.

    Args:
        content_type: The Content-Type of the incoming request. This is used to select the appropriate parser.
        on_field: The callback to call when a field has been parsed and is ready for usage. See above for parameters.
        on_file: The callback to call when a file has been parsed and is ready for usage. See above for parameters.
        on_end: An optional callback to call when all fields and files in a request has been parsed. Can be None.
        boundary: If the request is a multipart/form-data request, this should be the boundary of the request, as given
            in the Content-Type header, as a bytestring.
        file_name: If the request is of type application/octet-stream, then the body of the request will not contain any
            information about the uploaded file. In such cases, you can provide the file name of the uploaded file
            manually.
        FileClass: The class to use for uploaded files. Defaults to :class:`File`, but you can provide your own class
            if you wish to customize behaviour. The class will be instantiated as FileClass(file_name, field_name), and
            it must provide the following functions::
                - file_instance.write(data)
                - file_instance.finalize()
                - file_instance.close()
        FieldClass: The class to use for uploaded fields. Defaults to :class:`Field`, but you can provide your own
            class if you wish to customize behaviour. The class will be instantiated as FieldClass(field_name), and it
            must provide the following functions::
                - field_instance.write(data)
                - field_instance.finalize()
                - field_instance.close()
                - field_instance.set_none()
        config: Configuration to use for this FormParser. The default values are taken from the DEFAULT_CONFIG value,
            and then any keys present in this dictionary will overwrite the default values.

    Raises:
        FormParserError: If the Content-Type is unknown, or a multipart request
            is missing its boundary.
    """

    #: This is the default configuration for our form parser.
    #: Note: all file sizes should be in bytes.
    DEFAULT_CONFIG: FormParserConfig = {
        "MAX_BODY_SIZE": float("inf"),
        "MAX_MEMORY_FILE_SIZE": 1 * 1024 * 1024,
        "UPLOAD_DIR": None,
        "UPLOAD_KEEP_FILENAME": False,
        "UPLOAD_KEEP_EXTENSIONS": False,
        # Error on invalid Content-Transfer-Encoding?
        "UPLOAD_ERROR_ON_BAD_CTE": False,
    }

    def __init__(
        self,
        content_type: str,
        on_field: OnFieldCallback | None,
        on_file: OnFileCallback | None,
        on_end: Callable[[], None] | None = None,
        boundary: bytes | str | None = None,
        file_name: bytes | None = None,
        FileClass: type[FileProtocol] = File,
        FieldClass: type[FieldProtocol] = Field,
        config: dict[Any, Any] = {},
    ) -> None:
        self.logger = logging.getLogger(__name__)

        # Save variables.
        self.content_type = content_type
        self.boundary = boundary
        self.bytes_received = 0
        self.parser = None

        # Save callbacks.
        self.on_field = on_field
        self.on_file = on_file
        self.on_end = on_end

        # Save classes. Store the constructor arguments (not the module-level
        # defaults), so custom classes passed by the caller are reflected on
        # the instance as well as used by the closures below.
        self.FileClass = FileClass
        self.FieldClass = FieldClass

        # Set configuration options. Copy the defaults first so per-instance
        # overrides never mutate DEFAULT_CONFIG.
        self.config: FormParserConfig = self.DEFAULT_CONFIG.copy()
        self.config.update(config)  # type: ignore[typeddict-item]

        parser: OctetStreamParser | MultipartParser | QuerystringParser | None = None

        # Depending on the Content-Type, we instantiate the correct parser.
        if content_type == "application/octet-stream":
            file: FileProtocol = None  # type: ignore

            def on_start() -> None:
                # Create the file object lazily, when data actually starts.
                nonlocal file
                file = FileClass(file_name, None, config=cast("FileConfig", self.config))

            def on_data(data: bytes, start: int, end: int) -> None:
                nonlocal file
                file.write(data[start:end])

            def _on_end() -> None:
                nonlocal file
                # Finalize the file itself.
                file.finalize()

                # Call our callback.
                if on_file:
                    on_file(file)

                # Call the on-end callback.
                if self.on_end is not None:
                    self.on_end()

            # Instantiate an octet-stream parser
            parser = OctetStreamParser(
                callbacks={"on_start": on_start, "on_data": on_data, "on_end": _on_end},
                max_size=self.config["MAX_BODY_SIZE"],
            )

        elif content_type == "application/x-www-form-urlencoded" or content_type == "application/x-url-encoded":
            name_buffer: list[bytes] = []

            f: FieldProtocol | None = None

            def on_field_start() -> None:
                pass

            def on_field_name(data: bytes, start: int, end: int) -> None:
                name_buffer.append(data[start:end])

            def on_field_data(data: bytes, start: int, end: int) -> None:
                nonlocal f
                if f is None:
                    f = FieldClass(b"".join(name_buffer))
                    del name_buffer[:]
                f.write(data[start:end])

            def on_field_end() -> None:
                nonlocal f
                # Finalize and call callback.
                if f is None:
                    # If we get here, it's because there was no field data.
                    # We create a field, set it to None, and then continue.
                    f = FieldClass(b"".join(name_buffer))
                    del name_buffer[:]
                    f.set_none()

                f.finalize()
                if on_field:
                    on_field(f)
                f = None

            def _on_end() -> None:
                if self.on_end is not None:
                    self.on_end()

            # Instantiate parser.
            parser = QuerystringParser(
                callbacks={
                    "on_field_start": on_field_start,
                    "on_field_name": on_field_name,
                    "on_field_data": on_field_data,
                    "on_field_end": on_field_end,
                    "on_end": _on_end,
                },
                max_size=self.config["MAX_BODY_SIZE"],
            )

        elif content_type == "multipart/form-data":
            if boundary is None:
                self.logger.error("No boundary given")
                raise FormParserError("No boundary given")

            header_name: list[bytes] = []
            header_value: list[bytes] = []
            headers: dict[bytes, bytes] = {}

            f_multi: FileProtocol | FieldProtocol | None = None
            writer = None
            is_file = False

            def on_part_begin() -> None:
                # Reset headers in case this isn't the first part.
                nonlocal headers
                headers = {}

            def on_part_data(data: bytes, start: int, end: int) -> None:
                nonlocal writer
                assert writer is not None
                writer.write(data[start:end])
                # TODO: check for error here.

            def on_part_end() -> None:
                nonlocal f_multi, is_file
                assert f_multi is not None
                f_multi.finalize()
                if is_file:
                    if on_file:
                        on_file(f_multi)
                else:
                    if on_field:
                        on_field(cast("FieldProtocol", f_multi))

            def on_header_field(data: bytes, start: int, end: int) -> None:
                header_name.append(data[start:end])

            def on_header_value(data: bytes, start: int, end: int) -> None:
                header_value.append(data[start:end])

            def on_header_end() -> None:
                headers[b"".join(header_name)] = b"".join(header_value)
                del header_name[:]
                del header_value[:]

            def on_headers_finished() -> None:
                nonlocal is_file, f_multi, writer
                # Reset the 'is file' flag.
                is_file = False

                # Parse the content-disposition header.
                # TODO: handle mixed case
                content_disp = headers.get(b"Content-Disposition")
                disp, options = parse_options_header(content_disp)

                # Get the field and filename.
                field_name = options.get(b"name")
                file_name = options.get(b"filename")
                # TODO: check for errors

                # Create the proper class. A part with a filename is treated
                # as a file upload; otherwise it's a plain form field.
                if file_name is None:
                    f_multi = FieldClass(field_name)
                else:
                    f_multi = FileClass(file_name, field_name, config=cast("FileConfig", self.config))
                    is_file = True

                # Parse the given Content-Transfer-Encoding to determine what
                # we need to do with the incoming data.
                # TODO: check that we properly handle 8bit / 7bit encoding.
                transfer_encoding = headers.get(b"Content-Transfer-Encoding", b"7bit")

                if transfer_encoding in (b"binary", b"8bit", b"7bit"):
                    writer = f_multi

                elif transfer_encoding == b"base64":
                    writer = Base64Decoder(f_multi)

                elif transfer_encoding == b"quoted-printable":
                    writer = QuotedPrintableDecoder(f_multi)

                else:
                    self.logger.warning("Unknown Content-Transfer-Encoding: %r", transfer_encoding)
                    if self.config["UPLOAD_ERROR_ON_BAD_CTE"]:
                        raise FormParserError('Unknown Content-Transfer-Encoding "{!r}"'.format(transfer_encoding))
                    else:
                        # If we aren't erroring, then we just treat this as an
                        # unencoded Content-Transfer-Encoding.
                        writer = f_multi

            def _on_end() -> None:
                nonlocal writer
                if writer is not None:
                    writer.finalize()
                if self.on_end is not None:
                    self.on_end()

            # Instantiate a multipart parser.
            parser = MultipartParser(
                boundary,
                callbacks={
                    "on_part_begin": on_part_begin,
                    "on_part_data": on_part_data,
                    "on_part_end": on_part_end,
                    "on_header_field": on_header_field,
                    "on_header_value": on_header_value,
                    "on_header_end": on_header_end,
                    "on_headers_finished": on_headers_finished,
                    "on_end": _on_end,
                },
                max_size=self.config["MAX_BODY_SIZE"],
            )

        else:
            self.logger.warning("Unknown Content-Type: %r", content_type)
            raise FormParserError("Unknown Content-Type: {}".format(content_type))

        self.parser = parser

    def write(self, data: bytes) -> int:
        """Write some data. The parser will forward this to the appropriate
        underlying parser.

        Args:
            data: The data to write.

        Returns:
            The number of bytes processed.
        """
        self.bytes_received += len(data)
        # TODO: check the parser's return value for errors?
        assert self.parser is not None
        return self.parser.write(data)

    def finalize(self) -> None:
        """Finalize the parser."""
        if self.parser is not None and hasattr(self.parser, "finalize"):
            self.parser.finalize()

    def close(self) -> None:
        """Close the parser."""
        if self.parser is not None and hasattr(self.parser, "close"):
            self.parser.close()

    def __repr__(self) -> str:
        return "{}(content_type={!r}, parser={!r})".format(self.__class__.__name__, self.content_type, self.parser)
1780
1781
def create_form_parser(
    headers: dict[str, bytes],
    on_field: OnFieldCallback | None,
    on_file: OnFileCallback | None,
    trust_x_headers: bool = False,
    config: dict[Any, Any] = {},
) -> FormParser:
    """This function is a helper function to aid in creating a FormParser
    instances. Given a dictionary-like headers object, it will determine
    the correct information needed, instantiate a FormParser with the
    appropriate values and given callbacks, and then return the corresponding
    parser.

    Args:
        headers: A dictionary-like object of HTTP headers. The only required header is Content-Type.
        on_field: Callback to call with each parsed field.
        on_file: Callback to call with each parsed file.
        trust_x_headers: Whether or not to trust information received from certain X-Headers - for example, the file
            name from X-File-Name. When False (the default), X-File-Name is ignored.
        config: Configuration variables to pass to the FormParser.

    Raises:
        ValueError: If no Content-Type header is present.
    """
    content_type: str | bytes | None = headers.get("Content-Type")
    if content_type is None:
        logging.getLogger(__name__).warning("No Content-Type header given")
        raise ValueError("No Content-Type header given!")

    # Boundaries are optional (the FormParser will raise if one is needed
    # but not given).
    content_type, params = parse_options_header(content_type)
    boundary = params.get(b"boundary")

    # We need content_type to be a string, not a bytes object.
    content_type = content_type.decode("latin-1")

    # File names are optional. X-File-Name is client-supplied (untrusted)
    # input, so only honor it when the caller explicitly opted in.
    file_name = headers.get("X-File-Name") if trust_x_headers else None

    # Instantiate a form parser.
    form_parser = FormParser(content_type, on_field, on_file, boundary=boundary, file_name=file_name, config=config)

    # Return our parser.
    return form_parser
1824
1825
def parse_form(
    headers: dict[str, bytes],
    input_stream: SupportsRead,
    on_field: OnFieldCallback | None,
    on_file: OnFileCallback | None,
    chunk_size: int = 1048576,
) -> None:
    """This function is useful if you just want to parse a request body,
    without too much work. Pass it a dictionary-like object of the request's
    headers, and a file-like object for the input stream, along with two
    callbacks that will get called whenever a field or file is parsed.

    Args:
        headers: A dictionary-like object of HTTP headers. The only required header is Content-Type.
        input_stream: A file-like object that represents the request body. The read() method must return bytestrings.
        on_field: Callback to call with each parsed field.
        on_file: Callback to call with each parsed file.
        chunk_size: The maximum size to read from the input stream and write to the parser at one time.
            Defaults to 1 MiB.
    """
    # Build the appropriate parser for this request's Content-Type.
    form_parser = create_form_parser(headers, on_field, on_file)

    # Determine how much we are allowed to read: the declared Content-Length
    # if present, otherwise unbounded (read until EOF).
    raw_length = headers.get("Content-Length")
    limit: int | float = float("inf") if raw_length is None else int(raw_length)

    consumed = 0
    while True:
        # Never request more than the remaining declared body length.
        want = int(min(limit - consumed, chunk_size))
        chunk = input_stream.read(want)

        # Feed the parser and track how much we've pulled from the stream.
        form_parser.write(chunk)
        consumed += len(chunk)

        # A short read means the stream is exhausted; reaching the declared
        # Content-Length means the body is complete.
        if len(chunk) != want or consumed == limit:
            break

    # Signal end-of-input so any in-progress field/file is flushed.
    form_parser.finalize()