Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/python

1from __future__ import annotations

3import logging

4import os

5import shutil

6import sys

7import tempfile

8from email.message import Message

9from enum import IntEnum

10from io import BufferedRandom, BytesIO

11from numbers import Number

12from typing import TYPE_CHECKING, cast

14from .decoders import Base64Decoder, QuotedPrintableDecoder

15from .exceptions import FileError, FormParserError, MultipartParseError, QuerystringParseError

if TYPE_CHECKING:
    # Everything in this branch is for static type checkers only. ``TYPE_CHECKING`` is False at
    # runtime, so none of these names exist when the module is executed; they may only be used
    # inside annotations (which are lazy thanks to ``from __future__ import annotations``) or
    # inside string arguments such as ``cast("...", x)``.
    from collections.abc import Callable
    from typing import Any, Literal, Protocol, TypeAlias, TypedDict

    class SupportsRead(Protocol):
        """Structural type: any object with a ``read(n) -> bytes`` method."""

        def read(self, __n: int) -> bytes: ...

    class QuerystringCallbacks(TypedDict, total=False):
        """Callback table accepted by ``QuerystringParser``; every key is optional.

        Zero-argument entries are notification callbacks; three-argument entries receive
        ``(data, start, end)`` where ``data[start:end]`` is the relevant slice.
        """

        on_field_start: Callable[[], None]
        on_field_name: Callable[[bytes, int, int], None]
        on_field_data: Callable[[bytes, int, int], None]
        on_field_end: Callable[[], None]
        on_end: Callable[[], None]

    class OctetStreamCallbacks(TypedDict, total=False):
        """Callback table accepted by ``OctetStreamParser``; every key is optional."""

        on_start: Callable[[], None]
        on_data: Callable[[bytes, int, int], None]
        on_end: Callable[[], None]

    class MultipartCallbacks(TypedDict, total=False):
        """Optional callbacks for multipart part and header events (same calling conventions as above)."""

        on_part_begin: Callable[[], None]
        on_part_data: Callable[[bytes, int, int], None]
        on_part_end: Callable[[], None]
        on_header_begin: Callable[[], None]
        on_header_field: Callable[[bytes, int, int], None]
        on_header_value: Callable[[bytes, int, int], None]
        on_header_end: Callable[[], None]
        on_headers_finished: Callable[[], None]
        on_end: Callable[[], None]

    class FileConfig(TypedDict, total=False):
        """Options read by ``File`` (all optional)."""

        # Directory for on-disk files; ``None`` lets ``tempfile`` pick its default location.
        UPLOAD_DIR: str | bytes | None
        # Passed as ``delete=`` to ``tempfile.NamedTemporaryFile``.
        UPLOAD_DELETE_TMP: bool
        # Save under the (base-named) uploaded file name instead of a random temporary name.
        UPLOAD_KEEP_FILENAME: bool
        # Keep the uploaded file's extension on the on-disk file.
        UPLOAD_KEEP_EXTENSIONS: bool
        # ``File`` moves its data to disk once more than this many bytes have been written.
        MAX_MEMORY_FILE_SIZE: int

    class FormParserConfig(FileConfig):
        """``FileConfig`` extended with form-parser level settings."""

        UPLOAD_ERROR_ON_BAD_CTE: bool
        MAX_BODY_SIZE: float
        MAX_HEADER_COUNT: int
        MAX_HEADER_SIZE: int

    # Callback names as passed to ``BaseParser.callback`` / ``BaseParser.set_callback``, i.e.
    # WITHOUT the ``on_`` prefix; those methods prepend ``"on_"`` to build the dictionary key.
    CallbackName: TypeAlias = Literal[
        "start",
        "data",
        "end",
        "field_start",
        "field_name",
        "field_data",
        "field_end",
        "part_begin",
        "part_data",
        "part_end",
        "header_begin",
        "header_field",
        "header_value",
        "header_end",
        "headers_finished",
    ]

# Unique sentinel meaning "no cached value has been computed yet". It must be distinct from
# ``None`` because ``None`` is a legitimate field value (see ``Field.set_none``).
_missing = object()

class QuerystringState(IntEnum):
    """States of the streaming querystring parser.

    The parser remembers one of these values between chunks and switches on it
    to decide how the next byte of input is handled.
    """

    # Between fields: separators are skipped until the first byte of a field is seen.
    BEFORE_FIELD = 0
    # Inside a field name, looking for "=" or a separator.
    FIELD_NAME = 1
    # Inside a field value, looking for the next separator.
    FIELD_DATA = 2

class MultipartState(IntEnum):
    """States of the streaming multipart parser.

    The parser remembers one of these values between chunks and switches on it
    to decide how the next byte of input is handled. Members are numbered in
    the order a well-formed body is expected to visit them: boundary, header
    lines, part data, closing boundary.
    """

    # Opening boundary.
    START = 0
    START_BOUNDARY = 1

    # Header lines of a part.
    HEADER_FIELD_START = 2
    HEADER_FIELD = 3
    HEADER_VALUE_START = 4
    HEADER_VALUE = 5
    HEADER_VALUE_ALMOST_DONE = 6
    HEADERS_ALMOST_DONE = 7

    # Body of a part.
    PART_DATA_START = 8
    PART_DATA = 9
    PART_DATA_END = 10

    # Closing boundary and end of input.
    END_BOUNDARY = 11
    END = 12

114

115

# Bit flags used by the multipart parser.
FLAG_PART_BOUNDARY = 1
FLAG_LAST_BOUNDARY = 2

# Byte values of the characters the parsers compare against. Indexing or
# iterating a ``bytes`` object yields integers, so these are stored as ints
# once instead of being recomputed for every comparison.
CR = ord("\r")
LF = ord("\n")
COLON = ord(":")
SPACE = ord(" ")
HYPHEN = ord("-")
AMPERSAND = ord("&")
SEMICOLON = ord(";")
LOWER_A = ord("a")
LOWER_Z = ord("z")
NULL = ord("\x00")

133

# fmt: off
# Set of byte values (ints, as produced by indexing ``bytes``) that are legal in an HTTP token.
# Per RFC 7230, section 3.2.6, this is every ASCII letter and digit
# plus these characters: !#$%&'*+-.^_`|~
# A frozenset is used so that membership tests are O(1) and the table cannot be mutated.
TOKEN_CHARS_SET = frozenset(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b"abcdefghijklmnopqrstuvwxyz"
    b"0123456789"
    b"!#$%&'*+-.^_`|~")
# fmt: on

# Fallback limits. The code that applies them is not in this part of the file;
# presumably they back the MAX_HEADER_COUNT / MAX_HEADER_SIZE config keys - verify against the parser.
DEFAULT_MAX_HEADER_COUNT = 8
"""Default maximum number of headers allowed per multipart part."""

# 4 KiB of header content plus 128 bytes of allowance for the surrounding syntax.
DEFAULT_MAX_HEADER_SIZE = 4096 + 128
"""Default maximum size of a single multipart header line, including syntax overhead."""

MAX_BOUNDARY_LENGTH = 256
"""Maximum allowed length of a multipart boundary.

[RFC 2046 §5.1.1](https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.1)
recommends boundaries be at most 70 bytes. 256 bytes is generous headroom over
every HTTP client.
"""

158

159

def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]:
    """Parse a Content-Type style header into ``(content_type, {parameters})``.

    The media type is always returned lower-cased and stripped, whichever code
    path produces it, so callers can compare it against lower-case constants
    such as ``b"multipart/form-data"`` (media types are case-insensitive).

    Args:
        value: The raw header value. ``bytes`` are decoded as latin-1 (the WSGI
            convention). ``None`` and empty values are accepted.

    Returns:
        A 2-tuple of the media type as ``bytes`` and a dict mapping parameter
        names to values, both latin-1 encoded ``bytes``. Parameter names are
        lower-cased by the underlying ``email`` parser; parameter values keep
        their case. An empty or missing header yields ``(b"", {})``.
    """
    # Uses email.message.Message to parse the header as described in PEP 594.
    # Ref: https://peps.python.org/pep-0594/#cgi
    if not value:
        return (b"", {})

    # If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.
    if isinstance(value, bytes):  # pragma: no cover
        value = value.decode("latin-1")

    # For types
    assert isinstance(value, str), "Value should be a string by now"

    # Without a semicolon there are no options: the whole string is the media type.
    if ";" not in value:
        return (value.lower().strip().encode("latin-1"), {})

    message = Message()
    message["content-type"] = value
    # `get_params()` can raise on malformed RFC 2231 headers found via fuzzing:
    # - ValueError on oversized continuation indices (all supported versions).
    # - TypeError on mixed `filename*` + `filename*0*` continuations (Python 3.12 only;
    #   3.13+ silently picks a value).
    # TODO: drop `TypeError` once Python 3.12 reaches EOL (October 2028).
    try:
        params = message.get_params()
    except (TypeError, ValueError):  # pragma: no cover
        # Fall back to the bare media type and drop the unparsable options.
        return (value.split(";", 1)[0].lower().strip().encode("latin-1"), {})

    # If there were no parameters, this would have already returned above
    assert params, "At least the content type value should be present"

    # The first entry is the media type itself. Lower-case it so this path agrees
    # with the two early returns above; previously "Multipart/Form-Data; boundary=x"
    # kept its case here while "Multipart/Form-Data" alone was lower-cased.
    ctype = params.pop(0)[0].lower().encode("latin-1")

    options: dict[bytes, bytes] = {}
    for key, raw in params:
        # RFC 2231 encoded parameters come back as a (charset, language, value)
        # 3-tuple; the last element is the value.
        # See: https://docs.python.org/3/library/email.compat32-message.html
        option = raw[-1] if isinstance(raw, tuple) else raw

        # IE6 sends the full client-side path ("C:\dir\file" or "\\host\share\file")
        # instead of just the file name; keep only the last path component.
        if key == "filename" and (option[1:3] == ":\\" or option[:2] == "\\\\"):
            option = option.split("\\")[-1]

        options[key.encode("latin-1")] = option.encode("latin-1")
    return ctype, options

209

210

class Field:
    """A Field object represents a (parsed) form field. It represents a single
    field with a corresponding name and value.

    The name that a :class:`Field` will be instantiated with is the same name
    that would be found in the following HTML::

        <input name="name_goes_here" type="text"/>

    Data is accumulated as a list of chunks and joined lazily; the joined
    result is cached until more data is written.

    This class defines two methods, :meth:`on_data` and :meth:`on_end`, that
    will be called when data is written to the Field, and when the Field is
    finalized, respectively.

    Args:
        name: The name of the form field.
        content_type: The value of the Content-Type header for this field.
    """

    def __init__(self, name: bytes | None, *, content_type: str | None = None) -> None:
        self._name = name
        self._value: list[bytes] = []
        self._content_type = content_type

        # Joined version of _value, cached for speed. `_missing` means "not
        # computed yet"; None is a real value (see set_none()).
        self._cache = _missing

    @classmethod
    def from_value(cls, name: bytes, value: bytes | None) -> Field:
        """Create a :class:`Field`, set its value and finalize it.

        Args:
            name: the name of the form field.
            value: the value of the form field - either a bytestring or None.

        Returns:
            A new instance of a [`Field`][python_multipart.Field].
        """
        f = cls(name)
        if value is None:
            f.set_none()
        else:
            f.write(value)
        f.finalize()
        return f

    def write(self, data: bytes) -> int:
        """Write some data into the form field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the Field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        self._value.append(data)
        # The cached join (or a None set by set_none()) is now stale.
        self._cache = _missing
        return len(data)

    def on_end(self) -> None:
        """This method is called whenever the Field is finalized."""
        self._materialize()

    def finalize(self) -> None:
        """Finalize the form field."""
        self.on_end()

    def close(self) -> None:
        """Close the Field object, releasing the list of written chunks.

        The joined value stays available through :attr:`value`. Calling
        ``close()`` more than once is harmless; writing after ``close()`` is
        not supported.
        """
        # Make sure the value survives the removal of the chunk list.
        self._materialize()

        try:
            del self._value
        except AttributeError:
            # Already closed: a repeated close() must not raise.
            pass

    def set_none(self) -> None:
        """Some fields in a querystring can possibly have a value of None - for
        example, the string "foo&bar=&baz=asdf" will have a field with the
        name "foo" and value None, one with name "bar" and value "", and one
        with name "baz" and value "asdf". Since the write() interface doesn't
        support writing None, this function will set the field value to None.
        """
        self._cache = None

    def _materialize(self) -> None:
        """Join the written chunks into the cache unless a value is already cached."""
        if self._cache is _missing:
            self._cache = b"".join(self._value)

    @property
    def field_name(self) -> bytes | None:
        """This property returns the name of the field."""
        return self._name

    @property
    def value(self) -> bytes | None:
        """This property returns the value of the form field."""
        self._materialize()

        assert isinstance(self._cache, bytes) or self._cache is None
        return self._cache

    @property
    def content_type(self) -> str | None:
        """This property returns the content_type value of the field."""
        return self._content_type

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Field):
            return self.field_name == other.field_name and self.value == other.value
        else:
            return NotImplemented

    def __repr__(self) -> str:
        value = self.value
        if value is not None and len(value) > 97:
            # Show the first 97 bytes and put three dots before the closing
            # quote. The quote character is taken from the repr itself because
            # bytes containing "'" are rendered with double quotes.
            head = repr(value[:97])
            v = head[:-1] + "..." + head[-1]
        else:
            v = repr(value)

        return f"{self.__class__.__name__}(field_name={self.field_name!r}, value={v})"

344

345

class File:
    """This class represents an uploaded file. It handles writing file data to
    either an in-memory file or a temporary file on-disk, if the optional
    threshold is passed.

    There are some options that can be passed to the File to change behavior
    of the class. Valid options are as follows:

    | Name                  | Type  | Default | Description |
    |-----------------------|-------|---------|-------------|
    | UPLOAD_DIR            | `str` or `bytes` | None | The directory to store uploaded files in. If this is None, the platform's default temporary directory is used. |
    | UPLOAD_DELETE_TMP     | `bool`| True    | Passed as `delete` to `tempfile.NamedTemporaryFile`: whether a temporary file is removed when it is closed. Has no effect on files saved under their own name. |
    | UPLOAD_KEEP_FILENAME  | `bool`| False   | Whether or not to keep the filename of the uploaded file. If True (and UPLOAD_DIR is set), the base name of the uploaded file name (directory components removed) is used as the on-disk name. Otherwise, or when the upload has no usable file name, a temporary name will be used. |
    | UPLOAD_KEEP_EXTENSIONS| `bool`| False   | Whether or not to keep the uploaded file's extension. If False, a temporary file gets no suffix and a file kept under its own name loses its extension. Otherwise, the file's extension will be maintained. Note that this will properly combine with the UPLOAD_KEEP_FILENAME setting. |
    | MAX_MEMORY_FILE_SIZE  | `int` | 1 MiB*  | The maximum number of bytes of a File to keep in memory. The contents of a File are kept in memory until this limit is exceeded, after which they are written to a file on disk. This behavior can be disabled by setting this value to an appropriately large value (or, for example, infinity, such as `float('inf')`). |

    \\* Default documented for the parser-level configuration. `File` itself
    applies no default: when the key is absent from `config`, the data stays in
    memory until `flush_to_disk()` is called explicitly.

    Args:
        file_name: The name of the file that this [`File`][python_multipart.File] represents.
        field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,
            the file was uploaded with Content-Type application/octet-stream.
        config: The configuration for this File. See above for valid configuration keys and their corresponding values.
            The mapping is stored by reference (not copied); omitting it gives every File its own empty configuration.
        content_type: The value of the Content-Type header.
    """  # noqa: E501

    def __init__(
        self,
        file_name: bytes | None,
        field_name: bytes | None = None,
        config: FileConfig | None = None,
        *,
        content_type: str | None = None,
    ) -> None:
        # Save configuration, set other variables default. A fresh dict is created per
        # instance when none is given, so instances never share a default mapping.
        self.logger = logging.getLogger(__name__)
        self._config = config if config is not None else {}
        self._in_memory = True
        self._bytes_written = 0
        self._fileobj: BytesIO | BufferedRandom = BytesIO()

        # Save the provided field/file name and content type.
        self._field_name = field_name
        self._file_name = file_name
        self._content_type = content_type

        # Our actual file name is None by default, since, depending on our
        # config, we may not actually use the provided name.
        self._actual_file_name: bytes | None = None

        # Base name and extension of the uploaded file name. Both default to empty so that
        # _get_disk_file() also works for uploads without a file name.
        self._file_base: bytes = b""
        self._ext: bytes = b""
        if file_name is not None:
            # Extract just the basename to avoid directory traversal
            basename = os.path.basename(file_name)
            self._file_base, self._ext = os.path.splitext(basename)

    @property
    def field_name(self) -> bytes | None:
        """The form field associated with this file. May be None if there isn't
        one, for example when we have an application/octet-stream upload.
        """
        return self._field_name

    @property
    def file_name(self) -> bytes | None:
        """The file name given in the upload request."""
        return self._file_name

    @property
    def actual_file_name(self) -> bytes | None:
        """The file name that this file is saved as. Will be None if it's not
        currently saved on disk.
        """
        return self._actual_file_name

    @property
    def file_object(self) -> BytesIO | BufferedRandom:
        """The file object that we're currently writing to. Note that this
        will either be an instance of a :class:`io.BytesIO`, or a regular file
        object.
        """
        return self._fileobj

    @property
    def size(self) -> int:
        """The total size of this file, counted as the number of bytes that
        currently have been written to the file.
        """
        return self._bytes_written

    @property
    def in_memory(self) -> bool:
        """A boolean representing whether or not this file object is currently
        stored in-memory or on-disk.
        """
        return self._in_memory

    @property
    def content_type(self) -> str | None:
        """The Content-Type value for this part, if it was set."""
        return self._content_type

    def flush_to_disk(self) -> None:
        """If the file is already on-disk, do nothing. Otherwise, copy from
        the in-memory buffer to a disk file, and then reassign our internal
        file object to this new disk file.

        Note that if you attempt to flush a file that is already on-disk, a
        warning will be logged to this module's logger.

        Raises:
            FileError: If the on-disk file cannot be opened. The in-memory
                buffer is left untouched in that case.
        """
        if not self._in_memory:
            self.logger.warning("Trying to flush to disk when we're not in memory")
            return

        # Open the destination first: if that fails, the in-memory buffer and its
        # write position have not been disturbed.
        new_file = self._get_disk_file()
        memory_file = self._fileobj

        try:
            # Copy everything written so far, then position the disk file for appending.
            memory_file.seek(0)
            shutil.copyfileobj(memory_file, new_file)
            new_file.seek(self._bytes_written)
        except Exception:
            # Don't leak the half-written disk file; keep using the in-memory buffer,
            # positioned at its end so later writes still append.
            new_file.close()
            self._actual_file_name = None
            memory_file.seek(0, os.SEEK_END)
            raise

        # Switch over to the disk file and release the in-memory buffer.
        self._fileobj = new_file
        self._in_memory = False
        memory_file.close()

    def _get_disk_file(self) -> BufferedRandom:
        """Open the on-disk file for this upload and record its name.

        With UPLOAD_DIR set, UPLOAD_KEEP_FILENAME true and a usable file name,
        the file is created as ``UPLOAD_DIR/<base name>[<ext>]``. In every
        other case a ``tempfile.NamedTemporaryFile`` is created (in UPLOAD_DIR
        if given).

        Returns:
            The opened binary read/write file object.

        Raises:
            FileError: If the file cannot be created.
        """
        self.logger.info("Opening a file on disk")

        file_dir = self._config.get("UPLOAD_DIR")
        keep_filename = self._config.get("UPLOAD_KEEP_FILENAME", False)
        keep_extensions = self._config.get("UPLOAD_KEEP_EXTENSIONS", False)
        delete_tmp = self._config.get("UPLOAD_DELETE_TMP", True)
        tmp_file: BufferedRandom

        # If we have a directory, are to keep the filename, and actually have one...
        if file_dir is not None and keep_filename and self._file_base:
            self.logger.info("Saving with filename in: %r", file_dir)

            # Build our filename.
            fname = self._file_base + self._ext if keep_extensions else self._file_base

            # The file name is bytes, so the directory must be bytes as well:
            # os.path.join() refuses to mix str and bytes components.
            path = os.path.join(os.fsencode(file_dir), fname)
            try:
                self.logger.info("Opening file: %r", path)
                tmp_file = open(path, "w+b")
            except (OSError, ValueError) as exc:
                # ValueError covers names with an embedded NUL byte.
                self.logger.exception("Error opening temporary file")
                raise FileError("Error opening temporary file: %r" % path) from exc
        else:
            if file_dir is not None and keep_filename:
                # The upload carried no usable file name, so there is nothing to keep.
                self.logger.warning("No file name to keep; falling back to a temporary file name")

            # tempfile works with str names here. os.fsdecode() uses the filesystem
            # encoding and round-trips undecodable bytes instead of raising.
            suffix = os.fsdecode(self._ext) if keep_extensions else None
            tmp_dir = None if file_dir is None else os.fsdecode(file_dir)

            # Create a temporary (named) file with the appropriate settings.
            self.logger.info(
                "Creating a temporary file with options: %r", {"suffix": suffix, "delete": delete_tmp, "dir": tmp_dir}
            )
            try:
                tmp_file = cast(
                    BufferedRandom, tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_tmp, dir=tmp_dir)
                )
            except (OSError, ValueError) as exc:
                # ValueError covers suffixes with an embedded NUL byte.
                self.logger.exception("Error creating named temporary file")
                raise FileError("Error creating named temporary file") from exc

        # Record the name as bytes: open() on a bytes path reports bytes,
        # NamedTemporaryFile reports str.
        self._actual_file_name = os.fsencode(tmp_file.name)
        return tmp_file

    def write(self, data: bytes) -> int:
        """Write some data to the File.

        Args:
            data: The data to write to the file.

        Returns:
            The number of bytes written.
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the File.

        Args:
            data: The data to write to the file.

        Returns:
            The number of bytes written.
        """
        bwritten = self._fileobj.write(data)

        # Keep track of how many bytes we've written. This happens before the
        # short-write check so that `size` always matches what is in the file.
        self._bytes_written += bwritten

        # If the bytes written isn't the same as the length, just return.
        if bwritten != len(data):
            self.logger.warning("bwritten != len(data) (%d != %d)", bwritten, len(data))
            return bwritten

        # If we're in-memory and are over our limit, we create a file.
        max_memory_file_size = self._config.get("MAX_MEMORY_FILE_SIZE")
        if self._in_memory and max_memory_file_size is not None and (self._bytes_written > max_memory_file_size):
            self.logger.info("Flushing to disk")
            self.flush_to_disk()

        # Return the number of bytes written.
        return bwritten

    def on_end(self) -> None:
        """This method is called whenever the File is finalized."""
        # Flush the underlying file object
        self._fileobj.flush()

    def finalize(self) -> None:
        """Finalize the form file. This will not close the underlying file,
        but simply signal that we are finished writing to the File.
        """
        self.on_end()

    def close(self) -> None:
        """Close the File object. This will actually close the underlying
        file object (whether it's a :class:`io.BytesIO` or an actual file
        object).
        """
        self._fileobj.close()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(file_name={self.file_name!r}, field_name={self.field_name!r})"

598

599

class BaseParser:
    """Common base of all parsers: stores callbacks and knows how to invoke them.

    Callbacks live in the ``callbacks`` dictionary under the key
    ``"on_" + name`` and come in two flavours:

    * *Notification callbacks* signal that something happened (for example
      that a new part of a multipart message started). They are called with
      no arguments.
    * *Data callbacks* deliver data (for example a piece of a part's body).
      They are called as::

          data_callback(data, start, end)

      where ``data`` is a bytestring and ``start``/``end`` are integer
      indexes into it; ``data[start:end]`` is the portion the callback
      should look at. The buffer is handed over without being sliced,
      because copying severely hurts performance.
    """

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)
        self.callbacks: QuerystringCallbacks | OctetStreamCallbacks | MultipartCallbacks = {}

    def callback(
        self, name: CallbackName, data: bytes | None = None, start: int | None = None, end: int | None = None
    ) -> None:
        """Invoke the callback registered under ``name``; do nothing if there is none.

        Args:
            name: The name of the callback to call, without the ``on_`` prefix.
            data: Data to pass to the callback. If None, the callback is treated as a notification callback
                and called without arguments.
            start: Start index into ``data``, passed through to a data callback.
            end: End index into ``data``, passed through to a data callback.
        """
        on_name = "on_" + name
        registered = self.callbacks.get(on_name)
        if registered is None:
            return
        handler = cast("Callable[..., Any]", registered)

        # Notification callback: no buffer was supplied.
        if data is None:
            self.logger.debug("Calling %s with no data", on_name)
            handler()
            return

        # Data callback: an empty slice is not worth a call.
        if start is not None and start == end:
            return

        self.logger.debug("Calling %s with data[%d:%d]", on_name, start, end)
        handler(data, start, end)

    def set_callback(self, name: CallbackName, new_func: Callable[..., Any] | None) -> None:
        """Register, replace or remove the callback stored for ``name``.

        :param name: The name of the callback to set, without the ``on_``
                     prefix.

        :param new_func: The new function for the callback. If None, the
                         callback is removed (with no error if it does not
                         exist).
        """
        key = "on_" + name
        if new_func is not None:
            self.callbacks[key] = new_func  # type: ignore[literal-required]
        else:
            self.callbacks.pop(key, None)  # type: ignore[misc]

    def close(self) -> None:
        pass  # pragma: no cover

    def finalize(self) -> None:
        pass  # pragma: no cover

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"

678

679

class OctetStreamParser(BaseParser):
    """This parser parses an octet-stream request body and calls callbacks when
    incoming data is received. Callbacks are as follows:

    | Callback Name  | Parameters      | Description                                         |
    |----------------|-----------------|-----------------------------------------------------|
    | on_start       | None            | Called when the first data is parsed.               |
    | on_data        | data, start, end| Called for each data chunk that is parsed.          |
    | on_end         | None            | Called when the parser is finished parsing all data.|

    Args:
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].
            The dictionary is stored by reference, so later `set_callback()` calls modify it. When omitted, each
            parser gets its own empty dictionary.
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

    Raises:
        ValueError: If `max_size` is not a number, is less than 1, or is NaN.
    """

    def __init__(self, callbacks: OctetStreamCallbacks | None = None, max_size: float = float("inf")):
        super().__init__()
        # Never share one default dict between instances: set_callback() mutates
        # self.callbacks, so a shared default would leak callbacks into every
        # parser created afterwards without an explicit `callbacks` argument.
        self.callbacks = {} if callbacks is None else callbacks
        self._started = False

        # `not max_size >= 1` (rather than `max_size < 1`) also rejects NaN, which
        # would otherwise be accepted and silently disable the size limit.
        if not isinstance(max_size, Number) or not max_size >= 1:
            raise ValueError("max_size must be a positive number, not %r" % (max_size,))
        self.max_size: int | float = max_size
        self._current_size = 0

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        and then pass the data to the underlying callback.

        Data beyond `max_size` (counted over all calls) is dropped: only the
        part of `data` that still fits is passed to `on_data`.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written, i.e. the number of bytes of `data` that were accepted.
        """
        # The start notification is sent once, on the first write.
        if not self._started:
            self.callback("start")
            self._started = True

        # Truncate data length.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        # Increment size, then callback, in case there's an exception.
        self._current_size += data_len
        self.callback("data", data, 0, data_len)
        return data_len

    def finalize(self) -> None:
        """Finalize this parser, which signals that we are finished parsing,
        and sends the on_end callback.
        """
        self.callback("end")

    def __repr__(self) -> str:
        return "%s()" % self.__class__.__name__

746

747

748class QuerystringParser(BaseParser):

749 """This is a streaming querystring parser. It will consume data, and call

750 the callbacks given when it has data.

751

752 | Callback Name | Parameters | Description |

753 |----------------|-----------------|-----------------------------------------------------|

754 | on_field_start | None | Called when a new field is encountered. |

755 | on_field_name | data, start, end| Called when a portion of a field's name is encountered. |

756 | on_field_data | data, start, end| Called when a portion of a field's data is encountered. |

757 | on_field_end | None | Called when the end of a field is encountered. |

758 | on_end | None | Called when the parser is finished parsing all data.|

759

760 Args:

761 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

762 strict_parsing: Whether or not to parse the body strictly. Defaults to False. If this is set to True, then the

763 behavior of the parser changes as the following: if a field has a value with an equal sign

764 (e.g. "foo=bar", or "foo="), it is always included. If a field has no equals sign (e.g. "...&name&..."),

765 it will be treated as an error if 'strict_parsing' is True, otherwise included. If an error is encountered,

766 then a [`QuerystringParseError`][python_multipart.exceptions.QuerystringParseError] will be raised.

767 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

768 """ # noqa: E501

769

770 state: QuerystringState

771

772 def __init__(

773 self, callbacks: QuerystringCallbacks = {}, strict_parsing: bool = False, max_size: float = float("inf")

774 ) -> None:

775 super().__init__()

776 self.state = QuerystringState.BEFORE_FIELD

777 self._found_sep = False

778

779 self.callbacks = callbacks

780

781 # Max-size stuff

782 if not isinstance(max_size, Number) or max_size < 1:

783 raise ValueError("max_size must be a positive number, not %r" % max_size)

784 self.max_size: int | float = max_size

785 self._current_size = 0

786

787 # Should parsing be strict?

788 self.strict_parsing = strict_parsing

789

790 def write(self, data: bytes) -> int:

791 """Write some data to the parser, which will perform size verification,

792 parse into either a field name or value, and then pass the

793 corresponding data to the underlying callback. If an error is

794 encountered while parsing, a QuerystringParseError will be raised. The

795 "offset" attribute of the raised exception will be set to the offset in

796 the input data chunk (NOT the overall stream) that caused the error.

797

798 Args:

799 data: The data to write to the parser.

800

801 Returns:

802 The number of bytes written.

803 """

804 # Handle sizing.

805 data_len = len(data)

806 if (self._current_size + data_len) > self.max_size:

807 # We truncate the length of data that we are to process.

808 new_size = int(self.max_size - self._current_size)

809 self.logger.warning(

810 "Current size is %d (max %d), so truncating data length from %d to %d",

811 self._current_size,

812 self.max_size,

813 data_len,

814 new_size,

815 )

816 data_len = new_size

817

818 l = 0

819 try:

820 l = self._internal_write(data, data_len)

821 finally:

822 self._current_size += l

823

824 return l

825

826 def _internal_write(self, data: bytes, length: int) -> int:

827 state = self.state

828 strict_parsing = self.strict_parsing

829 found_sep = self._found_sep

830

831 i = 0

832 while i < length:

833 ch = data[i]

834

835 # Depending on our state...

836 if state == QuerystringState.BEFORE_FIELD:

837 # If the 'found_sep' flag is set, we've already encountered

838 # and skipped a single separator. If so, we check our strict

839 # parsing flag and decide what to do. Otherwise, we haven't

840 # yet reached a separator, and thus, if we do, we need to skip

841 # it as it will be the boundary between fields that's supposed

842 # to be there.

843 if ch == AMPERSAND or ch == SEMICOLON:

844 if found_sep:

845 # If we're parsing strictly, we disallow blank chunks.

846 if strict_parsing:

847 raise QuerystringParseError("Skipping duplicate ampersand/semicolon at %d" % i, offset=i)

848 else:

849 self.logger.debug("Skipping duplicate ampersand/semicolon at %d", i)

850 else:

851 # This case is when we're skipping the (first)

852 # separator between fields, so we just set our flag

853 # and continue on.

854 found_sep = True

855 else:

856 # Emit a field-start event, and go to that state. Also,

857 # reset the "found_sep" flag, for the next time we get to

858 # this state.

859 self.callback("field_start")

860 i -= 1

861 state = QuerystringState.FIELD_NAME

862 found_sep = False

863

864 elif state == QuerystringState.FIELD_NAME:

865 # Try and find a separator - we ensure that, if we do, we only

866 # look for the equal sign before it.

867 sep_pos = data.find(b"&", i)

868 if sep_pos == -1:

869 sep_pos = data.find(b";", i)

870

871 # See if we can find an equals sign in the remaining data. If

872 # so, we can immediately emit the field name and jump to the

873 # data state.

874 if sep_pos != -1:

875 equals_pos = data.find(b"=", i, sep_pos)

876 else:

877 equals_pos = data.find(b"=", i)

878

879 if equals_pos != -1:

880 # Emit this name.

881 self.callback("field_name", data, i, equals_pos)

882

883 # Jump i to this position. Note that it will then have 1

884 # added to it below, which means the next iteration of this

885 # loop will inspect the character after the equals sign.

886 i = equals_pos

887 state = QuerystringState.FIELD_DATA

888 else:

889 # No equals sign found.

890 if not strict_parsing:

891 # See also comments in the QuerystringState.FIELD_DATA case below.

892 # If we found the separator, we emit the name and just

893 # end - there's no data callback at all (not even with

894 # a blank value).

895 if sep_pos != -1:

896 self.callback("field_name", data, i, sep_pos)

897 self.callback("field_end")

898

899 i = sep_pos - 1

900 state = QuerystringState.BEFORE_FIELD

901 else:

902 # Otherwise, no separator in this block, so the

903 # rest of this chunk must be a name.

904 self.callback("field_name", data, i, length)

905 i = length

906

907 else:

908 # We're parsing strictly. If we find a separator,

909 # this is an error - we require an equals sign.

910 if sep_pos != -1:

911 raise QuerystringParseError(

912 "When strict_parsing is True, we require an "

913 "equals sign in all field chunks. Did not "

914 "find one in the chunk that starts at %d" % (i,),

915 offset=i,

916 )

917

918 # No separator in the rest of this chunk, so it's just

919 # a field name.

920 self.callback("field_name", data, i, length)

921 i = length

922

923 elif state == QuerystringState.FIELD_DATA:

924 # Try finding either an ampersand or a semicolon after this

925 # position.

926 sep_pos = data.find(b"&", i)

927 if sep_pos == -1:

928 sep_pos = data.find(b";", i)

929

930 # If we found it, callback this bit as data and then go back

931 # to expecting to find a field.

932 if sep_pos != -1:

933 self.callback("field_data", data, i, sep_pos)

934 self.callback("field_end")

935

936 # Note that we go to the separator, which brings us to the

937 # "before field" state. This allows us to properly emit

938 # "field_start" events only when we actually have data for

939 # a field of some sort.

940 i = sep_pos - 1

941 state = QuerystringState.BEFORE_FIELD

942

943 # Otherwise, emit the rest as data and finish.

944 else:

945 self.callback("field_data", data, i, length)

946 i = length

947

948 else: # pragma: no cover (error case)

949 msg = "Reached an unknown state %d at %d" % (state, i)

950 self.logger.warning(msg)

951 raise QuerystringParseError(msg, offset=i)

952

953 i += 1

954

955 self.state = state

956 self._found_sep = found_sep

957 return length

958

def finalize(self) -> None:
    """Signal that no more data will be fed to this parser.

    If parsing stopped while a field was still open (the parser is in the
    field-name or field-data state), the ``field_end`` callback is emitted
    first so that the open field is closed. The ``end`` callback is then
    emitted unconditionally.
    """
    current_state = self.state

    # A field is "open" if we stopped while reading its name or its data.
    field_still_open = (
        current_state == QuerystringState.FIELD_DATA
        or current_state == QuerystringState.FIELD_NAME
    )
    if field_still_open:
        self.callback("field_end")

    # Always announce the end of parsing, whether or not a field was open.
    self.callback("end")

968

def __repr__(self) -> str:
    """Return a debug representation showing the parser's configuration.

    The result has the form ``ClassName(strict_parsing=..., max_size=...)``,
    where both values are rendered with ``repr()``.
    """
    class_name = self.__class__.__name__
    return f"{class_name}(strict_parsing={self.strict_parsing!r}, max_size={self.max_size!r})"

973

974

975class MultipartParser(BaseParser):

976 """This class is a streaming multipart/form-data parser.

977

978 | Callback Name | Parameters | Description |

979 |--------------------|-----------------|-------------|

980 | on_part_begin | None | Called when a new part of the multipart message is encountered. |

981 | on_part_data | data, start, end| Called when a portion of a part's data is encountered. |

982 | on_part_end | None | Called when the end of a part is reached. |

983 | on_header_begin | None | Called when we've found a new header in a part of a multipart message |

984 | on_header_field | data, start, end| Called each time an additional portion of a header is read (i.e. the part of the header that is before the colon; the "Foo" in "Foo: Bar"). |

985 | on_header_value | data, start, end| Called when we get data for a header. |

986 | on_header_end | None | Called when the current header is finished - i.e. we've reached the newline at the end of the header. |

987 | on_headers_finished| None | Called when all headers are finished, and before the part data starts. |

988 | on_end | None | Called when the parser is finished parsing all data. |

989

990 Args:

991 boundary: The multipart boundary. This is required, and must match what is given in the HTTP request - usually in the Content-Type header.

992 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

993 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

994 max_header_count: The maximum number of headers allowed per part.

995 max_header_size: The maximum size of a single header line (excluding the trailing CRLF).

996 """ # noqa: E501