Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/python

1from __future__ import annotations

3import logging

4import os

5import shutil

6import sys

7import tempfile

8from email.message import Message

9from enum import IntEnum

10from io import BufferedRandom, BytesIO

11from numbers import Number

12from typing import TYPE_CHECKING, cast

14from .decoders import Base64Decoder, QuotedPrintableDecoder

15from .exceptions import FileError, FormParserError, MultipartParseError, QuerystringParseError

17if TYPE_CHECKING: # pragma: no cover

18 from collections.abc import Callable

19 from typing import Any, Literal, Protocol, TypeAlias, TypedDict

21 class SupportsRead(Protocol):

22 def read(self, __n: int) -> bytes: ...

24 class QuerystringCallbacks(TypedDict, total=False):

25 on_field_start: Callable[[], None]

26 on_field_name: Callable[[bytes, int, int], None]

27 on_field_data: Callable[[bytes, int, int], None]

28 on_field_end: Callable[[], None]

29 on_end: Callable[[], None]

31 class OctetStreamCallbacks(TypedDict, total=False):

32 on_start: Callable[[], None]

33 on_data: Callable[[bytes, int, int], None]

34 on_end: Callable[[], None]

36 class MultipartCallbacks(TypedDict, total=False):

37 on_part_begin: Callable[[], None]

38 on_part_data: Callable[[bytes, int, int], None]

39 on_part_end: Callable[[], None]

40 on_header_begin: Callable[[], None]

41 on_header_field: Callable[[bytes, int, int], None]

42 on_header_value: Callable[[bytes, int, int], None]

43 on_header_end: Callable[[], None]

44 on_headers_finished: Callable[[], None]

45 on_end: Callable[[], None]

47 class FormParserConfig(TypedDict):

48 UPLOAD_DIR: str | None

49 UPLOAD_KEEP_FILENAME: bool

50 UPLOAD_KEEP_EXTENSIONS: bool

51 UPLOAD_ERROR_ON_BAD_CTE: bool

52 MAX_MEMORY_FILE_SIZE: int

53 MAX_BODY_SIZE: float

55 class FileConfig(TypedDict, total=False):

56 UPLOAD_DIR: str | bytes | None

57 UPLOAD_DELETE_TMP: bool

58 UPLOAD_KEEP_FILENAME: bool

59 UPLOAD_KEEP_EXTENSIONS: bool

60 MAX_MEMORY_FILE_SIZE: int

62 class _FormProtocol(Protocol):

63 def write(self, data: bytes) -> int: ...

65 def finalize(self) -> None: ...

67 def close(self) -> None: ...

69 class FieldProtocol(_FormProtocol, Protocol):

70 def __init__(self, name: bytes | None) -> None: ...

72 def set_none(self) -> None: ...

74 class FileProtocol(_FormProtocol, Protocol):

75 def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig) -> None: ...

77 OnFieldCallback = Callable[[FieldProtocol], None]

78 OnFileCallback = Callable[[FileProtocol], None]

80 CallbackName: TypeAlias = Literal[

81 "start",

82 "data",

83 "end",

84 "field_start",

85 "field_name",

86 "field_data",

87 "field_end",

88 "part_begin",

89 "part_data",

90 "part_end",

91 "header_begin",

92 "header_field",

93 "header_value",

94 "header_end",

95 "headers_finished",

96 ]

98# Unique missing object.

99_missing = object()

100

101

102class QuerystringState(IntEnum):

103 """Querystring parser states.

104

105 These are used to keep track of the state of the parser, and are used to determine

106 what to do when new data is encountered.

107 """

108

109 BEFORE_FIELD = 0

110 FIELD_NAME = 1

111 FIELD_DATA = 2

112

113

114class MultipartState(IntEnum):

115 """Multipart parser states.

116

117 These are used to keep track of the state of the parser, and are used to determine

118 what to do when new data is encountered.

119 """

120

121 START = 0

122 START_BOUNDARY = 1

123 HEADER_FIELD_START = 2

124 HEADER_FIELD = 3

125 HEADER_VALUE_START = 4

126 HEADER_VALUE = 5

127 HEADER_VALUE_ALMOST_DONE = 6

128 HEADERS_ALMOST_DONE = 7

129 PART_DATA_START = 8

130 PART_DATA = 9

131 PART_DATA_END = 10

132 END_BOUNDARY = 11

133 END = 12

134

135

136# Flags for the multipart parser.

137FLAG_PART_BOUNDARY = 1

138FLAG_LAST_BOUNDARY = 2

139

140# Get constants. Since iterating over a str on Python 2 gives you a 1-length

141# string, but iterating over a bytes object on Python 3 gives you an integer,

142# we need to save these constants.

143CR = b"\r"[0]

144LF = b"\n"[0]

145COLON = b":"[0]

146SPACE = b" "[0]

147HYPHEN = b"-"[0]

148AMPERSAND = b"&"[0]

149SEMICOLON = b";"[0]

150LOWER_A = b"a"[0]

151LOWER_Z = b"z"[0]

152NULL = b"\x00"[0]

153

154# fmt: off

155# Mask for ASCII characters that can be http tokens.

156# Per RFC7230 - 3.2.6, this is all alpha-numeric characters

157# and these: !#$%&'*+-.^_`|~

158TOKEN_CHARS_SET = frozenset(

159 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"

160 b"abcdefghijklmnopqrstuvwxyz"

161 b"0123456789"

162 b"!#$%&'*+-.^_`|~")

163# fmt: on

164

165

166def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]:

167 """Parses a Content-Type header into a value in the following format: (content_type, {parameters})."""

168 # Uses email.message.Message to parse the header as described in PEP 594.

169 # Ref: https://peps.python.org/pep-0594/#cgi

170 if not value:

171 return (b"", {})

172

173 # If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.

174 if isinstance(value, bytes): # pragma: no cover

175 value = value.decode("latin-1")

176

177 # For types

178 assert isinstance(value, str), "Value should be a string by now"

179

180 # If we have no options, return the string as-is.

181 if ";" not in value:

182 return (value.lower().strip().encode("latin-1"), {})

183

184 # Split at the first semicolon, to get our value and then options.

185 # ctype, rest = value.split(b';', 1)

186 message = Message()

187 message["content-type"] = value

188 params = message.get_params()

189 # If there were no parameters, this would have already returned above

190 assert params, "At least the content type value should be present"

191 ctype = params.pop(0)[0].encode("latin-1")

192 options: dict[bytes, bytes] = {}

193 for param in params:

194 key, value = param

195 # If the value returned from get_params() is a 3-tuple, the last

196 # element corresponds to the value.

197 # See: https://docs.python.org/3/library/email.compat32-message.html

198 if isinstance(value, tuple):

199 value = value[-1]

200 # If the value is a filename, we need to fix a bug on IE6 that sends

201 # the full file path instead of the filename.

202 if key == "filename":

203 if value[1:3] == ":\\" or value[:2] == "\\\\":

204 value = value.split("\\")[-1]

205 options[key.encode("latin-1")] = value.encode("latin-1")

206 return ctype, options

207

208

209class Field:

210 """A Field object represents a (parsed) form field. It represents a single

211 field with a corresponding name and value.

212

213 The name that a :class:`Field` will be instantiated with is the same name

214 that would be found in the following HTML::

215

216 <input name="name_goes_here" type="text"/>

217

218 This class defines two methods, :meth:`on_data` and :meth:`on_end`, that

219 will be called when data is written to the Field, and when the Field is

220 finalized, respectively.

221

222 Args:

223 name: The name of the form field.

224 """

225

226 def __init__(self, name: bytes | None) -> None:

227 self._name = name

228 self._value: list[bytes] = []

229

230 # We cache the joined version of _value for speed.

231 self._cache = _missing

232

233 @classmethod

234 def from_value(cls, name: bytes, value: bytes | None) -> Field:

235 """Create an instance of a :class:`Field`, and set the corresponding

236 value - either None or an actual value. This method will also

237 finalize the Field itself.

238

239 Args:

240 name: the name of the form field.

241 value: the value of the form field - either a bytestring or None.

242

243 Returns:

244 A new instance of a [`Field`][python_multipart.Field].

245 """

246

247 f = cls(name)

248 if value is None:

249 f.set_none()

250 else:

251 f.write(value)

252 f.finalize()

253 return f

254

255 def write(self, data: bytes) -> int:

256 """Write some data into the form field.

257

258 Args:

259 data: The data to write to the field.

260

261 Returns:

262 The number of bytes written.

263 """

264 return self.on_data(data)

265

266 def on_data(self, data: bytes) -> int:

267 """This method is a callback that will be called whenever data is

268 written to the Field.

269

270 Args:

271 data: The data to write to the field.

272

273 Returns:

274 The number of bytes written.

275 """

276 self._value.append(data)

277 self._cache = _missing

278 return len(data)

279

280 def on_end(self) -> None:

281 """This method is called whenever the Field is finalized."""

282 if self._cache is _missing:

283 self._cache = b"".join(self._value)

284

285 def finalize(self) -> None:

286 """Finalize the form field."""

287 self.on_end()

288

289 def close(self) -> None:

290 """Close the Field object. This will free any underlying cache."""

291 # Free our value array.

292 if self._cache is _missing:

293 self._cache = b"".join(self._value)

294

295 del self._value

296

297 def set_none(self) -> None:

298 """Some fields in a querystring can possibly have a value of None - for

299 example, the string "foo&bar=&baz=asdf" will have a field with the

300 name "foo" and value None, one with name "bar" and value "", and one

301 with name "baz" and value "asdf". Since the write() interface doesn't

302 support writing None, this function will set the field value to None.

303 """

304 self._cache = None

305

306 @property

307 def field_name(self) -> bytes | None:

308 """This property returns the name of the field."""

309 return self._name

310

311 @property

312 def value(self) -> bytes | None:

313 """This property returns the value of the form field."""

314 if self._cache is _missing:

315 self._cache = b"".join(self._value)

316

317 assert isinstance(self._cache, bytes) or self._cache is None

318 return self._cache

319

320 def __eq__(self, other: object) -> bool:

321 if isinstance(other, Field):

322 return self.field_name == other.field_name and self.value == other.value

323 else:

324 return NotImplemented

325

326 def __repr__(self) -> str:

327 if self.value is not None and len(self.value) > 97:

328 # We get the repr, and then insert three dots before the final

329 # quote.

330 v = repr(self.value[:97])[:-1] + "...'"

331 else:

332 v = repr(self.value)

333

334 return f"{self.__class__.__name__}(field_name={self.field_name!r}, value={v})"

335

336

337class File:

338 """This class represents an uploaded file. It handles writing file data to

339 either an in-memory file or a temporary file on-disk, if the optional

340 threshold is passed.

341

342 There are some options that can be passed to the File to change behavior

343 of the class. Valid options are as follows:

344

346 |-----------------------|-------|---------|-------------|

349 | UPLOAD_KEEP_FILENAME | `bool`| False | Whether or not to keep the filename of the uploaded file. If True, then the filename will be converted to a safe representation (e.g. by removing any invalid path segments), and then saved with the same name). Otherwise, a temporary name will be used. |

350 | UPLOAD_KEEP_EXTENSIONS| `bool`| False | Whether or not to keep the uploaded file's extension. If False, the file will be saved with the default temporary extension (usually ".tmp"). Otherwise, the file's extension will be maintained. Note that this will properly combine with the UPLOAD_KEEP_FILENAME setting. |

351 | MAX_MEMORY_FILE_SIZE | `int` | 1 MiB | The maximum number of bytes of a File to keep in memory. By default, the contents of a File are kept into memory until a certain limit is reached, after which the contents of the File are written to a temporary file. This behavior can be disabled by setting this value to an appropriately large value (or, for example, infinity, such as `float('inf')`. |

352

353 Args:

354 file_name: The name of the file that this [`File`][python_multipart.File] represents.

355 field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,

356 the file was uploaded with Content-Type application/octet-stream.

357 config: The configuration for this File. See above for valid configuration keys and their corresponding values.

358 """ # noqa: E501

359

360 def __init__(self, file_name: bytes | None, field_name: bytes | None = None, config: FileConfig = {}) -> None:

361 # Save configuration, set other variables default.

362 self.logger = logging.getLogger(__name__)

363 self._config = config

364 self._in_memory = True

365 self._bytes_written = 0

366 self._fileobj: BytesIO | BufferedRandom = BytesIO()

367

368 # Save the provided field/file name.

369 self._field_name = field_name

370 self._file_name = file_name

371

372 # Our actual file name is None by default, since, depending on our

373 # config, we may not actually use the provided name.

374 self._actual_file_name: bytes | None = None

375

376 # Split the extension from the filename.

377 if file_name is not None:

378 base, ext = os.path.splitext(file_name)

379 self._file_base = base

380 self._ext = ext

381

382 @property

383 def field_name(self) -> bytes | None:

384 """The form field associated with this file. May be None if there isn't

385 one, for example when we have an application/octet-stream upload.

386 """

387 return self._field_name

388

389 @property

390 def file_name(self) -> bytes | None:

391 """The file name given in the upload request."""

392 return self._file_name

393

394 @property

395 def actual_file_name(self) -> bytes | None:

396 """The file name that this file is saved as. Will be None if it's not

397 currently saved on disk.

398 """

399 return self._actual_file_name

400

401 @property

402 def file_object(self) -> BytesIO | BufferedRandom:

403 """The file object that we're currently writing to. Note that this

404 will either be an instance of a :class:`io.BytesIO`, or a regular file

405 object.

406 """

407 return self._fileobj

408

409 @property

410 def size(self) -> int:

411 """The total size of this file, counted as the number of bytes that

412 currently have been written to the file.

413 """

414 return self._bytes_written

415

416 @property

417 def in_memory(self) -> bool:

418 """A boolean representing whether or not this file object is currently

419 stored in-memory or on-disk.

420 """

421 return self._in_memory

422

423 def flush_to_disk(self) -> None:

424 """If the file is already on-disk, do nothing. Otherwise, copy from

425 the in-memory buffer to a disk file, and then reassign our internal

426 file object to this new disk file.

427

428 Note that if you attempt to flush a file that is already on-disk, a

429 warning will be logged to this module's logger.

430 """

431 if not self._in_memory:

432 self.logger.warning("Trying to flush to disk when we're not in memory")

433 return

434

435 # Go back to the start of our file.

436 self._fileobj.seek(0)

437

438 # Open a new file.

439 new_file = self._get_disk_file()

440

441 # Copy the file objects.

442 shutil.copyfileobj(self._fileobj, new_file)

443

444 # Seek to the new position in our new file.

445 new_file.seek(self._bytes_written)

446

447 # Reassign the fileobject.

448 old_fileobj = self._fileobj

449 self._fileobj = new_file

450

451 # We're no longer in memory.

452 self._in_memory = False

453

454 # Close the old file object.

455 old_fileobj.close()

456

457 def _get_disk_file(self) -> BufferedRandom:

458 """This function is responsible for getting a file object on-disk for us."""

459 self.logger.info("Opening a file on disk")

460

461 file_dir = self._config.get("UPLOAD_DIR")

462 keep_filename = self._config.get("UPLOAD_KEEP_FILENAME", False)

463 keep_extensions = self._config.get("UPLOAD_KEEP_EXTENSIONS", False)

464 delete_tmp = self._config.get("UPLOAD_DELETE_TMP", True)

465 tmp_file: None | BufferedRandom = None

466

467 # If we have a directory and are to keep the filename...

468 if file_dir is not None and keep_filename:

469 self.logger.info("Saving with filename in: %r", file_dir)

470

471 # Build our filename.

472 # TODO: what happens if we don't have a filename?

473 fname = self._file_base + self._ext if keep_extensions else self._file_base

474

475 path = os.path.join(file_dir, fname) # type: ignore[arg-type]

476 try:

477 self.logger.info("Opening file: %r", path)

478 tmp_file = open(path, "w+b")

479 except OSError:

480 tmp_file = None

481

482 self.logger.exception("Error opening temporary file")

483 raise FileError("Error opening temporary file: %r" % path)

484 else:

485 # Build options array.

486 # Note that on Python 3, tempfile doesn't support byte names. We

487 # encode our paths using the default filesystem encoding.

488 suffix = self._ext.decode(sys.getfilesystemencoding()) if keep_extensions else None

489

490 if file_dir is None:

491 dir = None

492 elif isinstance(file_dir, bytes):

493 dir = file_dir.decode(sys.getfilesystemencoding())

494 else:

495 dir = file_dir # pragma: no cover

496

497 # Create a temporary (named) file with the appropriate settings.

498 self.logger.info(

499 "Creating a temporary file with options: %r", {"suffix": suffix, "delete": delete_tmp, "dir": dir}

500 )

501 try:

502 tmp_file = cast(BufferedRandom, tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_tmp, dir=dir))

503 except OSError:

504 self.logger.exception("Error creating named temporary file")

505 raise FileError("Error creating named temporary file")

506

507 assert tmp_file is not None

508 # Encode filename as bytes.

509 if isinstance(tmp_file.name, str):

510 fname = tmp_file.name.encode(sys.getfilesystemencoding())

511 else:

512 fname = cast(bytes, tmp_file.name) # pragma: no cover

513

514 self._actual_file_name = fname

515 return tmp_file

516

517 def write(self, data: bytes) -> int:

518 """Write some data to the File.

519

520 :param data: a bytestring

521 """

522 return self.on_data(data)

523

524 def on_data(self, data: bytes) -> int:

525 """This method is a callback that will be called whenever data is

526 written to the File.

527

528 Args:

529 data: The data to write to the file.

530

531 Returns:

532 The number of bytes written.

533 """

534 bwritten = self._fileobj.write(data)

535

536 # If the bytes written isn't the same as the length, just return.

537 if bwritten != len(data):

538 self.logger.warning("bwritten != len(data) (%d != %d)", bwritten, len(data))

539 return bwritten

540

541 # Keep track of how many bytes we've written.

542 self._bytes_written += bwritten

543

544 # If we're in-memory and are over our limit, we create a file.

545 max_memory_file_size = self._config.get("MAX_MEMORY_FILE_SIZE")

546 if self._in_memory and max_memory_file_size is not None and (self._bytes_written > max_memory_file_size):

547 self.logger.info("Flushing to disk")

548 self.flush_to_disk()

549

550 # Return the number of bytes written.

551 return bwritten

552

553 def on_end(self) -> None:

554 """This method is called whenever the Field is finalized."""

555 # Flush the underlying file object

556 self._fileobj.flush()

557

558 def finalize(self) -> None:

559 """Finalize the form file. This will not close the underlying file,

560 but simply signal that we are finished writing to the File.

561 """

562 self.on_end()

563

564 def close(self) -> None:

565 """Close the File object. This will actually close the underlying

566 file object (whether it's a :class:`io.BytesIO` or an actual file

567 object).

568 """

569 self._fileobj.close()

570

571 def __repr__(self) -> str:

572 return f"{self.__class__.__name__}(file_name={self.file_name!r}, field_name={self.field_name!r})"

573

574

575class BaseParser:

576 """This class is the base class for all parsers. It contains the logic for

577 calling and adding callbacks.

578

579 A callback can be one of two different forms. "Notification callbacks" are

580 callbacks that are called when something happens - for example, when a new

581 part of a multipart message is encountered by the parser. "Data callbacks"

582 are called when we get some sort of data - for example, part of the body of

583 a multipart chunk. Notification callbacks are called with no parameters,

584 whereas data callbacks are called with three, as follows::

585

586 data_callback(data, start, end)

587

588 The "data" parameter is a bytestring (i.e. "foo" on Python 2, or b"foo" on

589 Python 3). "start" and "end" are integer indexes into the "data" string

590 that represent the data of interest. Thus, in a data callback, the slice

591 `data[start:end]` represents the data that the callback is "interested in".

592 The callback is not passed a copy of the data, since copying severely hurts

593 performance.

594 """

595

596 def __init__(self) -> None:

597 self.logger = logging.getLogger(__name__)

598 self.callbacks: QuerystringCallbacks | OctetStreamCallbacks | MultipartCallbacks = {}

599

600 def callback(

601 self, name: CallbackName, data: bytes | None = None, start: int | None = None, end: int | None = None

602 ) -> None:

603 """This function calls a provided callback with some data. If the

604 callback is not set, will do nothing.

605

606 Args:

607 name: The name of the callback to call (as a string).

608 data: Data to pass to the callback. If None, then it is assumed that the callback is a notification

609 callback, and no parameters are given.

610 end: An integer that is passed to the data callback.

611 start: An integer that is passed to the data callback.

612 """

613 on_name = "on_" + name

614 func = self.callbacks.get(on_name)

615 if func is None:

616 return

617 func = cast("Callable[..., Any]", func)

618 # Depending on whether we're given a buffer...

619 if data is not None:

620 # Don't do anything if we have start == end.

621 if start is not None and start == end:

622 return

623

624 self.logger.debug("Calling %s with data[%d:%d]", on_name, start, end)

625 func(data, start, end)

626 else:

627 self.logger.debug("Calling %s with no data", on_name)

628 func()

629

630 def set_callback(self, name: CallbackName, new_func: Callable[..., Any] | None) -> None:

631 """Update the function for a callback. Removes from the callbacks dict

632 if new_func is None.

633

634 :param name: The name of the callback to call (as a string).

635

636 :param new_func: The new function for the callback. If None, then the

637 callback will be removed (with no error if it does not

638 exist).

639 """

640 if new_func is None:

641 self.callbacks.pop("on_" + name, None) # type: ignore[misc]

642 else:

643 self.callbacks["on_" + name] = new_func # type: ignore[literal-required]

644

645 def close(self) -> None:

646 pass # pragma: no cover

647

648 def finalize(self) -> None:

649 pass # pragma: no cover

650

651 def __repr__(self) -> str:

652 return "%s()" % self.__class__.__name__

653

654

655class OctetStreamParser(BaseParser):

656 """This parser parses an octet-stream request body and calls callbacks when

657 incoming data is received. Callbacks are as follows:

658

659 | Callback Name | Parameters | Description |

660 |----------------|-----------------|-----------------------------------------------------|

661 | on_start | None | Called when the first data is parsed. |

662 | on_data | data, start, end| Called for each data chunk that is parsed. |

663 | on_end | None | Called when the parser is finished parsing all data.|

664

665 Args:

666 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

667 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

668 """

669

670 def __init__(self, callbacks: OctetStreamCallbacks = {}, max_size: float = float("inf")):

671 super().__init__()

672 self.callbacks = callbacks

673 self._started = False

674

675 if not isinstance(max_size, Number) or max_size < 1:

676 raise ValueError("max_size must be a positive number, not %r" % max_size)

677 self.max_size: int | float = max_size

678 self._current_size = 0

679

680 def write(self, data: bytes) -> int:

681 """Write some data to the parser, which will perform size verification,

682 and then pass the data to the underlying callback.

683

684 Args:

685 data: The data to write to the parser.

686

687 Returns:

688 The number of bytes written.

689 """

690 if not self._started:

691 self.callback("start")

692 self._started = True

693

694 # Truncate data length.

695 data_len = len(data)

696 if (self._current_size + data_len) > self.max_size:

697 # We truncate the length of data that we are to process.

698 new_size = int(self.max_size - self._current_size)

699 self.logger.warning(

700 "Current size is %d (max %d), so truncating data length from %d to %d",

701 self._current_size,

702 self.max_size,

703 data_len,

704 new_size,

705 )

706 data_len = new_size

707

708 # Increment size, then callback, in case there's an exception.

709 self._current_size += data_len

710 self.callback("data", data, 0, data_len)

711 return data_len

712

713 def finalize(self) -> None:

714 """Finalize this parser, which signals to that we are finished parsing,

715 and sends the on_end callback.

716 """

717 self.callback("end")

718

719 def __repr__(self) -> str:

720 return "%s()" % self.__class__.__name__

721

722

723class QuerystringParser(BaseParser):

724 """This is a streaming querystring parser. It will consume data, and call

725 the callbacks given when it has data.

726

727 | Callback Name | Parameters | Description |

728 |----------------|-----------------|-----------------------------------------------------|

729 | on_field_start | None | Called when a new field is encountered. |

730 | on_field_name | data, start, end| Called when a portion of a field's name is encountered. |

731 | on_field_data | data, start, end| Called when a portion of a field's data is encountered. |

732 | on_field_end | None | Called when the end of a field is encountered. |

733 | on_end | None | Called when the parser is finished parsing all data.|

734

735 Args:

736 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

737 strict_parsing: Whether or not to parse the body strictly. Defaults to False. If this is set to True, then the

738 behavior of the parser changes as the following: if a field has a value with an equal sign

739 (e.g. "foo=bar", or "foo="), it is always included. If a field has no equals sign (e.g. "...&name&..."),

740 it will be treated as an error if 'strict_parsing' is True, otherwise included. If an error is encountered,

741 then a [`QuerystringParseError`][python_multipart.exceptions.QuerystringParseError] will be raised.

742 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

743 """ # noqa: E501

744

745 state: QuerystringState

746

747 def __init__(

748 self, callbacks: QuerystringCallbacks = {}, strict_parsing: bool = False, max_size: float = float("inf")

749 ) -> None:

750 super().__init__()

751 self.state = QuerystringState.BEFORE_FIELD

752 self._found_sep = False

753

754 self.callbacks = callbacks

755

756 # Max-size stuff

757 if not isinstance(max_size, Number) or max_size < 1:

758 raise ValueError("max_size must be a positive number, not %r" % max_size)

759 self.max_size: int | float = max_size

760 self._current_size = 0

761

762 # Should parsing be strict?

763 self.strict_parsing = strict_parsing

764

765 def write(self, data: bytes) -> int:

766 """Write some data to the parser, which will perform size verification,

767 parse into either a field name or value, and then pass the

768 corresponding data to the underlying callback. If an error is

769 encountered while parsing, a QuerystringParseError will be raised. The

770 "offset" attribute of the raised exception will be set to the offset in

771 the input data chunk (NOT the overall stream) that caused the error.

772

773 Args:

774 data: The data to write to the parser.

775

776 Returns:

777 The number of bytes written.

778 """

779 # Handle sizing.

780 data_len = len(data)

781 if (self._current_size + data_len) > self.max_size:

782 # We truncate the length of data that we are to process.

783 new_size = int(self.max_size - self._current_size)

784 self.logger.warning(

785 "Current size is %d (max %d), so truncating data length from %d to %d",

786 self._current_size,

787 self.max_size,

788 data_len,

789 new_size,

790 )

791 data_len = new_size

792

793 l = 0

794 try:

795 l = self._internal_write(data, data_len)

796 finally:

797 self._current_size += l

798

799 return l

800

801 def _internal_write(self, data: bytes, length: int) -> int:

802 state = self.state

803 strict_parsing = self.strict_parsing

804 found_sep = self._found_sep

805

806 i = 0

807 while i < length:

808 ch = data[i]

809

810 # Depending on our state...

811 if state == QuerystringState.BEFORE_FIELD:

812 # If the 'found_sep' flag is set, we've already encountered

813 # and skipped a single separator. If so, we check our strict

814 # parsing flag and decide what to do. Otherwise, we haven't

815 # yet reached a separator, and thus, if we do, we need to skip

816 # it as it will be the boundary between fields that's supposed

817 # to be there.

818 if ch == AMPERSAND or ch == SEMICOLON:

819 if found_sep:

820 # If we're parsing strictly, we disallow blank chunks.

821 if strict_parsing:

822 e = QuerystringParseError("Skipping duplicate ampersand/semicolon at %d" % i)

823 e.offset = i

824 raise e

825 else:

826 self.logger.debug("Skipping duplicate ampersand/semicolon at %d", i)

827 else:

828 # This case is when we're skipping the (first)

829 # separator between fields, so we just set our flag

830 # and continue on.

831 found_sep = True

832 else:

833 # Emit a field-start event, and go to that state. Also,

834 # reset the "found_sep" flag, for the next time we get to

835 # this state.

836 self.callback("field_start")

837 i -= 1

838 state = QuerystringState.FIELD_NAME

839 found_sep = False

840

841 elif state == QuerystringState.FIELD_NAME:

842 # Try and find a separator - we ensure that, if we do, we only

843 # look for the equal sign before it.

844 sep_pos = data.find(b"&", i)

845 if sep_pos == -1:

846 sep_pos = data.find(b";", i)

847

848 # See if we can find an equals sign in the remaining data. If

849 # so, we can immediately emit the field name and jump to the

850 # data state.

851 if sep_pos != -1:

852 equals_pos = data.find(b"=", i, sep_pos)

853 else:

854 equals_pos = data.find(b"=", i)

855

856 if equals_pos != -1:

857 # Emit this name.

858 self.callback("field_name", data, i, equals_pos)

859

860 # Jump i to this position. Note that it will then have 1

861 # added to it below, which means the next iteration of this

862 # loop will inspect the character after the equals sign.

863 i = equals_pos

864 state = QuerystringState.FIELD_DATA

865 else:

866 # No equals sign found.

867 if not strict_parsing:

868 # See also comments in the QuerystringState.FIELD_DATA case below.

869 # If we found the separator, we emit the name and just

870 # end - there's no data callback at all (not even with

871 # a blank value).

872 if sep_pos != -1:

873 self.callback("field_name", data, i, sep_pos)

874 self.callback("field_end")

875

876 i = sep_pos - 1

877 state = QuerystringState.BEFORE_FIELD

878 else:

879 # Otherwise, no separator in this block, so the

880 # rest of this chunk must be a name.

881 self.callback("field_name", data, i, length)

882 i = length

883

884 else:

885 # We're parsing strictly. If we find a separator,

886 # this is an error - we require an equals sign.

887 if sep_pos != -1:

888 e = QuerystringParseError(

889 "When strict_parsing is True, we require an "

890 "equals sign in all field chunks. Did not "

891 "find one in the chunk that starts at %d" % (i,)

892 )

893 e.offset = i

894 raise e

895

896 # No separator in the rest of this chunk, so it's just

897 # a field name.

898 self.callback("field_name", data, i, length)

899 i = length

900

901 elif state == QuerystringState.FIELD_DATA:

902 # Try finding either an ampersand or a semicolon after this

903 # position.

904 sep_pos = data.find(b"&", i)

905 if sep_pos == -1:

906 sep_pos = data.find(b";", i)

907

908 # If we found it, callback this bit as data and then go back

909 # to expecting to find a field.

910 if sep_pos != -1:

911 self.callback("field_data", data, i, sep_pos)

912 self.callback("field_end")

913

914 # Note that we go to the separator, which brings us to the

915 # "before field" state. This allows us to properly emit

916 # "field_start" events only when we actually have data for

917 # a field of some sort.

918 i = sep_pos - 1

919 state = QuerystringState.BEFORE_FIELD

920

921 # Otherwise, emit the rest as data and finish.

922 else:

923 self.callback("field_data", data, i, length)

924 i = length

925

926 else: # pragma: no cover (error case)

927 msg = "Reached an unknown state %d at %d" % (state, i)

928 self.logger.warning(msg)

929 e = QuerystringParseError(msg)

930 e.offset = i

931 raise e

932

933 i += 1

934

935 self.state = state

936 self._found_sep = found_sep

937 return len(data)

938

939 def finalize(self) -> None:

940 """Finalize this parser, which signals to that we are finished parsing,

941 if we're still in the middle of a field, an on_field_end callback, and

942 then the on_end callback.

943 """

944 # If we're currently in the middle of a field, we finish it.

945 if self.state == QuerystringState.FIELD_DATA:

946 self.callback("field_end")

947 self.callback("end")

948

949 def __repr__(self) -> str:

950 return "{}(strict_parsing={!r}, max_size={!r})".format(

951 self.__class__.__name__, self.strict_parsing, self.max_size

952 )

953

954

955class MultipartParser(BaseParser):

956 """This class is a streaming multipart/form-data parser.

957

958 | Callback Name | Parameters | Description |

959 |--------------------|-----------------|-------------|

960 | on_part_begin | None | Called when a new part of the multipart message is encountered. |

961 | on_part_data | data, start, end| Called when a portion of a part's data is encountered. |

962 | on_part_end | None | Called when the end of a part is reached. |

963 | on_header_begin | None | Called when we've found a new header in a part of a multipart message |

964 | on_header_field | data, start, end| Called each time an additional portion of a header is read (i.e. the part of the header that is before the colon; the "Foo" in "Foo: Bar"). |

965 | on_header_value | data, start, end| Called when we get data for a header. |

966 | on_header_end | None | Called when the current header is finished - i.e. we've reached the newline at the end of the header. |

967 | on_headers_finished| None | Called when all headers are finished, and before the part data starts. |

968 | on_end | None | Called when the parser is finished parsing all data. |

969

970 Args:

971 boundary: The multipart boundary. This is required, and must match what is given in the HTTP request - usually in the Content-Type header.

972 callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][python_multipart.BaseParser].

973 max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

974 """ # noqa: E501

975

976 def __init__(