1from __future__ import annotations
2
3import io
4import logging
5import os
6import shutil
7import sys
8import tempfile
9from email.message import Message
10from enum import IntEnum
11from io import BytesIO
12from numbers import Number
13from typing import TYPE_CHECKING, Any
14
15from .decoders import Base64Decoder, QuotedPrintableDecoder
16from .exceptions import FileError, FormParserError, MultipartParseError, QuerystringParseError
17
if TYPE_CHECKING:  # pragma: no cover
    from typing import Callable, Protocol, TypedDict

    # Callbacks accepted by QuerystringParser. Notification callbacks take no
    # arguments; data callbacks receive (data, start, end), where the slice
    # data[start:end] is the region of interest.
    class QuerystringCallbacks(TypedDict, total=False):
        on_field_start: Callable[[], None]
        on_field_name: Callable[[bytes, int, int], None]
        on_field_data: Callable[[bytes, int, int], None]
        on_field_end: Callable[[], None]
        on_end: Callable[[], None]

    # Callbacks accepted by OctetStreamParser.
    class OctetStreamCallbacks(TypedDict, total=False):
        on_start: Callable[[], None]
        on_data: Callable[[bytes, int, int], None]
        on_end: Callable[[], None]

    # Callbacks accepted by MultipartParser.
    class MultipartCallbacks(TypedDict, total=False):
        on_part_begin: Callable[[], None]
        on_part_data: Callable[[bytes, int, int], None]
        on_part_end: Callable[[], None]
        on_header_begin: Callable[[], None]
        on_header_field: Callable[[bytes, int, int], None]
        on_header_value: Callable[[bytes, int, int], None]
        on_header_end: Callable[[], None]
        on_headers_finished: Callable[[], None]
        on_end: Callable[[], None]

    # Configuration for the form parser (all keys required here, unlike
    # FileConfig below).
    class FormParserConfig(TypedDict):
        UPLOAD_DIR: str | None
        UPLOAD_KEEP_FILENAME: bool
        UPLOAD_KEEP_EXTENSIONS: bool
        UPLOAD_ERROR_ON_BAD_CTE: bool
        MAX_MEMORY_FILE_SIZE: int
        MAX_BODY_SIZE: float

    # Configuration accepted by File; every key is optional.
    class FileConfig(TypedDict, total=False):
        UPLOAD_DIR: str | bytes | None
        UPLOAD_DELETE_TMP: bool
        UPLOAD_KEEP_FILENAME: bool
        UPLOAD_KEEP_EXTENSIONS: bool
        MAX_MEMORY_FILE_SIZE: int

    # Structural type shared by Field and File: anything the form parser can
    # stream bytes into and then finalize/close.
    class _FormProtocol(Protocol):
        def write(self, data: bytes) -> int:
            ...

        def finalize(self) -> None:
            ...

        def close(self) -> None:
            ...

    class FieldProtocol(_FormProtocol, Protocol):
        def __init__(self, name: bytes) -> None:
            ...

        def set_none(self) -> None:
            ...

    class FileProtocol(_FormProtocol, Protocol):
        def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig) -> None:
            ...

    OnFieldCallback = Callable[[FieldProtocol], None]
    OnFileCallback = Callable[[FileProtocol], None]
82
83
# Unique sentinel marking a Field's cached value as "not yet computed",
# distinct from a legitimate cached value of None (see Field.set_none()).
_missing = object()
86
87
class QuerystringState(IntEnum):
    """Querystring parser states.

    These are used to keep track of the state of the parser, and are used to determine
    what to do when new data is encountered.
    """

    BEFORE_FIELD = 0  # Between fields: skipping the "&"/";" separator(s).
    FIELD_NAME = 1  # Reading a field name, up to "=" or a separator.
    FIELD_DATA = 2  # Reading a field value, up to the next separator.
98
99
class MultipartState(IntEnum):
    """Multipart parser states.

    These are used to keep track of the state of the parser, and are used to determine
    what to do when new data is encountered.
    """

    START = 0  # Initial state; nothing parsed yet.
    START_BOUNDARY = 1  # Reading the message's first boundary line.
    HEADER_FIELD_START = 2  # At the start of a header line within a part.
    HEADER_FIELD = 3  # Reading a header name (before the colon).
    HEADER_VALUE_START = 4  # Between the colon and the header value.
    HEADER_VALUE = 5  # Reading a header value (after the colon).
    HEADER_VALUE_ALMOST_DONE = 6  # Header value ended; expecting end-of-line.
    HEADERS_ALMOST_DONE = 7  # Headers ended; expecting the blank line before data.
    PART_DATA_START = 8  # About to read a part's payload.
    PART_DATA = 9  # Reading a part's payload.
    PART_DATA_END = 10  # Finished a part's payload.
    END = 11  # Final boundary seen; parsing complete.
119
120
# Flags for the multipart parser.
FLAG_PART_BOUNDARY = 1
FLAG_LAST_BOUNDARY = 2

# Get constants. Since iterating over a str on Python 2 gives you a 1-length
# string, but iterating over a bytes object on Python 3 gives you an integer,
# we need to save these constants.
# Each of these is the integer value of a single ASCII byte, for comparison
# against `data[i]` inside the parser loops.
CR = b"\r"[0]
LF = b"\n"[0]
COLON = b":"[0]
SPACE = b" "[0]
HYPHEN = b"-"[0]
AMPERSAND = b"&"[0]
SEMICOLON = b";"[0]
LOWER_A = b"a"[0]
LOWER_Z = b"z"[0]
NULL = b"\x00"[0]

# Mask for ASCII characters that can be http tokens.
# Per RFC7230 - 3.2.6, this is all alpha-numeric characters
# and these: !#$%&'*+-.^_`|~
TOKEN_CHARS_SET = frozenset(
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    b"abcdefghijklmnopqrstuvwxyz"
    b"0123456789"
    b"!#$%&'*+-.^_`|~")
147
148
def ord_char(c: int) -> int:
    """Return *c* unchanged.

    Legacy shim from the Python 2/3 transition: indexing a bytes object on
    Python 3 already yields an integer, so no conversion is needed.
    """
    return c
151
152
def join_bytes(b: bytes) -> bytes:
    """Return *b* as a bytes object.

    Legacy helper from the Python 2/3 compatibility era. The old
    ``bytes(list(b))`` round-tripped through a throwaway list of integers;
    calling ``bytes()`` on the buffer directly is equivalent (it also accepts
    any iterable of ints) and avoids the intermediate list.
    """
    return bytes(b)
155
156
def parse_options_header(value: str | bytes) -> tuple[bytes, dict[bytes, bytes]]:
    """Parses a Content-Type header into a value in the following format: (content_type, {parameters})."""
    # Uses email.message.Message to parse the header as described in PEP 594.
    # Ref: https://peps.python.org/pep-0594/#cgi
    if not value:
        return (b"", {})

    # Bytes input is assumed to conform to WSGI, i.e. latin-1 encoded.
    if isinstance(value, bytes):  # pragma: no cover
        value = value.decode("latin-1")

    # For types
    assert isinstance(value, str), "Value should be a string by now"

    # Fast path: no parameters at all, so return the bare content type.
    if ";" not in value:
        return (value.lower().strip().encode("latin-1"), {})

    # Let email.message.Message split the header into the content type plus
    # its parameter list.
    message = Message()
    message["content-type"] = value
    parsed = message.get_params()
    # If there were no parameters, this would have already returned above
    assert parsed, "At least the content type value should be present"

    content_type = parsed[0][0].encode("latin-1")
    options: dict[bytes, bytes] = {}
    for key, param_value in parsed[1:]:
        # get_params() returns a (charset, language, value) 3-tuple for
        # RFC 2231 encoded parameters; the last element is the value.
        # See: https://docs.python.org/3/library/email.compat32-message.html
        if isinstance(param_value, tuple):
            param_value = param_value[-1]
        # Work around an IE6 bug that sends the full file path (e.g.
        # "C:\dir\file.txt" or a UNC "\\host\share" path) instead of just
        # the filename: keep only the final path component.
        if key == "filename" and (param_value[1:3] == ":\\" or param_value[:2] == "\\\\"):
            param_value = param_value.split("\\")[-1]
        options[key.encode("latin-1")] = param_value.encode("latin-1")
    return content_type, options
198
199
class Field:
    """A single (parsed) form field: a name together with a value.

    The name a :class:`Field` is instantiated with is the same name that
    would appear in the following HTML::

        <input name="name_goes_here" type="text"/>

    Two callback-style methods are defined: :meth:`on_data`, invoked whenever
    data is written to the field, and :meth:`on_end`, invoked when the field
    is finalized.

    Args:
        name: The name of the form field.
    """

    def __init__(self, name: bytes) -> None:
        self._name = name
        self._value: list[bytes] = []

        # Joined form of _value, cached for speed. The module-level _missing
        # sentinel marks the cache as stale/unset.
        self._cache = _missing

    @classmethod
    def from_value(cls, name: bytes, value: bytes | None) -> Field:
        """Build an already-finalized :class:`Field` from a name and value.

        Args:
            name: the name of the form field.
            value: the value of the form field - either a bytestring or None.

        Returns:
            A new instance of a [`Field`][multipart.Field].
        """

        field = cls(name)
        if value is None:
            field.set_none()
        else:
            field.write(value)
        field.finalize()
        return field

    def write(self, data: bytes) -> int:
        """Write some data into the form field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """Callback invoked whenever data is written to the Field.

        Args:
            data: The data to write to the field.

        Returns:
            The number of bytes written.
        """
        self._value.append(data)
        # New data invalidates any previously-joined value.
        self._cache = _missing
        return len(data)

    def on_end(self) -> None:
        """Callback invoked when the Field is finalized."""
        if self._cache is _missing:
            self._cache = b"".join(self._value)

    def finalize(self) -> None:
        """Finalize the form field."""
        self.on_end()

    def close(self) -> None:
        """Close the Field object. This will free any underlying cache."""
        # Preserve the joined value before dropping the chunk list.
        if self._cache is _missing:
            self._cache = b"".join(self._value)

        del self._value

    def set_none(self) -> None:
        """Mark this field as having a value of None.

        A querystring such as "foo&bar=&baz=asdf" produces a field named
        "foo" with value None, one named "bar" with value "", and one named
        "baz" with value "asdf". Since the write() interface cannot express
        None, this method sets the field value to None directly.
        """
        self._cache = None

    @property
    def field_name(self) -> bytes:
        """The name of this field."""
        return self._name

    @property
    def value(self) -> bytes | None:
        """The value of this form field (None if set_none() was called)."""
        if self._cache is _missing:
            self._cache = b"".join(self._value)

        return self._cache

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Field):
            return NotImplemented
        return self.field_name == other.field_name and self.value == other.value

    def __repr__(self) -> str:
        val = self.value
        if val is not None and len(val) > 97:
            # Truncate long values: repr the first 97 bytes, then splice
            # three dots in before the closing quote.
            shown = repr(val[:97])[:-1] + "...'"
        else:
            shown = repr(val)

        return f"{self.__class__.__name__}(field_name={self.field_name!r}, value={shown})"
325
326
class File:
    """This class represents an uploaded file. It handles writing file data to
    either an in-memory file or a temporary file on-disk, if the optional
    threshold is passed.

    There are some options that can be passed to the File to change behavior
    of the class. Valid options are as follows:

    | Name | Type | Default | Description |
    |-----------------------|-------|---------|-------------|
    | UPLOAD_DIR | `str` | None | The directory to store uploaded files in. If this is None, a temporary file will be created in the system's standard location. |
    | UPLOAD_DELETE_TMP | `bool`| True | Delete automatically created TMP file |
    | UPLOAD_KEEP_FILENAME | `bool`| False | Whether or not to keep the filename of the uploaded file. If True, then the filename will be converted to a safe representation (e.g. by removing any invalid path segments), and then saved with the same name). Otherwise, a temporary name will be used. |
    | UPLOAD_KEEP_EXTENSIONS| `bool`| False | Whether or not to keep the uploaded file's extension. If False, the file will be saved with the default temporary extension (usually ".tmp"). Otherwise, the file's extension will be maintained. Note that this will properly combine with the UPLOAD_KEEP_FILENAME setting. |
    | MAX_MEMORY_FILE_SIZE | `int` | 1 MiB | The maximum number of bytes of a File to keep in memory. By default, the contents of a File are kept into memory until a certain limit is reached, after which the contents of the File are written to a temporary file. This behavior can be disabled by setting this value to an appropriately large value (or, for example, infinity, such as `float('inf')`. |

    Args:
        file_name: The name of the file that this [`File`][multipart.File] represents.
        field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,
            the file was uploaded with Content-Type application/octet-stream.
        config: The configuration for this File. See above for valid configuration keys and their corresponding values.
    """  # noqa: E501

    def __init__(self, file_name: bytes | None, field_name: bytes | None = None, config: FileConfig = {}) -> None:
        # NOTE: the mutable default for `config` is safe here because the
        # dict is only ever read (via .get()), never mutated.
        # Save configuration, set other variables default.
        self.logger = logging.getLogger(__name__)
        self._config = config
        self._in_memory = True
        self._bytes_written = 0
        self._fileobj = BytesIO()

        # Save the provided field/file name.
        self._field_name = field_name
        self._file_name = file_name

        # Our actual file name is None by default, since, depending on our
        # config, we may not actually use the provided name.
        self._actual_file_name: bytes | None = None

        # Split the extension from the filename.
        if file_name is not None:
            base, ext = os.path.splitext(file_name)
            self._file_base = base
            self._ext = ext

    @property
    def field_name(self) -> bytes | None:
        """The form field associated with this file. May be None if there isn't
        one, for example when we have an application/octet-stream upload.
        """
        return self._field_name

    @property
    def file_name(self) -> bytes | None:
        """The file name given in the upload request."""
        return self._file_name

    @property
    def actual_file_name(self) -> bytes | None:
        """The file name that this file is saved as. Will be None if it's not
        currently saved on disk.
        """
        return self._actual_file_name

    @property
    def file_object(self):
        """The file object that we're currently writing to. Note that this
        will either be an instance of a :class:`io.BytesIO`, or a regular file
        object.
        """
        return self._fileobj

    @property
    def size(self) -> int:
        """The total size of this file, counted as the number of bytes that
        currently have been written to the file.
        """
        return self._bytes_written

    @property
    def in_memory(self) -> bool:
        """A boolean representing whether or not this file object is currently
        stored in-memory or on-disk.
        """
        return self._in_memory

    def flush_to_disk(self) -> None:
        """If the file is already on-disk, do nothing. Otherwise, copy from
        the in-memory buffer to a disk file, and then reassign our internal
        file object to this new disk file.

        Note that if you attempt to flush a file that is already on-disk, a
        warning will be logged to this module's logger.
        """
        if not self._in_memory:
            self.logger.warning("Trying to flush to disk when we're not in memory")
            return

        # Go back to the start of our file.
        self._fileobj.seek(0)

        # Open a new file.
        new_file = self._get_disk_file()

        # Copy the file objects.
        shutil.copyfileobj(self._fileobj, new_file)

        # Seek to the new position in our new file.
        new_file.seek(self._bytes_written)

        # Reassign the fileobject.
        old_fileobj = self._fileobj
        self._fileobj = new_file

        # We're no longer in memory.
        self._in_memory = False

        # Close the old file object.
        old_fileobj.close()

    def _get_disk_file(self) -> io.BufferedRandom | tempfile._TemporaryFileWrapper[bytes]:  # type: ignore[reportPrivateUsage]
        """This function is responsible for getting a file object on-disk for us.

        Raises:
            FileError: If the on-disk file could not be opened/created.
        """
        self.logger.info("Opening a file on disk")

        file_dir = self._config.get("UPLOAD_DIR")
        keep_filename = self._config.get("UPLOAD_KEEP_FILENAME", False)
        keep_extensions = self._config.get("UPLOAD_KEEP_EXTENSIONS", False)
        delete_tmp = self._config.get("UPLOAD_DELETE_TMP", True)

        # If we have a directory and are to keep the filename...
        if file_dir is not None and keep_filename:
            self.logger.info("Saving with filename in: %r", file_dir)

            # Build our filename.
            # TODO: what happens if we don't have a filename?
            fname = self._file_base + self._ext if keep_extensions else self._file_base

            # os.path.join() refuses to mix str and bytes components, and
            # `file_dir` (from config) may be either, while `fname` is bytes.
            # Normalize both to str via the filesystem encoding before joining.
            path = os.path.join(os.fsdecode(file_dir), os.fsdecode(fname))
            try:
                self.logger.info("Opening file: %r", path)
                tmp_file = open(path, "w+b")
            except OSError:
                self.logger.exception("Error opening temporary file")
                raise FileError("Error opening temporary file: %r" % path)
        else:
            # Build options array.
            # Note that on Python 3, tempfile doesn't support byte names. We
            # encode our paths using the default filesystem encoding.
            suffix = self._ext.decode(sys.getfilesystemencoding()) if keep_extensions else None

            if file_dir is None:
                dir = None
            elif isinstance(file_dir, bytes):
                dir = file_dir.decode(sys.getfilesystemencoding())
            else:
                dir = file_dir

            # Create a temporary (named) file with the appropriate settings.
            self.logger.info(
                "Creating a temporary file with options: %r", {"suffix": suffix, "delete": delete_tmp, "dir": dir}
            )
            try:
                tmp_file = tempfile.NamedTemporaryFile(suffix=suffix, delete=delete_tmp, dir=dir)
            except OSError:
                self.logger.exception("Error creating named temporary file")
                raise FileError("Error creating named temporary file")

            fname = tmp_file.name

        # Encode filename as bytes.
        if isinstance(fname, str):
            fname = fname.encode(sys.getfilesystemencoding())

        self._actual_file_name = fname
        return tmp_file

    def write(self, data: bytes) -> int:
        """Write some data to the File.

        :param data: a bytestring
        """
        return self.on_data(data)

    def on_data(self, data: bytes) -> int:
        """This method is a callback that will be called whenever data is
        written to the File.

        Args:
            data: The data to write to the file.

        Returns:
            The number of bytes written.
        """
        pos = self._fileobj.tell()
        bwritten = self._fileobj.write(data)
        # true file objects write returns None
        if bwritten is None:
            bwritten = self._fileobj.tell() - pos

        # If the bytes written isn't the same as the length, just return.
        if bwritten != len(data):
            self.logger.warning("bwritten != len(data) (%d != %d)", bwritten, len(data))
            return bwritten

        # Keep track of how many bytes we've written.
        self._bytes_written += bwritten

        # If we're in-memory and are over our limit, we create a file.
        max_memory_file_size = self._config.get("MAX_MEMORY_FILE_SIZE")
        if self._in_memory and max_memory_file_size is not None and (self._bytes_written > max_memory_file_size):
            self.logger.info("Flushing to disk")
            self.flush_to_disk()

        # Return the number of bytes written.
        return bwritten

    def on_end(self) -> None:
        """This method is called whenever the Field is finalized."""
        # Flush the underlying file object
        self._fileobj.flush()

    def finalize(self) -> None:
        """Finalize the form file. This will not close the underlying file,
        but simply signal that we are finished writing to the File.
        """
        self.on_end()

    def close(self) -> None:
        """Close the File object. This will actually close the underlying
        file object (whether it's a :class:`io.BytesIO` or an actual file
        object).
        """
        self._fileobj.close()

    def __repr__(self) -> str:
        return "{}(file_name={!r}, field_name={!r})".format(self.__class__.__name__, self.file_name, self.field_name)
565
566
class BaseParser:
    """Common base for every parser in this module; implements callback
    storage and dispatch.

    Two callback shapes exist. "Notification callbacks" signal that something
    happened - for example, that a new part of a multipart message was seen -
    and take no arguments. "Data callbacks" deliver a chunk of payload - for
    example, part of the body of a multipart chunk - and are invoked as::

        data_callback(data, start, end)

    where "data" is a bytestring and "start"/"end" are integer indexes into
    it: the slice `data[start:end]` is the region the callback is "interested
    in". The callback is not passed a copy of the data, since copying severely
    hurts performance.
    """

    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)

    def callback(self, name: str, data: bytes | None = None, start: int | None = None, end: int | None = None):
        """Invoke the callback registered under ``"on_" + name``, if any.

        Args:
            name: The name of the callback to call (as a string).
            data: Data to pass to the callback. If None, then it is assumed that the callback is a notification
                callback, and no parameters are given.
            end: An integer that is passed to the data callback.
            start: An integer that is passed to the data callback.
        """
        name = "on_" + name
        func = self.callbacks.get(name)
        if func is None:
            return

        if data is None:
            # Notification callback: invoked with no arguments.
            self.logger.debug("Calling %s with no data", name)
            func()
            return

        # Data callback: an empty region means there is nothing to report.
        if start is not None and start == end:
            return

        self.logger.debug("Calling %s with data[%d:%d]", name, start, end)
        func(data, start, end)

    def set_callback(self, name: str, new_func: Callable[..., Any] | None) -> None:
        """Install or remove the function for a callback. Removes it from the
        callbacks dict if new_func is None.

        :param name: The name of the callback to call (as a string).

        :param new_func: The new function for the callback. If None, then the
                         callback will be removed (with no error if it does not
                         exist).
        """
        key = "on_" + name
        if new_func is None:
            self.callbacks.pop(key, None)
        else:
            self.callbacks[key] = new_func

    def close(self):
        pass  # pragma: no cover

    def finalize(self):
        pass  # pragma: no cover

    def __repr__(self):
        return f"{self.__class__.__name__}()"
642
643
class OctetStreamParser(BaseParser):
    """This parser parses an octet-stream request body and calls callbacks when
    incoming data is received. Callbacks are as follows:

    | Callback Name  | Parameters      | Description                                          |
    |----------------|-----------------|------------------------------------------------------|
    | on_start       | None            | Called when the first data is parsed.                |
    | on_data        | data, start, end| Called for each data chunk that is parsed.           |
    | on_end         | None            | Called when the parser is finished parsing all data. |

    Args:
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][multipart.BaseParser].
            Defaults to a fresh, per-instance empty dictionary.
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

    Raises:
        ValueError: If max_size is not a positive number.
    """

    def __init__(self, callbacks: OctetStreamCallbacks | None = None, max_size: float = float("inf")):
        super().__init__()
        # Use a fresh dict per instance. A mutable default argument (`= {}`)
        # would be shared by every parser constructed without an explicit
        # callbacks argument, so set_callback() on one instance would leak
        # callbacks into all the others.
        self.callbacks = callbacks if callbacks is not None else {}
        self._started = False

        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size = max_size
        self._current_size = 0

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        and then pass the data to the underlying callback.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.
        """
        if not self._started:
            self.callback("start")
            self._started = True

        # Truncate data length.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        # Increment size, then callback, in case there's an exception.
        self._current_size += data_len
        self.callback("data", data, 0, data_len)
        return data_len

    def finalize(self) -> None:
        """Finalize this parser, which signals to that we are finished parsing,
        and sends the on_end callback.
        """
        self.callback("end")

    def __repr__(self) -> str:
        return "%s()" % self.__class__.__name__
710
711
class QuerystringParser(BaseParser):
    """This is a streaming querystring parser. It will consume data, and call
    the callbacks given when it has data.

    | Callback Name  | Parameters      | Description |
    |----------------|-----------------|-----------------------------------------------------|
    | on_field_start | None            | Called when a new field is encountered. |
    | on_field_name  | data, start, end| Called when a portion of a field's name is encountered. |
    | on_field_data  | data, start, end| Called when a portion of a field's data is encountered. |
    | on_field_end   | None            | Called when the end of a field is encountered. |
    | on_end         | None            | Called when the parser is finished parsing all data.|

    Args:
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][multipart.BaseParser].
            Defaults to a fresh, per-instance empty dictionary.
        strict_parsing: Whether or not to parse the body strictly. Defaults to False. If this is set to True, then the
            behavior of the parser changes as the following: if a field has a value with an equal sign
            (e.g. "foo=bar", or "foo="), it is always included. If a field has no equals sign (e.g. "...&name&..."),
            it will be treated as an error if 'strict_parsing' is True, otherwise included. If an error is encountered,
            then a [`QuerystringParseError`][multipart.exceptions.QuerystringParseError] will be raised.
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.

    Raises:
        ValueError: If max_size is not a positive number.
    """  # noqa: E501

    state: QuerystringState

    def __init__(
        self,
        callbacks: QuerystringCallbacks | None = None,
        strict_parsing: bool = False,
        max_size: float = float("inf"),
    ) -> None:
        super().__init__()
        self.state = QuerystringState.BEFORE_FIELD
        self._found_sep = False

        # Use a fresh dict per instance. A mutable default argument (`= {}`)
        # would be shared by every parser constructed without an explicit
        # callbacks argument, so set_callback() on one instance would leak
        # callbacks into all the others.
        self.callbacks = callbacks if callbacks is not None else {}

        # Max-size stuff
        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size = max_size
        self._current_size = 0

        # Should parsing be strict?
        self.strict_parsing = strict_parsing

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        parse into either a field name or value, and then pass the
        corresponding data to the underlying callback. If an error is
        encountered while parsing, a QuerystringParseError will be raised. The
        "offset" attribute of the raised exception will be set to the offset in
        the input data chunk (NOT the overall stream) that caused the error.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.
        """
        # Handle sizing.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        l = 0
        try:
            l = self._internal_write(data, data_len)
        finally:
            self._current_size += l

        return l

    def _internal_write(self, data: bytes, length: int) -> int:
        """Run the parser state machine over data[:length].

        Only the first `length` bytes are examined: the caller may have
        truncated `length` below len(data) to enforce max_size, so every
        search below is also bounded by `length`.
        """
        state = self.state
        strict_parsing = self.strict_parsing
        found_sep = self._found_sep

        i = 0
        while i < length:
            ch = data[i]

            # Depending on our state...
            if state == QuerystringState.BEFORE_FIELD:
                # If the 'found_sep' flag is set, we've already encountered
                # and skipped a single separator. If so, we check our strict
                # parsing flag and decide what to do. Otherwise, we haven't
                # yet reached a separator, and thus, if we do, we need to skip
                # it as it will be the boundary between fields that's supposed
                # to be there.
                if ch == AMPERSAND or ch == SEMICOLON:
                    if found_sep:
                        # If we're parsing strictly, we disallow blank chunks.
                        if strict_parsing:
                            e = QuerystringParseError("Skipping duplicate ampersand/semicolon at %d" % i)
                            e.offset = i
                            raise e
                        else:
                            self.logger.debug("Skipping duplicate ampersand/semicolon at %d", i)
                    else:
                        # This case is when we're skipping the (first)
                        # separator between fields, so we just set our flag
                        # and continue on.
                        found_sep = True
                else:
                    # Emit a field-start event, and go to that state. Also,
                    # reset the "found_sep" flag, for the next time we get to
                    # this state.
                    self.callback("field_start")
                    i -= 1
                    state = QuerystringState.FIELD_NAME
                    found_sep = False

            elif state == QuerystringState.FIELD_NAME:
                # Try and find a separator - we ensure that, if we do, we only
                # look for the equal sign before it. The searches are bounded
                # by `length` so that bytes truncated by the max_size check
                # are never parsed or emitted.
                sep_pos = data.find(b"&", i, length)
                if sep_pos == -1:
                    sep_pos = data.find(b";", i, length)

                # See if we can find an equals sign in the remaining data. If
                # so, we can immediately emit the field name and jump to the
                # data state.
                if sep_pos != -1:
                    equals_pos = data.find(b"=", i, sep_pos)
                else:
                    equals_pos = data.find(b"=", i, length)

                if equals_pos != -1:
                    # Emit this name.
                    self.callback("field_name", data, i, equals_pos)

                    # Jump i to this position. Note that it will then have 1
                    # added to it below, which means the next iteration of this
                    # loop will inspect the character after the equals sign.
                    i = equals_pos
                    state = QuerystringState.FIELD_DATA
                else:
                    # No equals sign found.
                    if not strict_parsing:
                        # See also comments in the QuerystringState.FIELD_DATA case below.
                        # If we found the separator, we emit the name and just
                        # end - there's no data callback at all (not even with
                        # a blank value).
                        if sep_pos != -1:
                            self.callback("field_name", data, i, sep_pos)
                            self.callback("field_end")

                            i = sep_pos - 1
                            state = QuerystringState.BEFORE_FIELD
                        else:
                            # Otherwise, no separator in this block, so the
                            # rest of this chunk must be a name.
                            self.callback("field_name", data, i, length)
                            i = length

                    else:
                        # We're parsing strictly. If we find a separator,
                        # this is an error - we require an equals sign.
                        if sep_pos != -1:
                            e = QuerystringParseError(
                                "When strict_parsing is True, we require an "
                                "equals sign in all field chunks. Did not "
                                "find one in the chunk that starts at %d" % (i,)
                            )
                            e.offset = i
                            raise e

                        # No separator in the rest of this chunk, so it's just
                        # a field name.
                        self.callback("field_name", data, i, length)
                        i = length

            elif state == QuerystringState.FIELD_DATA:
                # Try finding either an ampersand or a semicolon after this
                # position (bounded by `length`; see the note above).
                sep_pos = data.find(b"&", i, length)
                if sep_pos == -1:
                    sep_pos = data.find(b";", i, length)

                # If we found it, callback this bit as data and then go back
                # to expecting to find a field.
                if sep_pos != -1:
                    self.callback("field_data", data, i, sep_pos)
                    self.callback("field_end")

                    # Note that we go to the separator, which brings us to the
                    # "before field" state. This allows us to properly emit
                    # "field_start" events only when we actually have data for
                    # a field of some sort.
                    i = sep_pos - 1
                    state = QuerystringState.BEFORE_FIELD

                # Otherwise, emit the rest as data and finish.
                else:
                    self.callback("field_data", data, i, length)
                    i = length

            else:  # pragma: no cover (error case)
                msg = "Reached an unknown state %d at %d" % (state, i)
                self.logger.warning(msg)
                e = QuerystringParseError(msg)
                e.offset = i
                raise e

            i += 1

        self.state = state
        self._found_sep = found_sep
        # NOTE: historically this returns len(data), not `length`, even when
        # the caller truncated the chunk; write() adds this to _current_size.
        return len(data)

    def finalize(self) -> None:
        """Finalize this parser, which signals to that we are finished parsing,
        if we're still in the middle of a field, an on_field_end callback, and
        then the on_end callback.
        """
        # If we're currently in the middle of a field, we finish it.
        if self.state == QuerystringState.FIELD_DATA:
            self.callback("field_end")
        self.callback("end")

    def __repr__(self) -> str:
        return "{}(strict_parsing={!r}, max_size={!r})".format(
            self.__class__.__name__, self.strict_parsing, self.max_size
        )
942
943
class MultipartParser(BaseParser):
    """This class is a streaming multipart/form-data parser.

    | Callback Name      | Parameters      | Description |
    |--------------------|-----------------|-------------|
    | on_part_begin      | None            | Called when a new part of the multipart message is encountered. |
    | on_part_data       | data, start, end| Called when a portion of a part's data is encountered. |
    | on_part_end        | None            | Called when the end of a part is reached. |
    | on_header_begin    | None            | Called when we've found a new header in a part of a multipart message |
    | on_header_field    | data, start, end| Called each time an additional portion of a header is read (i.e. the part of the header that is before the colon; the "Foo" in "Foo: Bar"). |
    | on_header_value    | data, start, end| Called when we get data for a header. |
    | on_header_end      | None            | Called when the current header is finished - i.e. we've reached the newline at the end of the header. |
    | on_headers_finished| None            | Called when all headers are finished, and before the part data starts. |
    | on_end             | None            | Called when the parser is finished parsing all data. |

    Args:
        boundary: The multipart boundary. This is required, and must match what is given in the HTTP request - usually in the Content-Type header.
        callbacks: A dictionary of callbacks. See the documentation for [`BaseParser`][multipart.BaseParser].
        max_size: The maximum size of body to parse. Defaults to infinity - i.e. unbounded.
    """  # noqa: E501

    def __init__(
        self, boundary: bytes | str, callbacks: MultipartCallbacks = {}, max_size: float = float("inf")
    ) -> None:
        # Initialize parser state.
        super().__init__()
        self.state = MultipartState.START
        # 'index' tracks our position within the boundary while matching;
        # 'flags' records whether we saw a part boundary or the final one.
        self.index = self.flags = 0

        # NOTE: the shared {} default is safe here because callbacks is only
        # ever read, never mutated.
        self.callbacks = callbacks

        if not isinstance(max_size, Number) or max_size < 1:
            raise ValueError("max_size must be a positive number, not %r" % max_size)
        self.max_size = max_size
        self._current_size = 0

        # Setup marks. These are used to track the state of data received.
        self.marks: dict[str, int] = {}

        # TODO: Actually use this rather than the dumb version we currently use
        # # Precompute the skip table for the Boyer-Moore-Horspool algorithm.
        # skip = [len(boundary) for x in range(256)]
        # for i in range(len(boundary) - 1):
        #     skip[ord_char(boundary[i])] = len(boundary) - i - 1
        #
        # # We use a tuple since it's a constant, and marginally faster.
        # self.skip = tuple(skip)

        # Save our boundary, always prefixed with the CRLF + double-hyphen
        # delimiter that precedes every boundary occurrence in the body.
        if isinstance(boundary, str):  # pragma: no cover
            boundary = boundary.encode("latin-1")
        self.boundary = b"\r\n--" + boundary

        # Get a set of characters that belong to our boundary.
        self.boundary_chars = frozenset(self.boundary)

        # We also create a lookbehind list.
        # Note: the +8 is since we can have, at maximum, "\r\n--" + boundary +
        # "--\r\n" at the final boundary, and the length of '\r\n--' and
        # '--\r\n' is 8 bytes.
        self.lookbehind = [NULL for _ in range(len(boundary) + 8)]

    def write(self, data: bytes) -> int:
        """Write some data to the parser, which will perform size verification,
        and then parse the data into the appropriate location (e.g. header,
        data, etc.), and pass this on to the underlying callback.  If an error
        is encountered, a MultipartParseError will be raised.  The "offset"
        attribute on the raised exception will be set to the offset of the byte
        in the input chunk that caused the error.

        Args:
            data: The data to write to the parser.

        Returns:
            The number of bytes written.  This may be less than ``len(data)``
            when the configured ``max_size`` is reached and the input is
            truncated.
        """
        # Handle sizing.
        data_len = len(data)
        if (self._current_size + data_len) > self.max_size:
            # We truncate the length of data that we are to process.
            new_size = int(self.max_size - self._current_size)
            self.logger.warning(
                "Current size is %d (max %d), so truncating data length from %d to %d",
                self._current_size,
                self.max_size,
                data_len,
                new_size,
            )
            data_len = new_size

        l = 0
        try:
            l = self._internal_write(data, data_len)
        finally:
            # Track the total size even if parsing raised, so that repeated
            # writes cannot exceed max_size.
            self._current_size += l

        return l

    def _internal_write(self, data: bytes, length: int) -> int:
        # Get values from locals.
        boundary = self.boundary

        # Get our state, flags and index.  These are persisted between calls to
        # this function.
        state = self.state
        index = self.index
        flags = self.flags

        # Our index defaults to 0.
        i = 0

        # Set a mark.  A mark remembers where in the current chunk a run of
        # data (header field, header value, or part data) began.
        def set_mark(name: str) -> None:
            self.marks[name] = i

        # Remove a mark.
        # NOTE(review): the 'reset' parameter is accepted but currently
        # ignored; callers only rely on the mark being dropped.
        def delete_mark(name: str, reset: bool = False) -> None:
            self.marks.pop(name, None)

        # Helper function that makes calling a callback with data easier. The
        # 'remaining' parameter will callback from the marked value until the
        # end of the buffer, and reset the mark, instead of deleting it.  This
        # is used at the end of the function to call our callbacks with any
        # remaining data in this chunk.
        def data_callback(name: str, remaining: bool = False) -> None:
            marked_index = self.marks.get(name)
            if marked_index is None:
                return

            # If we're getting remaining data, we ignore the current i value
            # and just call with the remaining data.
            if remaining:
                self.callback(name, data, marked_index, length)
                self.marks[name] = 0

            # Otherwise, we call it from the mark to the current byte we're
            # processing.
            else:
                self.callback(name, data, marked_index, i)
                self.marks.pop(name, None)

        # For each byte...
        while i < length:
            c = data[i]

            if state == MultipartState.START:
                # Skip leading newlines
                if c == CR or c == LF:
                    i += 1
                    self.logger.debug("Skipping leading CR/LF at %d", i)
                    continue

                # index is used as in index into our boundary.  Set to 0.
                index = 0

                # Move to the next state, but decrement i so that we re-process
                # this character.
                state = MultipartState.START_BOUNDARY
                i -= 1

            elif state == MultipartState.START_BOUNDARY:
                # Check to ensure that the last 2 characters in our boundary
                # are CRLF.
                if index == len(boundary) - 2:
                    if c != CR:
                        # Error!
                        msg = "Did not find CR at end of boundary (%d)" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    index += 1

                elif index == len(boundary) - 2 + 1:
                    if c != LF:
                        msg = "Did not find LF at end of boundary (%d)" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    # The index is now used for indexing into our boundary.
                    index = 0

                    # Callback for the start of a part.
                    self.callback("part_begin")

                    # Move to the next character and state.
                    state = MultipartState.HEADER_FIELD_START

                else:
                    # Check to ensure our boundary matches.  The +2 skips the
                    # leading "\r\n" of self.boundary, which is absent before
                    # the very first boundary in the body.
                    if c != boundary[index + 2]:
                        msg = "Did not find boundary character %r at index " "%d" % (c, index + 2)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    # Increment index into boundary and continue.
                    index += 1

            elif state == MultipartState.HEADER_FIELD_START:
                # Mark the start of a header field here, reset the index, and
                # continue parsing our header field.
                index = 0

                # Set a mark of our header field.
                set_mark("header_field")

                # Notify that we're starting a header if the next character is
                # not a CR; a CR at the beginning of the header will cause us
                # to stop parsing headers in the MultipartState.HEADER_FIELD state,
                # below.
                if c != CR:
                    self.callback("header_begin")

                # Move to parsing header fields.
                state = MultipartState.HEADER_FIELD
                i -= 1

            elif state == MultipartState.HEADER_FIELD:
                # If we've reached a CR at the beginning of a header, it means
                # that we've reached the second of 2 newlines, and so there are
                # no more headers to parse.
                if c == CR and index == 0:
                    delete_mark("header_field")
                    state = MultipartState.HEADERS_ALMOST_DONE
                    i += 1
                    continue

                # Increment our index in the header.
                index += 1

                # If we've reached a colon, we're done with this header.
                if c == COLON:
                    # A 0-length header is an error.
                    if index == 1:
                        msg = "Found 0-length header at %d" % (i,)
                        self.logger.warning(msg)
                        e = MultipartParseError(msg)
                        e.offset = i
                        raise e

                    # Call our callback with the header field.
                    data_callback("header_field")

                    # Move to parsing the header value.
                    state = MultipartState.HEADER_VALUE_START

                elif c not in TOKEN_CHARS_SET:
                    msg = "Found invalid character %r in header at %d" % (c, i)
                    self.logger.warning(msg)
                    e = MultipartParseError(msg)
                    e.offset = i
                    raise e

            elif state == MultipartState.HEADER_VALUE_START:
                # Skip leading spaces.
                if c == SPACE:
                    i += 1
                    continue

                # Mark the start of the header value.
                set_mark("header_value")

                # Move to the header-value state, reprocessing this character.
                state = MultipartState.HEADER_VALUE
                i -= 1

            elif state == MultipartState.HEADER_VALUE:
                # If we've got a CR, we're nearly done our headers.  Otherwise,
                # we do nothing and just move past this character.
                if c == CR:
                    data_callback("header_value")
                    self.callback("header_end")
                    state = MultipartState.HEADER_VALUE_ALMOST_DONE

            elif state == MultipartState.HEADER_VALUE_ALMOST_DONE:
                # The last character should be a LF.  If not, it's an error.
                if c != LF:
                    msg = "Did not find LF character at end of header " "(found %r)" % (c,)
                    self.logger.warning(msg)
                    e = MultipartParseError(msg)
                    e.offset = i
                    raise e

                # Move back to the start of another header.  Note that if that
                # state detects ANOTHER newline, it'll trigger the end of our
                # headers.
                state = MultipartState.HEADER_FIELD_START

            elif state == MultipartState.HEADERS_ALMOST_DONE:
                # We're almost done our headers.  This is reached when we parse
                # a CR at the beginning of a header, so our next character
                # should be a LF, or it's an error.
                if c != LF:
                    msg = f"Did not find LF at end of headers (found {c!r})"
                    self.logger.warning(msg)
                    e = MultipartParseError(msg)
                    e.offset = i
                    raise e

                self.callback("headers_finished")
                state = MultipartState.PART_DATA_START

            elif state == MultipartState.PART_DATA_START:
                # Mark the start of our part data.
                set_mark("part_data")

                # Start processing part data, including this character.
                state = MultipartState.PART_DATA
                i -= 1

            elif state == MultipartState.PART_DATA:
                # We're processing our part data right now.  During this, we
                # need to efficiently search for our boundary, since any data
                # on any number of lines can be a part of the current data.
                # We use the Boyer-Moore-Horspool algorithm to efficiently
                # search through the remainder of the buffer looking for our
                # boundary.

                # Save the current value of our index.  We use this in case we
                # find part of a boundary, but it doesn't match fully.
                prev_index = index

                # Set up variables.
                boundary_length = len(boundary)
                boundary_end = boundary_length - 1
                data_length = length
                boundary_chars = self.boundary_chars

                # If our index is 0, we're starting a new part, so start our
                # search.
                if index == 0:
                    # Search forward until we either hit the end of our buffer,
                    # or reach a character that's in our boundary.  Bytes that
                    # cannot occur anywhere in the boundary let us skip a whole
                    # boundary length at a time.
                    i += boundary_end
                    while i < data_length - 1 and data[i] not in boundary_chars:
                        i += boundary_length

                    # Reset i back the length of our boundary, which is the
                    # earliest possible location that could be our match (i.e.
                    # if we've just broken out of our loop since we saw the
                    # last character in our boundary)
                    i -= boundary_end
                    c = data[i]

                # Now, we have a couple of cases here.  If our index is before
                # the end of the boundary...
                if index < boundary_length:
                    # If the character matches...
                    if boundary[index] == c:
                        # If we found a match for our boundary, we send the
                        # existing data.
                        if index == 0:
                            data_callback("part_data")

                        # The current character matches, so continue!
                        index += 1
                    else:
                        index = 0

                # Our index is equal to the length of our boundary!
                elif index == boundary_length:
                    # First we increment it.
                    index += 1

                    # Now, if we've reached a newline, we need to set this as
                    # the potential end of our boundary.
                    if c == CR:
                        flags |= FLAG_PART_BOUNDARY

                    # Otherwise, if this is a hyphen, we might be at the last
                    # of all boundaries.
                    elif c == HYPHEN:
                        flags |= FLAG_LAST_BOUNDARY

                    # Otherwise, we reset our index, since this isn't either a
                    # newline or a hyphen.
                    else:
                        index = 0

                # Our index is right after the part boundary, which should be
                # a LF.
                elif index == boundary_length + 1:
                    # If we're at a part boundary (i.e. we've seen a CR
                    # character already)...
                    if flags & FLAG_PART_BOUNDARY:
                        # We need a LF character next.
                        if c == LF:
                            # Unset the part boundary flag.
                            flags &= ~FLAG_PART_BOUNDARY

                            # Callback indicating that we've reached the end of
                            # a part, and are starting a new one.
                            self.callback("part_end")
                            self.callback("part_begin")

                            # Move to parsing new headers.
                            index = 0
                            state = MultipartState.HEADER_FIELD_START
                            i += 1
                            continue

                        # We didn't find an LF character, so no match.  Reset
                        # our index and clear our flag.
                        index = 0
                        flags &= ~FLAG_PART_BOUNDARY

                    # Otherwise, if we're at the last boundary (i.e. we've
                    # seen a hyphen already)...
                    elif flags & FLAG_LAST_BOUNDARY:
                        # We need a second hyphen here.
                        if c == HYPHEN:
                            # Callback to end the current part, and then the
                            # message.
                            self.callback("part_end")
                            self.callback("end")
                            state = MultipartState.END
                        else:
                            # No match, so reset index.
                            index = 0

                # If we have an index, we need to keep this byte for later, in
                # case we can't match the full boundary.
                if index > 0:
                    self.lookbehind[index - 1] = c

                # Otherwise, our index is 0.  If the previous index is not, it
                # means we reset something, and we need to take the data we
                # thought was part of our boundary and send it along as actual
                # data.
                elif prev_index > 0:
                    # Callback to write the saved data.
                    lb_data = join_bytes(self.lookbehind)
                    self.callback("part_data", lb_data, 0, prev_index)

                    # Overwrite our previous index.
                    prev_index = 0

                    # Re-set our mark for part data.
                    set_mark("part_data")

                    # Re-consider the current character, since this could be
                    # the start of the boundary itself.
                    i -= 1

            elif state == MultipartState.END:
                # Do nothing and just consume a byte in the end state.
                if c not in (CR, LF):
                    self.logger.warning("Consuming a byte '0x%x' in the end state", c)

            else:  # pragma: no cover (error case)
                # We got into a strange state somehow!  Just stop processing.
                msg = "Reached an unknown state %d at %d" % (state, i)
                self.logger.warning(msg)
                e = MultipartParseError(msg)
                e.offset = i
                raise e

            # Move to the next byte.
            i += 1

        # We call our callbacks with any remaining data.  Note that we pass
        # the 'remaining' flag, which sets the mark back to 0 instead of
        # deleting it, if it's found.  This is because, if the mark is found
        # at this point, we assume that there's data for one of these things
        # that has been parsed, but not yet emitted.  And, as such, it implies
        # that we haven't yet reached the end of this 'thing'.  So, by setting
        # the mark to 0, we cause any data callbacks that take place in future
        # calls to this function to start from the beginning of that buffer.
        data_callback("header_field", True)
        data_callback("header_value", True)
        data_callback("part_data", True)

        # Save values to locals.
        self.state = state
        self.index = index
        self.flags = flags

        # Return our data length to indicate no errors, and that we processed
        # all of it.
        return length

    def finalize(self) -> None:
        """Finalize this parser, which signals to that we are finished parsing.

        Note: It does not currently, but in the future, it will verify that we
        are in the final state of the parser (i.e. the end of the multipart
        message is well-formed), and, if not, throw an error.
        """
        # TODO: verify that we're in the state MultipartState.END, otherwise throw an
        # error or otherwise state that we're not finished parsing.
        pass

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(boundary={self.boundary!r})"
1443
1444
class FormParser:
    """This class is the all-in-one form parser.  Given all the information
    necessary to parse a form, it will instantiate the correct parser, create
    the proper :class:`Field` and :class:`File` classes to store the data that
    is parsed, and call the two given callbacks with each field and file as
    they become available.

    Args:
        content_type: The Content-Type of the incoming request.  This is used to select the appropriate parser.
        on_field: The callback to call when a field has been parsed and is ready for usage.  See above for parameters.
        on_file: The callback to call when a file has been parsed and is ready for usage.  See above for parameters.
        on_end: An optional callback to call when all fields and files in a request has been parsed.  Can be None.
        boundary: If the request is a multipart/form-data request, this should be the boundary of the request, as given
            in the Content-Type header, as a bytestring.
        file_name: If the request is of type application/octet-stream, then the body of the request will not contain any
            information about the uploaded file.  In such cases, you can provide the file name of the uploaded file
            manually.
        FileClass: The class to use for uploaded files.  Defaults to :class:`File`, but you can provide your own class
            if you wish to customize behaviour.  The class will be instantiated as FileClass(file_name, field_name), and
            it must provide the following functions::
                - file_instance.write(data)
                - file_instance.finalize()
                - file_instance.close()
        FieldClass: The class to use for uploaded fields.  Defaults to :class:`Field`, but you can provide your own
            class if you wish to customize behaviour.  The class will be instantiated as FieldClass(field_name), and it
            must provide the following functions::
                - field_instance.write(data)
                - field_instance.finalize()
                - field_instance.close()
                - field_instance.set_none()
        config: Configuration to use for this FormParser.  The default values are taken from the DEFAULT_CONFIG value,
            and then any keys present in this dictionary will overwrite the default values.
    """

    #: This is the default configuration for our form parser.
    #: Note: all file sizes should be in bytes.
    DEFAULT_CONFIG: FormParserConfig = {
        "MAX_BODY_SIZE": float("inf"),
        "MAX_MEMORY_FILE_SIZE": 1 * 1024 * 1024,
        "UPLOAD_DIR": None,
        "UPLOAD_KEEP_FILENAME": False,
        "UPLOAD_KEEP_EXTENSIONS": False,
        # Error on invalid Content-Transfer-Encoding?
        "UPLOAD_ERROR_ON_BAD_CTE": False,
    }

    def __init__(
        self,
        content_type: str,
        on_field: OnFieldCallback,
        on_file: OnFileCallback,
        on_end: Callable[[], None] | None = None,
        boundary: bytes | str | None = None,
        file_name: bytes | None = None,
        FileClass: type[FileProtocol] = File,
        FieldClass: type[FieldProtocol] = Field,
        config: dict[Any, Any] = {},
    ) -> None:
        self.logger = logging.getLogger(__name__)

        # Save variables.
        self.content_type = content_type
        self.boundary = boundary
        self.bytes_received = 0
        self.parser = None

        # Save callbacks.
        self.on_field = on_field
        self.on_file = on_file
        self.on_end = on_end

        # Save classes.  BUGFIX: previously these stored the module-level
        # defaults (File / Field) and silently ignored any caller-provided
        # FileClass / FieldClass, contradicting the documented customization
        # contract above.
        self.FileClass = FileClass
        self.FieldClass = FieldClass

        # Set configuration options.  The shared {} default for 'config' is
        # safe because it is only read from, never mutated.
        self.config = self.DEFAULT_CONFIG.copy()
        self.config.update(config)

        # Depending on the Content-Type, we instantiate the correct parser.
        if content_type == "application/octet-stream":
            # The entire body is a single anonymous file; buffer it into one
            # FileClass instance and hand it to on_file at the end.
            file: FileProtocol = None  # type: ignore

            def on_start() -> None:
                nonlocal file
                file = FileClass(file_name, None, config=self.config)

            def on_data(data: bytes, start: int, end: int) -> None:
                nonlocal file
                file.write(data[start:end])

            def _on_end() -> None:
                nonlocal file
                # Finalize the file itself.
                file.finalize()

                # Call our callback.
                on_file(file)

                # Call the on-end callback.
                if self.on_end is not None:
                    self.on_end()

            # Instantiate an octet-stream parser
            parser = OctetStreamParser(
                callbacks={"on_start": on_start, "on_data": on_data, "on_end": _on_end},
                max_size=self.config["MAX_BODY_SIZE"],
            )

        elif content_type == "application/x-www-form-urlencoded" or content_type == "application/x-url-encoded":
            # Accumulate name chunks until the first data chunk arrives, at
            # which point the FieldClass instance is created lazily.
            name_buffer: list[bytes] = []

            f: FieldProtocol = None  # type: ignore

            def on_field_start() -> None:
                pass

            def on_field_name(data: bytes, start: int, end: int) -> None:
                name_buffer.append(data[start:end])

            def on_field_data(data: bytes, start: int, end: int) -> None:
                nonlocal f
                if f is None:
                    f = FieldClass(b"".join(name_buffer))
                    del name_buffer[:]
                f.write(data[start:end])

            def on_field_end() -> None:
                nonlocal f
                # Finalize and call callback.
                if f is None:
                    # If we get here, it's because there was no field data.
                    # We create a field, set it to None, and then continue.
                    f = FieldClass(b"".join(name_buffer))
                    del name_buffer[:]
                    f.set_none()

                f.finalize()
                on_field(f)
                f = None

            def _on_end() -> None:
                if self.on_end is not None:
                    self.on_end()

            # Instantiate parser.
            parser = QuerystringParser(
                callbacks={
                    "on_field_start": on_field_start,
                    "on_field_name": on_field_name,
                    "on_field_data": on_field_data,
                    "on_field_end": on_field_end,
                    "on_end": _on_end,
                },
                max_size=self.config["MAX_BODY_SIZE"],
            )

        elif content_type == "multipart/form-data":
            if boundary is None:
                self.logger.error("No boundary given")
                raise FormParserError("No boundary given")

            # Per-part state: accumulated header name/value chunks, the
            # parsed header dict, the current Field/File object, and the
            # writer (the object itself, or a CTE decoder wrapping it).
            header_name: list[bytes] = []
            header_value: list[bytes] = []
            headers = {}

            f: FileProtocol | FieldProtocol | None = None
            writer = None
            is_file = False

            def on_part_begin() -> None:
                # Reset headers in case this isn't the first part.
                nonlocal headers
                headers = {}

            def on_part_data(data: bytes, start: int, end: int) -> None:
                nonlocal writer
                bytes_processed = writer.write(data[start:end])
                # TODO: check for error here.
                return bytes_processed

            def on_part_end() -> None:
                nonlocal f, is_file
                f.finalize()
                if is_file:
                    on_file(f)
                else:
                    on_field(f)

            def on_header_field(data: bytes, start: int, end: int) -> None:
                header_name.append(data[start:end])

            def on_header_value(data: bytes, start: int, end: int) -> None:
                header_value.append(data[start:end])

            def on_header_end() -> None:
                headers[b"".join(header_name)] = b"".join(header_value)
                del header_name[:]
                del header_value[:]

            def on_headers_finished() -> None:
                nonlocal is_file, f, writer
                # Reset the 'is file' flag.
                is_file = False

                # Parse the content-disposition header.
                # TODO: handle mixed case
                content_disp = headers.get(b"Content-Disposition")
                disp, options = parse_options_header(content_disp)

                # Get the field and filename.
                field_name = options.get(b"name")
                file_name = options.get(b"filename")
                # TODO: check for errors

                # Create the proper class.  A part with a filename is a file
                # upload; one without is a plain form field.
                if file_name is None:
                    f = FieldClass(field_name)
                else:
                    f = FileClass(file_name, field_name, config=self.config)
                    is_file = True

                # Parse the given Content-Transfer-Encoding to determine what
                # we need to do with the incoming data.
                # TODO: check that we properly handle 8bit / 7bit encoding.
                transfer_encoding = headers.get(b"Content-Transfer-Encoding", b"7bit")

                if transfer_encoding in (b"binary", b"8bit", b"7bit"):
                    writer = f

                elif transfer_encoding == b"base64":
                    writer = Base64Decoder(f)

                elif transfer_encoding == b"quoted-printable":
                    writer = QuotedPrintableDecoder(f)

                else:
                    self.logger.warning("Unknown Content-Transfer-Encoding: %r", transfer_encoding)
                    if self.config["UPLOAD_ERROR_ON_BAD_CTE"]:
                        raise FormParserError('Unknown Content-Transfer-Encoding "{}"'.format(transfer_encoding))
                    else:
                        # If we aren't erroring, then we just treat this as an
                        # unencoded Content-Transfer-Encoding.
                        writer = f

            def _on_end() -> None:
                nonlocal writer
                writer.finalize()
                if self.on_end is not None:
                    self.on_end()

            # Instantiate a multipart parser.
            parser = MultipartParser(
                boundary,
                callbacks={
                    "on_part_begin": on_part_begin,
                    "on_part_data": on_part_data,
                    "on_part_end": on_part_end,
                    "on_header_field": on_header_field,
                    "on_header_value": on_header_value,
                    "on_header_end": on_header_end,
                    "on_headers_finished": on_headers_finished,
                    "on_end": _on_end,
                },
                max_size=self.config["MAX_BODY_SIZE"],
            )

        else:
            self.logger.warning("Unknown Content-Type: %r", content_type)
            raise FormParserError("Unknown Content-Type: {}".format(content_type))

        self.parser = parser

    def write(self, data: bytes) -> int:
        """Write some data.  The parser will forward this to the appropriate
        underlying parser.

        Args:
            data: The data to write.

        Returns:
            The number of bytes processed.
        """
        self.bytes_received += len(data)
        # TODO: check the parser's return value for errors?
        return self.parser.write(data)

    def finalize(self) -> None:
        """Finalize the parser."""
        if self.parser is not None and hasattr(self.parser, "finalize"):
            self.parser.finalize()

    def close(self) -> None:
        """Close the parser."""
        if self.parser is not None and hasattr(self.parser, "close"):
            self.parser.close()

    def __repr__(self) -> str:
        return "{}(content_type={!r}, parser={!r})".format(self.__class__.__name__, self.content_type, self.parser)
1744
1745
def create_form_parser(
    headers: dict[str, bytes],
    on_field: OnFieldCallback,
    on_file: OnFileCallback,
    trust_x_headers: bool = False,
    config: dict[Any, Any] = {},
) -> FormParser:
    """This function is a helper function to aid in creating a FormParser
    instances.  Given a dictionary-like headers object, it will determine
    the correct information needed, instantiate a FormParser with the
    appropriate values and given callbacks, and then return the corresponding
    parser.

    Args:
        headers: A dictionary-like object of HTTP headers.  The only required header is Content-Type.
        on_field: Callback to call with each parsed field.
        on_file: Callback to call with each parsed file.
        trust_x_headers: Whether or not to trust information received from certain X-Headers - for example, the file
            name from X-File-Name.  Defaults to False, in which case X-File-Name is ignored.
        config: Configuration variables to pass to the FormParser.

    Raises:
        ValueError: If no Content-Type header is present.
    """
    content_type = headers.get("Content-Type")
    if content_type is None:
        logging.getLogger(__name__).warning("No Content-Type header given")
        raise ValueError("No Content-Type header given!")

    # Boundaries are optional (the FormParser will raise if one is needed
    # but not given).
    content_type, params = parse_options_header(content_type)
    boundary = params.get(b"boundary")

    # We need content_type to be a string, not a bytes object.
    content_type = content_type.decode("latin-1")

    # File names are optional.  BUGFIX: X-File-Name is client-supplied and
    # therefore untrusted; previously it was honored unconditionally even
    # though the trust_x_headers parameter existed.  Only read it when the
    # caller has explicitly opted in.
    file_name = headers.get("X-File-Name") if trust_x_headers else None

    # Instantiate a form parser.
    form_parser = FormParser(content_type, on_field, on_file, boundary=boundary, file_name=file_name, config=config)

    # Return our parser.
    return form_parser
1788
1789
def parse_form(
    headers: dict[str, bytes],
    input_stream: io.FileIO,
    on_field: OnFieldCallback,
    on_file: OnFileCallback,
    chunk_size: int = 1048576,
) -> None:
    """Parse a request body with minimal setup.

    Pass a dictionary-like object of the request's headers and a file-like
    object for the input stream, along with two callbacks that are invoked
    whenever a field or file has been parsed.

    Args:
        headers: A dictionary-like object of HTTP headers.  The only required header is Content-Type.
        input_stream: A file-like object that represents the request body.  The read() method must return bytestrings.
        on_field: Callback to call with each parsed field.
        on_file: Callback to call with each parsed file.
        chunk_size: The maximum size to read from the input stream and write to the parser at one time.
            Defaults to 1 MiB.
    """
    # Build the parser appropriate for the request's Content-Type.
    parser = create_form_parser(headers, on_field, on_file)

    # Determine how many bytes we are allowed to consume.  With no
    # Content-Length header we read until the stream is exhausted.
    raw_length = headers.get("Content-Length")
    content_length = int(raw_length) if raw_length is not None else float("inf")

    bytes_read = 0
    while True:
        # Never request more than the remaining Content-Length allows.
        to_read = min(content_length - bytes_read, chunk_size)
        chunk = input_stream.read(to_read)

        # Feed the chunk to the parser and account for it.
        parser.write(chunk)
        bytes_read += len(chunk)

        # A short read means the stream is exhausted; hitting the declared
        # Content-Length also ends the loop.
        if len(chunk) != to_read or bytes_read == content_length:
            break

    # Signal to the parser that no more data is coming.
    parser.finalize()