Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_doc

7# Redistribution and use in source and binary forms, with or without

8# modification, are permitted provided that the following conditions are

9# met:

10#

11# * Redistributions of source code must retain the above copyright notice,

12# this list of conditions and the following disclaimer.

13# * Redistributions in binary form must reproduce the above copyright notice,

14# this list of conditions and the following disclaimer in the documentation

15# and/or other materials provided with the distribution.

16# * The name of the author may not be used to endorse or promote products

17# derived from this software without specific prior written permission.

18#

19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

20# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

21# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

22# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

23# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

24# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

25# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

26# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

27# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

28# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

29# POSSIBILITY OF SUCH DAMAGE.

31import struct

32from abc import ABC, abstractmethod

33from collections.abc import Generator, Iterable, Iterator, Mapping

34from datetime import datetime

35from typing import (

36 Any,

37 NoReturn,

38 Optional,

39 Union,

40 cast,

41)

43from ._encryption import Encryption

44from ._page import PageObject, _VirtualList

45from ._page_labels import index2label as page_index2page_label

46from ._utils import (

47 deprecation_with_replacement,

48 logger_warning,

49 parse_iso8824_date,

50)

51from .constants import CatalogAttributes as CA

52from .constants import CatalogDictionary as CD

53from .constants import (

54 CheckboxRadioButtonAttributes,

55 GoToActionArguments,

56 PagesAttributes,

57 UserAccessPermissions,

58)

59from .constants import Core as CO

60from .constants import DocumentInformationAttributes as DI

61from .constants import FieldDictionaryAttributes as FA

62from .constants import PageAttributes as PG

63from .errors import PdfReadError, PyPdfError

64from .filters import _decompress_with_limit

65from .generic import (

66 ArrayObject,

67 BooleanObject,

68 ByteStringObject,

69 Destination,

70 DictionaryObject,

71 EncodedStreamObject,

72 Field,

73 Fit,

74 FloatObject,

75 IndirectObject,

76 NameObject,

77 NullObject,

78 NumberObject,

79 PdfObject,

80 TextStringObject,

81 TreeObject,

82 ViewerPreferences,

83 create_string_object,

84 is_null_or_none,

85)

86from .generic._files import EmbeddedFile

87from .types import OutlineType, PagemodeType

88from .xmp import XmpInformation

def convert_to_int(d: bytes, size: int) -> Union[int, tuple[Any, ...]]:
    """
    Decode ``d`` as a big-endian unsigned integer of at most 8 bytes.

    Args:
        d: The raw bytes. Only the trailing 8 bytes contribute to the result;
            shorter input is treated as if it were left-padded with zeros.
        size: The declared width in bytes. It is only validated.

    Returns:
        The decoded non-negative integer.

    Raises:
        PdfReadError: If ``size`` is greater than 8.

    """
    if size > 8:
        raise PdfReadError("Invalid size in convert_to_int")
    # Same value as zero-padding to 8 bytes and unpacking with ">Q".
    trailing = d[-8:]
    return cast(int, int.from_bytes(trailing, byteorder="big"))

class DocumentInformation(DictionaryObject):
    """
    Dictionary-backed view of the basic document metadata of a PDF file.

    Instances are reachable through
    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.

    Each textual entry is exposed through *two* properties, e.g. ``author``
    and ``author_raw``. The plain property yields text (or ``None``), which is
    convenient when the metadata is displayed. The ``*_raw`` property hands
    back the stored object unchanged; this can be a ``ByteStringObject`` when
    pypdf was unable to decode the text encoding, so callers have to be more
    careful with it.
    """

    def __init__(self) -> None:
        super().__init__()

    def _get_text(self, key: str) -> Optional[str]:
        """
        Look up ``key`` and return its value as text.

        A ``TextStringObject`` is returned as stored, a ``ByteStringObject``
        is converted with ``str()``; anything else (including a missing
        entry) gives ``None``.
        """
        value = self.get(key, None)
        if isinstance(value, TextStringObject):
            return value
        return str(value) if isinstance(value, ByteStringObject) else None

    @property
    def title(self) -> Optional[str]:
        """
        Read-only property accessing the document's title.

        Returns ``None`` if the title is missing or falsy. Otherwise the text
        form is returned, falling back to the resolved stored object when no
        text form is available.
        """
        stored = self.get(DI.TITLE)
        if not stored:
            return None
        return self._get_text(DI.TITLE) or stored.get_object()  # type: ignore[union-attr]

    @property
    def title_raw(self) -> Optional[str]:
        """Title exactly as stored; may be a ``ByteStringObject``."""
        return self.get(DI.TITLE)

    @property
    def author(self) -> Optional[str]:
        """
        Read-only property accessing the document's author.

        Returns the author as text, or ``None`` if it is not specified.
        """
        return self._get_text(DI.AUTHOR)

    @property
    def author_raw(self) -> Optional[str]:
        """Author exactly as stored; may be a ``ByteStringObject``."""
        return self.get(DI.AUTHOR)

    @property
    def subject(self) -> Optional[str]:
        """
        Read-only property accessing the document's subject.

        Returns the subject as text, or ``None`` if it is not specified.
        """
        return self._get_text(DI.SUBJECT)

    @property
    def subject_raw(self) -> Optional[str]:
        """Subject exactly as stored; may be a ``ByteStringObject``."""
        return self.get(DI.SUBJECT)

    @property
    def creator(self) -> Optional[str]:
        """
        Read-only property accessing the document's creator.

        For a document converted to PDF from another format, this names the
        application (e.g. OpenOffice) that created the original document.
        Returns the creator as text, or ``None`` if it is not specified.
        """
        return self._get_text(DI.CREATOR)

    @property
    def creator_raw(self) -> Optional[str]:
        """Creator exactly as stored; may be a ``ByteStringObject``."""
        return self.get(DI.CREATOR)

    @property
    def producer(self) -> Optional[str]:
        """
        Read-only property accessing the document's producer.

        For a document converted to PDF from another format, this names the
        application (for example, macOS Quartz) that performed the
        conversion. Returns the producer as text, or ``None`` if it is not
        specified.
        """
        return self._get_text(DI.PRODUCER)

    @property
    def producer_raw(self) -> Optional[str]:
        """Producer exactly as stored; may be a ``ByteStringObject``."""
        return self.get(DI.PRODUCER)

    @property
    def creation_date(self) -> Optional[datetime]:
        """Read-only property accessing the document's creation date."""
        date_text = self._get_text(DI.CREATION_DATE)
        return parse_iso8824_date(date_text)

    @property
    def creation_date_raw(self) -> Optional[str]:
        """
        Creation date exactly as stored; may be a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        """
        return self.get(DI.CREATION_DATE)

    @property
    def modification_date(self) -> Optional[datetime]:
        """
        Read-only property accessing the document's modification date.

        This is the date and time of the most recent modification.
        """
        date_text = self._get_text(DI.MOD_DATE)
        return parse_iso8824_date(date_text)

    @property
    def modification_date_raw(self) -> Optional[str]:
        """
        Modification date exactly as stored; may be a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        """
        return self.get(DI.MOD_DATE)

    @property
    def keywords(self) -> Optional[str]:
        """
        Read-only property accessing the document's keywords.

        Returns the keywords as text, or ``None`` if they are not specified.
        """
        return self._get_text(DI.KEYWORDS)

    @property
    def keywords_raw(self) -> Optional[str]:
        """Keywords exactly as stored; may be a ``ByteStringObject``."""
        return self.get(DI.KEYWORDS)

257

258

259class PdfDocCommon(ABC):

260 """

261 Common functions from PdfWriter and PdfReader objects.

262

263 This root class is strongly abstracted.

264 """

265

# When true, problems that are otherwise only logged are re-raised
# (see the ``self.strict`` checks in the destination/outline builders).
strict: bool = False  # default

# Page list built on demand; stays ``None`` until ``_flatten`` has run.
flattened_pages: Optional[list[PageObject]] = None

# Not read by the methods visible here; presumably assigned by
# subclasses when the document is encrypted - TODO confirm.
_encryption: Optional[Encryption] = None

# Passed to ``_flatten`` whenever the page list has to be built.
_readonly: bool = False

273

@property
@abstractmethod
def root_object(self) -> DictionaryObject:
    """
    Provide the dictionary that this class treats as the PDF catalog.

    Implemented by subclasses. The methods of this class read entries such
    as ``/Pages`` and ``/OpenAction`` from it.
    """
    ...  # pragma: no cover

278

@property
@abstractmethod
def pdf_header(self) -> str:
    """
    Provide the PDF header as a string.

    Implemented by subclasses; the exact format is defined there.
    """
    ...  # pragma: no cover

283

@abstractmethod
def get_object(
    self, indirect_reference: Union[int, IndirectObject]
) -> Optional[PdfObject]:
    """
    Resolve an indirect reference to the object it designates.

    Implemented by subclasses.

    Args:
        indirect_reference: Either an ``IndirectObject`` or an integer
            (presumably the object number - confirm in the implementations).

    Returns:
        The referenced object, or ``None``.

    """
    ...  # pragma: no cover

289

@abstractmethod
def _replace_object(self, indirect: IndirectObject, obj: PdfObject) -> PdfObject:
    """
    Store ``obj`` in place of the object referenced by ``indirect``.

    Implemented by subclasses. ``viewer_preferences`` relies on it to
    register the ``ViewerPreferences`` wrapper it creates.
    """
    ...  # pragma: no cover

293

@property
@abstractmethod
def _info(self) -> Optional[DictionaryObject]:
    """
    Provide the document information dictionary, or ``None``.

    Implemented by subclasses. ``metadata`` copies its entries into a
    ``DocumentInformation`` instance.
    """
    ...  # pragma: no cover

298

@property
def metadata(self) -> Optional[DocumentInformation]:
    """
    Return a copy of the document information dictionary, if there is one.

    The entries of ``_info`` are copied into a new ``DocumentInformation``
    object. Some PDF files carry their metadata in metadata streams instead
    of a document information dictionary; those streams are not consulted
    here.

    Returns:
        The populated ``DocumentInformation``, or ``None`` if the file has
        no document information dictionary.

    """
    if self._info is None:
        return None
    document_info = DocumentInformation()
    document_info.update(self._info)
    return document_info

313

@property
@abstractmethod
def xmp_metadata(self) -> Optional[XmpInformation]:
    """
    Provide the XMP metadata of the document, or ``None``.

    Implemented by subclasses.
    """
    ...  # pragma: no cover

318

@property
def viewer_preferences(self) -> Optional[ViewerPreferences]:
    """
    Return the catalog's viewer preferences as a ``ViewerPreferences`` object.

    If the stored object is a plain dictionary it is wrapped in
    ``ViewerPreferences`` and the wrapper is written back: through
    ``_replace_object`` when it has an indirect reference, directly into
    the catalog otherwise.

    Returns:
        The viewer preferences, or ``None`` if the catalog has no such entry.

    """
    entry = self.root_object.get(CD.VIEWER_PREFERENCES, None)
    if entry is None:
        return None
    prefs = entry.get_object()
    if isinstance(prefs, ViewerPreferences):
        return prefs
    prefs = ViewerPreferences(prefs)
    reference = getattr(prefs, "indirect_reference", None)
    if reference is not None:
        self._replace_object(reference, prefs)
    else:
        self.root_object[NameObject(CD.VIEWER_PREFERENCES)] = prefs
    return prefs

333

def get_num_pages(self) -> int:
    """
    Calculate the number of pages in this PDF file.

    Returns:
        The number of pages of the parsed PDF file.

    Raises:
        PdfReadError: If restrictions prevent this action.

    """
    # Flattening does not work on an encrypted PDF, so the count stored in
    # the page tree is reported in that case.
    if self.is_encrypted:
        page_tree = self.root_object["/Pages"]
        return page_tree["/Count"]  # type: ignore[no-any-return, index]
    # Otherwise count the flattened pages, building the list on first use.
    pages = self.flattened_pages
    if pages is None:
        self._flatten(self._readonly)
        pages = self.flattened_pages
    assert pages is not None
    return len(pages)

354

def get_page(self, page_number: int) -> PageObject:
    """
    Retrieve a page by number from this PDF file.

    In most situations ``.pages[page_number]`` is the preferred spelling.

    Args:
        page_number: The page number to retrieve
            (pages begin at zero)

    Returns:
        A :class:`PageObject<pypdf._page.PageObject>` instance.

    """
    pages = self.flattened_pages
    if pages is None:
        # Build the flat page list on first access.
        self._flatten(self._readonly)
        pages = self.flattened_pages
    assert pages is not None, "hint for mypy"
    return pages[page_number]

372

def _get_page_in_node(
    self,
    page_number: int,
) -> tuple[DictionaryObject, int]:
    """
    Retrieve the node and position within the /Kids containing the page.
    If page_number is greater than the number of pages, it returns the top node, -1.

    Args:
        page_number: Zero-based index of the page to locate.

    Returns:
        A tuple of the page tree node whose ``/Kids`` holds the page and the
        index of the page inside that ``/Kids`` array.

    Raises:
        PyPdfError: If a node claims to contain the page but none of its
            kids does.

    """
    top = cast(DictionaryObject, self.root_object["/Pages"])

    def recursive_call(
        node: DictionaryObject, mi: int
    ) -> tuple[Optional[PdfObject], int]:
        # ``mi`` is the number of pages located before ``node``.
        # Result: (found node or None, index in /Kids or next page offset).
        ma = cast(int, node.get("/Count", 1))  # default 1 for /Page types
        if node["/Type"] == "/Page":  # type: ignore[comparison-overlap]
            if page_number == mi:
                # -1 tells the caller that the page itself was hit, so the
                # caller is the node that owns it.
                return node, -1
            return None, mi + 1
        if (page_number - mi) >= ma:  # not in nodes below
            if node == top:
                return top, -1
            # Skip the whole subtree: advance by its page count.
            return None, mi + ma
        for idx, kid in enumerate(cast(ArrayObject, node["/Kids"])):
            kid = cast(DictionaryObject, kid.get_object())
            n, i = recursive_call(kid, mi)
            if n is not None:  # page has just been found ...
                if i < 0:  # ... just below!
                    return node, idx
                # ... at lower levels
                return n, i
            mi = i
        raise PyPdfError("Unexpectedly cannot find the node.")

    node, idx = recursive_call(top, 0)
    assert isinstance(node, DictionaryObject), "mypy"
    return node, idx

409

@property
def named_destinations(self) -> dict[str, Destination]:
    """Read-only mapping from destination names to ``Destination`` objects."""
    destinations = self._get_named_destinations()
    return destinations

414

def get_named_dest_root(self) -> ArrayObject:
    """
    Return the ``/Names`` array of the named-destination name tree.

    Missing structures (``/Names`` in the catalog, its ``/Dests`` entry and
    the array itself) are created on the way, as far as this is possible:
    new dictionaries are only registered when the object offers
    ``_add_object``.

    Returns:
        The array holding the named destinations. When the structures
        cannot be created, this is a new empty array that is not attached
        to the document.

    """
    named_dest = ArrayObject()
    if CA.NAMES in self.root_object and isinstance(
        self.root_object[CA.NAMES], DictionaryObject
    ):
        names = cast(DictionaryObject, self.root_object[CA.NAMES])
        if CA.DESTS in names and isinstance(names[CA.DESTS], DictionaryObject):
            # §3.6.3 Name Dictionary (PDF spec 1.7)
            dests = cast(DictionaryObject, names[CA.DESTS])
            dests_ref = dests.indirect_reference
            if CA.NAMES in dests:
                # §7.9.6, entries in a name tree node dictionary
                named_dest = cast(ArrayObject, dests[CA.NAMES])
            else:
                # /Dests exists without an array: attach a fresh one.
                named_dest = ArrayObject()
                dests[NameObject(CA.NAMES)] = named_dest
        elif hasattr(self, "_add_object"):
            # /Names exists but has no usable /Dests: create and link it.
            dests = DictionaryObject()
            dests_ref = self._add_object(dests)
            names[NameObject(CA.DESTS)] = dests_ref
            dests[NameObject(CA.NAMES)] = named_dest

    elif hasattr(self, "_add_object"):
        # No usable /Names in the catalog: build the whole chain.
        names = DictionaryObject()
        names_ref = self._add_object(names)
        self.root_object[NameObject(CA.NAMES)] = names_ref
        dests = DictionaryObject()
        dests_ref = self._add_object(dests)
        names[NameObject(CA.DESTS)] = dests_ref
        dests[NameObject(CA.NAMES)] = named_dest

    return named_dest

447

448 ## common

def _get_named_destinations(
    self,
    tree: Union[TreeObject, None] = None,
    retval: Optional[dict[str, Destination]] = None,
) -> dict[str, Destination]:
    """
    Retrieve the named destinations present in the document.

    On the outermost call (``retval`` is ``None``) the tree is looked up in
    the catalog: ``/Dests`` directly in the catalog takes precedence over
    ``/Dests`` inside the catalog's ``/Names`` dictionary.

    Args:
        tree: The current tree.
        retval: The previously retrieved destinations for nested calls.

    Returns:
        A dictionary which maps names to destinations.

    """
    if retval is None:
        retval = {}
        catalog = self.root_object

        # get the name tree
        if CA.DESTS in catalog:
            tree = cast(TreeObject, catalog[CA.DESTS])
        elif CA.NAMES in catalog:
            names = cast(DictionaryObject, catalog[CA.NAMES])
            if CA.DESTS in names:
                tree = cast(TreeObject, names[CA.DESTS])

    if is_null_or_none(tree):
        return retval
    assert tree is not None, "mypy"

    if PagesAttributes.KIDS in tree:
        # recurse down the tree
        for kid in cast(ArrayObject, tree[PagesAttributes.KIDS]):
            self._get_named_destinations(kid.get_object(), retval)
    # §7.9.6, entries in a name tree node dictionary
    elif CA.NAMES in tree:  # /Kids and /Names are exclusives (§7.9.6)
        # The entry is walked by position as alternating key, value items.
        names = cast(DictionaryObject, tree[CA.NAMES])
        i = 0
        while i < len(names):
            key = names[i].get_object()
            i += 1
            if not isinstance(key, (bytes, str)):
                # Not a usable key: only one slot is consumed, so the next
                # item is examined as a key candidate.
                continue
            try:
                value = names[i].get_object()
            except IndexError:
                # A trailing key without a value ends the scan.
                break
            i += 1
            if isinstance(value, DictionaryObject):
                # A dictionary value carries the destination under /D.
                if "/D" in value:
                    value = value["/D"]
                else:
                    continue
            dest = self._build_destination(key, value)
            if dest is not None:
                retval[cast(str, dest["/Title"])] = dest
                # Remain backwards-compatible.
                retval[str(key)] = dest
    else:  # case where Dests is in root catalog (PDF 1.7 specs, §2 about PDF 1.1)
        for k__, v__ in tree.items():
            val = v__.get_object()
            if isinstance(val, DictionaryObject):
                if "/D" in val:
                    val = val["/D"].get_object()
                else:
                    continue
            dest = self._build_destination(k__, val)
            if dest is not None:
                retval[k__] = dest
    return retval

521

522 # A select group of relevant field attributes. For the complete list,

523 # see §12.3.2 of the PDF 1.7 or PDF 2.0 specification.

524

def get_fields(
    self,
    tree: Optional[TreeObject] = None,
    retval: Optional[dict[Any, Any]] = None,
    fileobj: Optional[Any] = None,
    stack: Optional[list[PdfObject]] = None,
) -> Optional[dict[str, Any]]:
    """
    Extract field data if this PDF contains interactive form fields.

    The *tree*, *retval*, *stack* parameters are for recursive use. On the
    outermost call (``retval`` is ``None``) the AcroForm entry of the
    catalog is used as tree and a new, empty stack is started, whatever was
    passed for these two parameters.

    Args:
        tree: Current object to parse.
        retval: In-progress list of fields.
        fileobj: A file object (usually a text file) to write
            a report to on all interactive form fields found.
        stack: List of already parsed objects. A new list is used when it
            is omitted.

    Returns:
        A dictionary where each key is a field name, and each
        value is a :class:`Field<pypdf.generic.Field>` object. By
        default, the mapping name is used for keys.
        ``None`` if form data could not be located.

    """
    field_attributes = FA.attributes_dict()
    field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
    if retval is None:
        retval = {}
        catalog = self.root_object
        stack = []
        # get the AcroForm tree
        if CD.ACRO_FORM in catalog:
            tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])
        else:
            return None
    if tree is None:
        return retval
    if stack is None:
        # ``retval`` was supplied without ``stack``. An ``assert`` is no
        # input validation (it vanishes under ``-O``), so start a new list
        # of visited objects instead of failing.
        stack = []
    if "/Fields" in tree:
        fields = cast(ArrayObject, tree["/Fields"])
        for f in fields:
            field = f.get_object()
            self._build_field(field, retval, fileobj, field_attributes, stack)
    elif any(attr in tree for attr in field_attributes):
        # Tree is a field
        self._build_field(tree, retval, fileobj, field_attributes, stack)
    return retval

574

def _get_qualified_field_name(self, parent: DictionaryObject) -> str:
    """
    Build the qualified name of a field dictionary.

    A ``/TM`` entry is returned as is. Otherwise the ``/T`` entries (empty
    string when absent) are joined with ``.`` while following ``/Parent``
    upwards, outermost ancestor first.
    """
    if "/TM" in parent:
        return cast(str, parent["/TM"])
    if "/Parent" not in parent:
        return cast(str, parent.get("/T", ""))
    ancestor = cast(DictionaryObject, parent["/Parent"])
    prefix = self._get_qualified_field_name(ancestor)
    return prefix + "." + cast(str, parent.get("/T", ""))

587

def _build_field(
    self,
    field: Union[TreeObject, DictionaryObject],
    retval: dict[Any, Any],
    fileobj: Any,
    field_attributes: Any,
    stack: list[PdfObject],
) -> None:
    """
    Register ``field`` in ``retval`` and continue with its kids.

    Objects with neither ``/T`` nor ``/TM`` are ignored. For choice fields
    and buttons a ``/_States_`` entry listing the possible states is added
    to the stored ``Field``.

    Args:
        field: The field dictionary to register.
        retval: Mapping from qualified field name to ``Field``; updated in place.
        fileobj: Optional report target; when truthy the field is written to it.
        field_attributes: Mapping from attribute key to its report label.
        stack: Objects already handled, passed on to ``_check_kids``.

    """
    if all(attr not in field for attr in ("/T", "/TM")):
        return
    key = self._get_qualified_field_name(field)
    if fileobj:
        self._write_field(fileobj, field, field_attributes)
        fileobj.write("\n")
    retval[key] = Field(field)
    obj = retval[key].indirect_reference.get_object()  # to get the full object
    if obj.get(FA.FT, "") == "/Ch" and obj.get(NameObject(FA.Opt)):
        # Choice field: its options are the states.
        retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)]
    if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj:
        #  Checkbox
        retval[key][NameObject("/_States_")] = ArrayObject(
            list(obj["/AP"]["/N"].keys())
        )
        if "/Off" not in retval[key]["/_States_"]:
            retval[key][NameObject("/_States_")].append(NameObject("/Off"))
    elif obj.get(FA.FT, "") == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
        # Radio group: collect the distinct appearance states of the kids,
        # keeping first-seen order.
        states: list[str] = []
        retval[key][NameObject("/_States_")] = ArrayObject(states)
        for k in obj.get(FA.Kids, {}):
            k = k.get_object()
            for s in list(k["/AP"]["/N"].keys()):
                if s not in states:
                    states.append(s)
            retval[key][NameObject("/_States_")] = ArrayObject(states)
        if (
            obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0
            and "/Off" in retval[key]["/_States_"]
        ):
            # With the NoToggleToOff flag set, "/Off" is not offered as a state.
            del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")]
    # at last for order
    self._check_kids(field, retval, fileobj, stack)

629

def _check_kids(
    self,
    tree: Union[TreeObject, DictionaryObject],
    retval: Any,
    fileobj: Any,
    stack: list[PdfObject],
) -> None:
    """
    Collect the fields found among the kids of ``tree``.

    ``tree`` is recorded in ``stack``; a tree that is already recorded is
    reported with a warning and not processed again.
    """
    already_seen = tree in stack
    if already_seen:
        logger_warning(
            "%(field_name)s already parsed",
            source=__name__,
            field_name=self._get_qualified_field_name(tree),
        )
        return
    stack.append(tree)
    if PagesAttributes.KIDS not in tree:
        return
    # recurse down the tree
    for child in tree[PagesAttributes.KIDS]:  # type: ignore[attr-defined]
        self.get_fields(child.get_object(), retval, fileobj, stack)

650

def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
    """
    Write a ``label: value`` line to ``fileobj`` for each known attribute.

    Kids and additional actions are left out, the field type is written
    with a readable name, and a parent is represented by its name only.
    Attributes missing from ``field`` are skipped silently.
    """
    readable_types = {
        "/Btn": "Button",
        "/Tx": "Text",
        "/Ch": "Choice",
        "/Sig": "Signature",
    }
    skipped = (
        FA.Kids,
        FA.AA,
    )
    all_attributes = FA.attributes() + CheckboxRadioButtonAttributes.attributes()

    for attr in all_attributes:
        if attr in skipped:
            continue
        label = field_attributes[attr]
        try:
            if attr == FA.FT:
                # Make the field type value clearer
                field_type = field[attr]
                if field_type in readable_types:
                    fileobj.write(f"{label}: {readable_types[field_type]}\n")
            elif attr == FA.Parent:
                # Let's just write the name of the parent
                parent = field[attr]
                try:
                    parent_name = parent[FA.TM]
                except KeyError:
                    parent_name = parent[FA.T]
                fileobj.write(f"{label}: {parent_name}\n")
            else:
                fileobj.write(f"{label}: {field[attr]}\n")
        except KeyError:
            # Field attribute is N/A or unknown, so don't write anything
            pass

687

def get_form_text_fields(self, full_qualified_name: bool = False) -> dict[str, Any]:
    """
    Retrieve form fields from the document with textual data.

    Args:
        full_qualified_name: to get full name

    Returns:
        A dictionary. The key is the name of the form field,
        the value is the content of the field.

        If the document contains multiple form fields with the same name, the
        second and following will get the suffix .2, .3, ...

    """

    def unique_name(name: str, taken: dict[Any, Any]) -> str:
        # The first occurrence keeps the bare name; later ones are numbered
        # from 2 by counting the suffixed names already present.
        if name not in taken:
            return name
        suffixed = [existing for existing in taken if existing.startswith(name + ".")]
        return name + "." + str(len(suffixed) + 2)

    # Retrieve document form fields
    all_fields = self.get_fields()
    if all_fields is None:
        return {}
    text_fields = {}
    for qualified_name, field in all_fields.items():
        if field.get("/FT") != "/Tx":
            continue
        content = field.get("/V")
        if full_qualified_name:
            text_fields[qualified_name] = content
        else:
            text_fields[unique_name(cast(str, field["/T"]), text_fields)] = content
    return text_fields

725

def get_pages_showing_field(
    self, field: Union[Field, PdfObject, IndirectObject]
) -> list[PageObject]:
    """
    Provides list of pages where the field is called.

    Args:
        field: Field Object, PdfObject or IndirectObject referencing a Field

    Returns:
        List of pages:
            - Empty list:
                The field has no widgets attached
                (either hidden field or ancestor field).
            - Single page list:
                Page where the widget is present
                (most common).
            - Multi-page list:
                Field with multiple kids widgets
                (example: radio buttons, field repeated on multiple pages).

    Raises:
        ValueError: If ``field`` cannot be resolved or has no (inherited)
            ``/FT`` entry.

    """

    def widget_pages(widget: Any) -> list[Any]:
        # A widget either names its page in /P or has to be searched for
        # in the annotations of every page.
        if "/P" in widget:
            return [widget["/P"].get_object()]
        return [
            page
            for page in self.pages
            if widget.indirect_reference in page.get("/Annots", "")
        ]

    try:
        # to cope with all types
        field = cast(DictionaryObject, field.indirect_reference.get_object())  # type: ignore[union-attr]
    except Exception as exc:
        raise ValueError("Field type is invalid") from exc
    if is_null_or_none(field.get_inherited(key="/FT", default=None)):
        raise ValueError("Field is not valid")

    if field.get("/Subtype", "") == "/Widget":
        candidates = widget_pages(field)
    else:
        candidates = []
        for kid in field.get("/Kids", ()):
            kid = kid.get_object()
            # Kid that is just a widget, not a field:
            if kid.get("/Subtype", "") == "/Widget" and "/T" not in kid:
                candidates.extend(widget_pages(kid))

    # Anything that is not a PageObject yet is mapped to the matching page.
    pages: list[PageObject] = []
    for candidate in candidates:
        if isinstance(candidate, PageObject):
            pages.append(candidate)
        else:
            number = self._get_page_number_by_indirect(candidate.indirect_reference)  # type: ignore[union-attr]
            pages.append(self.pages[number])  # type: ignore[index]
    return pages

785

@property
def open_destination(
    self,
) -> Union[None, Destination, TextStringObject, ByteStringObject]:
    """
    Property to access the opening destination (``/OpenAction`` entry in
    the PDF catalog). It returns ``None`` if the entry does not exist
    or is not set.

    A string entry is returned as a string object, an array entry as a
    ``Destination`` titled ``"OpenAction"``; any other kind of entry gives
    ``None``.

    Raises:
        Exception: If a destination is invalid. The original error is
            attached as the cause.

    """
    if "/OpenAction" not in self.root_object:
        return None
    oa: Any = self.root_object["/OpenAction"]
    if isinstance(oa, bytes):  # pragma: no cover
        oa = oa.decode()
    if isinstance(oa, str):
        return create_string_object(oa)
    if isinstance(oa, ArrayObject):
        try:
            page, typ, *array = oa
            fit = Fit(typ, tuple(array))
            return Destination("OpenAction", page, fit)
        except Exception as exc:
            # Chain explicitly so the traceback names the root cause.
            raise Exception(f"Invalid Destination {oa}: {exc}") from exc
    else:
        return None

@open_destination.setter
def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
    """Reject assignment; this class provides no setter."""
    raise NotImplementedError("No setter for open_destination")

819

@property
def outline(self) -> OutlineType:
    """
    Read-only property giving the document outline, i.e. its collection of
    'outline items' (also known as 'bookmarks').
    """
    items = self._get_outline()
    return items

828

def _get_outline(
    self,
    node: Optional[DictionaryObject] = None,
    outline: Optional[Any] = None,
    visited: Optional[set[int]] = None,
) -> OutlineType:
    """
    Build the outline as a nested list.

    On the outermost call (``outline`` is ``None``) the first item is taken
    from the catalog's outline dictionary and the named destinations are
    cached in ``self._named_destinations``. Siblings are followed through
    ``/Next``; the children of an item (``/First``) are appended as a
    nested list right after the item.

    Args:
        node: The outline item to start from.
        outline: The list to append to, for nested calls.
        visited: ``id()`` values of items already handled, used to stop on cycles.

    Returns:
        The list of outline items and nested sub-lists.

    """
    if outline is None:
        outline = []
        catalog = self.root_object

        # get the outline dictionary and named destinations
        if CO.OUTLINES in catalog:
            lines = cast(DictionaryObject, catalog[CO.OUTLINES])

            if isinstance(lines, NullObject):
                return outline

            # §12.3.3 Document outline, entries in the outline dictionary
            if not is_null_or_none(lines) and "/First" in lines:
                node = cast(DictionaryObject, lines["/First"])
        self._named_destinations = self._get_named_destinations()

    if node is None:
        return outline

    # see if there are any more outline items
    if visited is None:
        visited = set()
    while True:
        # Nodes are tracked by identity; meeting one twice means a cycle.
        node_id = id(node)
        if node_id in visited:
            logger_warning("Detected cycle in outline structure for %(node)s", source=__name__, node=node)
            break
        visited.add(node_id)

        outline_obj = self._build_outline_item(node)
        if outline_obj:
            outline.append(outline_obj)

        # check for sub-outline
        if "/First" in node:
            sub_outline: list[Any] = []
            # Pass a copy to allow multiple outer entries to reference the same inner one.
            inner_visited = visited.copy()
            self._get_outline(
                node=cast(DictionaryObject, node["/First"]),
                outline=sub_outline,
                visited=inner_visited,
            )
            if sub_outline:
                outline.append(sub_outline)

        if "/Next" not in node:
            break
        node = cast(DictionaryObject, node["/Next"])

    return outline

886

@property
def threads(self) -> Optional[ArrayObject]:
    """
    Read-only property for the list of threads.

    See §12.4.3 from the PDF 1.7 or 2.0 specification.

    The value is an array of dictionaries with "/F" (the first bead in the
    thread) and "/I" (a thread information dictionary containing
    information about the thread, such as its title, author, and creation
    date) properties, or None if there are no articles.

    Since PDF 2.0 it can also contain an indirect reference to a metadata
    stream containing information about the thread, such as its title,
    author, and creation date.
    """
    catalog = self.root_object
    return cast("ArrayObject", catalog[CO.THREADS]) if CO.THREADS in catalog else None

907

@abstractmethod
def _get_page_number_by_indirect(
    self, indirect_reference: Union[None, int, NullObject, IndirectObject]
) -> Optional[int]:
    """
    Map the reference of a page to the number of that page.

    Implemented by subclasses. ``get_page_number`` and
    ``get_destination_page_number`` delegate to it.

    Returns:
        The page number or None (presumably when no page matches -
        confirm in the implementations).

    """
    ...  # pragma: no cover

913

def get_page_number(self, page: PageObject) -> Optional[int]:
    """
    Retrieve page number of a given PageObject.

    The lookup is done with the indirect reference of the page.

    Args:
        page: The page to get page number. Should be
            an instance of :class:`PageObject<pypdf._page.PageObject>`

    Returns:
        The page number or None if page is not found

    """
    reference = page.indirect_reference
    return self._get_page_number_by_indirect(reference)

927

def get_destination_page_number(self, destination: Destination) -> Optional[int]:
    """
    Retrieve page number of a given Destination object.

    The lookup is done with the ``page`` attribute of the destination.

    Args:
        destination: The destination to get page number.

    Returns:
        The page number or None if page is not found

    """
    target_page = destination.page
    return self._get_page_number_by_indirect(target_page)

940

def _build_destination(
    self,
    title: Union[str, bytes],
    array: Optional[
        list[
            Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
        ]
    ],
) -> Destination:
    """
    Create a ``Destination`` named ``title`` from a destination array.

    A missing destination (``None``, a null object, a string or an empty
    array) yields a destination with a null page and the default fit.
    Otherwise the first two items are the page and the fit type and the
    remaining items are the fit arguments.

    Raises:
        PdfReadError: If the fit cannot be built and ``strict`` is set.
            Without ``strict`` a warning is logged and a destination to
            the first page is returned instead.

    """
    # handle outline items with missing or invalid destination
    is_empty_array = isinstance(array, ArrayObject) and len(array) == 0
    if array is None or isinstance(array, (NullObject, str)) or is_empty_array:
        return Destination(title, NullObject(), Fit.fit())
    page, typ, *fit_args = array  # type: ignore[assignment]
    try:
        return Destination(title, page, Fit(fit_type=typ, fit_args=fit_args))  # type: ignore[arg-type]
    except PdfReadError:
        logger_warning("Unknown destination: %(title)r %(array)s", source=__name__, title=title, array=fit_args)
        if self.strict:
            raise
        # create a link to first Page
        first_page_ref = self.pages[0].indirect_reference
        target = NullObject() if first_page_ref is None else first_page_ref
        return Destination(title, target, Fit.fit())

970

def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
    """
    Convert an outline item dictionary into a Destination.

    The target is read from a ``/GoTo`` action under ``/A`` or, failing
    that, from ``/Dest``. String targets are looked up in the named
    destinations of the document.

    Args:
        node: The outline item dictionary.

    Returns:
        The Destination, carrying the ``/C``, ``/F`` and ``/Count`` entries
        found on the node plus an ``/%is_open%`` flag.

    Raises:
        PdfReadError: In strict mode, when ``/Title`` is missing, a GoTo
            action has no ``/D`` entry, or the destination has an
            unexpected type.

    """
    target, item = None, None

    # title required for valid outline
    # §12.3.3, entries in an outline item dictionary
    try:
        heading = cast("str", node["/Title"])
    except KeyError:
        if self.strict:
            raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
        heading = ""

    if "/A" in node:
        # Action, PDF 1.7 and PDF 2.0 §12.6 (only type GoTo supported)
        action = cast(DictionaryObject, node["/A"])
        action_type = cast(NameObject, action[GoToActionArguments.S])
        if action_type == "/GoTo":
            if GoToActionArguments.D in action:
                target = action[GoToActionArguments.D]
            elif self.strict:
                raise PdfReadError(f"Outline Action Missing /D attribute: {node!r}")
    elif "/Dest" in node:
        # Destination, PDF 1.7 and PDF 2.0 §12.3.2
        target = node["/Dest"]
        # if array was referenced in another object, will be a dict w/ key "/D"
        if isinstance(target, DictionaryObject) and "/D" in target:
            target = target["/D"]

    if target is None:
        # outline item not required to have destination or action
        # PDFv1.7 Table 153
        item = self._build_destination(heading, target)
    elif isinstance(target, ArrayObject):
        item = self._build_destination(heading, target)
    elif isinstance(target, str):
        # named destination, addresses NameObject Issue #193
        # TODO: Keep named destination instead of replacing it?
        try:
            item = self._build_destination(
                heading, self._named_destinations[target].dest_array
            )
        except KeyError:
            # named destination not found in Name Dict
            item = self._build_destination(heading, None)
    else:
        if self.strict:
            raise PdfReadError(f"Unexpected destination {target!r}")
        logger_warning(
            "Removed unexpected destination %(dest)r from destination",
            source=__name__,
            dest=target,
        )
        item = self._build_destination(heading, None)

    # if outline item created, add color, format, and child count if present
    if item:
        if "/C" in node:
            # Color of outline item font in (R, G, B) with values ranging 0.0-1.0
            item[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"])  # type: ignore[attr-defined]
        if "/F" in node:
            # specifies style characteristics bold and/or italic
            # with 1=italic, 2=bold, 3=both
            item[NameObject("/F")] = node["/F"]
        if "/Count" in node:
            # absolute value = num. visible children
            # with positive = open/unfolded, negative = closed/folded
            item[NameObject("/Count")] = node["/Count"]
        # a missing or zero count is considered open (so is_open is available)
        item[NameObject("/%is_open%")] = BooleanObject(
            node.get("/Count", 0) >= 0
        )
    item.node = node
    try:
        item.indirect_reference = node.indirect_reference
    except AttributeError:
        # the node carries no indirect reference
        pass
    return item

1048

@property
def pages(self) -> list[PageObject]:
    """
    List-like view over the :class:`PageObject<pypdf._page.PageObject>` items.

    A single page or a range of pages can be fetched through this property.

    Note:
        For PdfWriter only: a page or a range of pages can be removed from
        the list with the del operator. Remember: only the page entry is
        removed, as the objects beneath can be used elsewhere. To completely
        remove them - if they are not used anywhere - write to a
        buffer/temporary file and then load it into a new PdfWriter.

    """
    count_pages = self.get_num_pages
    fetch_page = self.get_page
    return _VirtualList(count_pages, fetch_page)  # type: ignore[return-value]

1065

@property
def page_labels(self) -> list[str]:
    """
    Labels of all pages of this document, in page order.

    This property is read-only.
    """
    labels = []
    for page_index in range(len(self.pages)):
        labels.append(page_index2page_label(self, page_index))
    return labels

1075

@property
def page_layout(self) -> Optional[str]:
    """
    Page layout stored in the document catalog, or None when not set.

    .. list-table:: Valid ``layout`` values
       :widths: 50 200

       * - /NoLayout
         - Layout explicitly not specified
       * - /SinglePage
         - Show one page at a time
       * - /OneColumn
         - Show one column at a time
       * - /TwoColumnLeft
         - Show pages in two columns, odd-numbered pages on the left
       * - /TwoColumnRight
         - Show pages in two columns, odd-numbered pages on the right
       * - /TwoPageLeft
         - Show two pages at a time, odd-numbered pages on the left
       * - /TwoPageRight
         - Show two pages at a time, odd-numbered pages on the right
    """
    try:
        layout = self.root_object[CD.PAGE_LAYOUT]
    except KeyError:
        # the catalog has no page layout entry
        return None
    return cast(NameObject, layout)

1103

@property
def page_mode(self) -> Optional[PagemodeType]:
    """
    Page mode stored under ``/PageMode`` in the catalog, or None when not set.

    .. list-table:: Valid ``mode`` values
       :widths: 50 200

       * - /UseNone
         - Do not show outline or thumbnails panels
       * - /UseOutlines
         - Show outline (aka bookmarks) panel
       * - /UseThumbs
         - Show page thumbnails panel
       * - /FullScreen
         - Fullscreen view
       * - /UseOC
         - Show Optional Content Group (OCG) panel
       * - /UseAttachments
         - Show attachments panel
    """
    try:
        mode = self.root_object["/PageMode"]
    except KeyError:
        # the catalog has no /PageMode entry
        return None
    return mode  # type: ignore[return-value]

1129

def _flatten(
    self,
    list_only: bool = False,
    pages: Union[None, DictionaryObject, PageObject] = None,
    inherit: Optional[dict[str, Any]] = None,
    indirect_reference: Optional[IndirectObject] = None,
) -> None:
    """
    Process the document pages to ease searching.

    Attributes of a page may inherit from ancestor nodes
    in the page tree. Flattening means moving
    any inheritance data into descendant nodes,
    effectively removing the inheritance dependency.

    Note: It is distinct from another use of "flattening" applied to PDFs.
    Flattening a PDF also means combining all the contents into one single layer
    and making the file less editable.

    Args:
        list_only: Will only list the pages within _flatten_pages.
        pages: The page tree node to process. When null or None, processing
            starts at the ``/Pages`` entry of the catalog and
            ``flattened_pages`` is reset.
        inherit: Inheritable attributes collected from the ancestors of
            ``pages``. The dictionary passed in is not modified.
        indirect_reference: Used recursively to flatten the /Pages object.

    Raises:
        PdfReadError: If ``/Pages`` is not a dictionary, a kid refers back
            to its own parent, or the recursion limit is hit.

    """
    inheritable_page_attributes = (
        NameObject(PG.RESOURCES),
        NameObject(PG.MEDIABOX),
        NameObject(PG.CROPBOX),
        NameObject(PG.ROTATE),
    )
    if inherit is None:
        inherit = {}
    if is_null_or_none(pages):
        # Fix issue 327: set flattened_pages attribute only for
        # decrypted file
        catalog = self.root_object
        pages = catalog.get("/Pages").get_object()  # type: ignore[union-attr]
        if not isinstance(pages, DictionaryObject):
            raise PdfReadError("Invalid object in /Pages")
        self.flattened_pages = []
    assert pages is not None, "mypy"

    if PagesAttributes.TYPE in pages:
        t = cast(str, pages[PagesAttributes.TYPE])
    # if the page tree node has no /Type, consider as a page if /Kids is also missing
    elif PagesAttributes.KIDS not in pages:
        t = "/Page"
    else:
        t = "/Pages"

    if t == "/Pages":
        # Work on a copy: values set by this node must only reach its own
        # descendants. Mutating the caller's dictionary would leak them to
        # later siblings and to every page processed afterwards.
        inherit = dict(inherit)
        for attr in inheritable_page_attributes:
            if attr in pages:
                inherit[attr] = pages[attr]
        # a fresh object() never compares equal, so nodes without an
        # indirect reference are never reported as cyclic
        pages_reference = getattr(pages, "indirect_reference", object())
        for page in cast(ArrayObject, pages[PagesAttributes.KIDS]):
            if getattr(page, "indirect_reference", object()) == pages_reference:
                raise PdfReadError("Detected cyclic page references.")

            addt = {}
            if isinstance(page, IndirectObject):
                addt["indirect_reference"] = page
            obj = page.get_object()
            if obj:
                # damaged file may have invalid child in /Pages
                try:
                    self._flatten(list_only, obj, inherit, **addt)
                except RecursionError:
                    raise PdfReadError(
                        "Maximum recursion depth reached during page flattening."
                    )
    elif t == "/Page":
        for attr_in, value in inherit.items():
            # if the page has its own value, it does not inherit the
            # parent's value
            if attr_in not in pages:
                pages[attr_in] = value
        page_obj = PageObject(self, indirect_reference)
        if not list_only:
            page_obj.update(pages)

        # TODO: Could flattened_pages be None at this point?
        self.flattened_pages.append(page_obj)  # type: ignore[union-attr]

1215

def remove_page(
    self,
    page: Union[int, PageObject, IndirectObject],
    clean: bool = False,
) -> None:
    """
    Remove page from pages list.

    A warning is logged and nothing is removed when the page cannot be
    resolved or the page number is out of range.

    Args:
        page:
            * :class:`int`: Page number to be removed.
            * :class:`~pypdf._page.PageObject`: page to be removed. If the page appears many times
              only the first one will be removed.
            * :class:`~pypdf.generic.IndirectObject`: Reference to page to be removed.

        clean: replace PageObject with NullObject to prevent annotations
            or destinations to reference a detached page.

    """
    if self.flattened_pages is None:
        self._flatten(self._readonly)
    assert self.flattened_pages is not None

    target = page
    if isinstance(target, IndirectObject):
        resolved = target.get_object()
        if not isinstance(resolved, PageObject):
            logger_warning("IndirectObject is not referencing a page", source=__name__)
            return
        target = resolved

    if isinstance(target, int):
        page_number = target
    else:
        try:
            page_number = self.flattened_pages.index(target)
        except ValueError:
            logger_warning("Cannot find page in pages", source=__name__)
            return

    # negative numbers are rejected as well
    if page_number < 0 or page_number >= len(self.flattened_pages):
        logger_warning("Page number is out of range", source=__name__)
        return

    reference = self.pages[page_number].indirect_reference
    del self.pages[page_number]
    if clean and reference is not None:
        self._replace_object(reference, NullObject())

1259

def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
    """
    Resolve an indirect object from its object and generation numbers.

    Development helper, equivalent to
    generic.IndirectObject(num,gen,self).get_object()

    Args:
        num: The object number of the indirect object.
        gen: The generation number of the indirect object.

    Returns:
        A PdfObject

    """
    reference = IndirectObject(num, gen, self)
    return reference.get_object()

1275

def decode_permissions(
    self, permissions_code: int
) -> NoReturn:  # pragma: no cover
    """
    Deprecated entry point for decoding a permissions integer.

    Only reports that ``user_access_permissions`` replaces it.
    """
    deprecation_with_replacement(
        removed_in="5.0.0",
        new_name="user_access_permissions",
        old_name="decode_permissions",
    )

1285

@property
def user_access_permissions(self) -> Optional[UserAccessPermissions]:
    """
    User access permissions of an encrypted document.

    None is returned when the document is not encrypted.

    .. warning::

        For AES-256 encrypted documents (R=5/R=6), the returned
        permissions are derived from the ``/P`` field, which is
        only trustworthy if the ``/Perms`` integrity check passed.
        Check :attr:`are_permissions_valid` to verify.
    """
    encryption = self._encryption
    return None if encryption is None else UserAccessPermissions(encryption.P)

1302

@property
def are_permissions_valid(self) -> Optional[bool]:
    """
    Result of the ``/Perms`` integrity check for this document.

    For AES-256 encrypted documents (R=5/R=6), the ``/Perms`` field
    is an encrypted copy of the permissions that can be verified
    independently. ``False`` means this check failed (the ``/P``
    permissions may have been tampered with).

    ``None`` is returned if the document is not encrypted or has not yet
    been decrypted via :meth:`decrypt()<pypdf.PdfReader.decrypt>`.
    ``True`` is returned for non-AES-256 encryption (no ``/Perms`` to check).
    """
    encryption = self._encryption
    if encryption is None or not encryption.is_decrypted():
        return None
    return encryption._are_permissions_valid

1322

@property
@abstractmethod
def is_encrypted(self) -> bool:
    """
    Whether this PDF file is encrypted (read-only, abstract).

    Note that a true value stays true even after the
    :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
    """
    ...  # pragma: no cover

1333

@property
def xfa(self) -> Optional[dict[str, Any]]:
    """
    XFA entries of the document's ``/AcroForm``.

    The ``/XFA`` entry is read as a flat sequence of alternating tags and
    values. Values that are indirect references resolving to a non-empty
    object contribute the result of ``_decompress_with_limit`` applied to
    that object's raw data; all other values are ignored.

    Returns:
        None if the catalog has no (or an empty) ``/AcroForm``. Otherwise a
        dictionary mapping each tag to its data; it is empty when there is
        no ``/XFA`` entry.

    """
    retval: dict[str, Any] = {}
    catalog = self.root_object

    if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
        return None

    tree = cast(TreeObject, catalog["/AcroForm"])

    if "/XFA" in tree:
        fields = cast(ArrayObject, tree["/XFA"])
        i = iter(fields)
        for tag in i:
            # A damaged file may end with a tag that has no value. The
            # default keeps StopIteration from escaping this property;
            # None is not an IndirectObject, so the dangling tag is dropped.
            f = next(i, None)
            if isinstance(f, IndirectObject):
                field = cast(Optional[EncodedStreamObject], f.get_object())
                if field:
                    es = _decompress_with_limit(field._data)
                    retval[tag] = es
    return retval

1356

@property
def attachments(self) -> Mapping[str, list[bytes]]:
    """
    Mapping of attachment filenames to their content.

    Each name is stored with the loader and its argument; the mapping is
    handed to LazyDict.
    """
    loaders = {}
    for attachment_name in self._list_attachments():
        loaders[attachment_name] = (self._get_attachment_list, attachment_name)
    return LazyDict(loaders)

1366

@property
def attachment_list(self) -> Generator[EmbeddedFile, None, None]:
    """Iterable of the attachment objects loaded from the document catalog."""
    embedded_files = EmbeddedFile._load(self.root_object)
    yield from embedded_files

1371

def _list_attachments(self) -> list[str]:
    """
    Collect the filenames of all file attachments.

    The alternative name of an attachment is listed as well when it is
    non-empty and differs from the primary name.

    Returns:
        list of filenames

    """
    collected = []
    for embedded in self.attachment_list:
        primary = embedded.name
        collected.append(primary)
        alternative = embedded.alternative_name
        if alternative and alternative != primary:
            collected.append(alternative)
    return collected

1386

def _get_attachment_list(self, name: str) -> list[bytes]:
    """
    Return the content stored for ``name``, always as a list.

    Args:
        name: Filename of the attachment.

    Returns:
        The list found for ``name``, or a one-element list wrapping a
        single value.

    """
    content = self._get_attachments(name)[name]
    return content if isinstance(content, list) else [content]

1392

def _get_attachments(
    self, filename: Optional[str] = None
) -> dict[str, Union[bytes, list[bytes]]]:
    """
    Collect all or selected file attachments as a dictionary of file names
    and file data.

    Args:
        filename: If filename is None, every attachment is returned, keyed
            by its name and by its alternative name. Otherwise only entries
            whose name or alternative name equals the filename are returned,
            under that single key.

    Returns:
        dictionary of filename -> Union[bytestring or List[ByteString]]
        If the filename exists multiple times a list of the different versions will be provided.

    """
    collected: dict[str, Union[bytes, list[bytes]]] = {}
    for embedded in self.attachment_list:
        alternative = embedded.alternative_name
        if filename is None:
            # a set, so identical name and alternative name count once
            wanted = {embedded.name, alternative}
        elif filename in {embedded.name, alternative}:
            wanted = {embedded.name if filename == embedded.name else alternative}
        else:
            continue

        for key in wanted:
            if key is None:
                # no alternative name available
                continue
            if key not in collected:
                collected[key] = embedded.content
                continue
            versions = collected[key]
            if not isinstance(versions, list):
                # second occurrence: switch from a single value to a list
                versions = [versions]
                collected[key] = versions
            versions.append(embedded.content)
    return collected

1434

@abstractmethod
def _repr_mimebundle_(
    self,
    include: Union[None, Iterable[str]] = None,
    exclude: Union[None, Iterable[str]] = None,
) -> dict[str, Any]:
    """
    Hook for displaying the document in Jupyter Notebooks (abstract).

    Implementations return a dictionary that maps a mime-type to its
    representation.

    .. seealso::

        https://ipython.readthedocs.io/en/stable/config/integrating.html
    """
    ...  # pragma: no cover

1452

1453

class LazyDict(Mapping[Any, Any]):
    """
    Read-only mapping whose values are computed on access.

    Every stored entry is a ``(callable, argument)`` pair. Looking up a key
    calls ``callable(argument)`` and returns the result; nothing is cached,
    so each lookup calls it again.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # takes the same arguments as dict()
        self._raw_dict = dict(*args, **kwargs)

    def __getitem__(self, key: str) -> Any:
        loader, argument = self._raw_dict[key]
        return loader(argument)

    def __iter__(self) -> Iterator[Any]:
        return iter(self._raw_dict)

    def __len__(self) -> int:
        return len(self._raw_dict)

    def __str__(self) -> str:
        # only the keys are shown, so printing never triggers a loader
        return f"LazyDict(keys={list(self)})"

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_doc_common.py: 22%

656 statements