Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/metadata/_constants.py

1# SPDX-FileCopyrightText: 2022 James R. Barlow

2# SPDX-License-Identifier: MPL-2.0

4"""XMP metadata constants, templates, and utilities."""

6from __future__ import annotations

8import re

9from collections.abc import Callable, Iterable

10from typing import NamedTuple

12from lxml import etree

13from lxml.etree import QName

# XMP namespace URIs, one module-level constant per namespace.
XMP_NS_DC = "http://purl.org/dc/elements/1.1/"
XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
XMP_NS_PDFA_ID = "http://www.aiim.org/pdfa/ns/id/"
XMP_NS_PDFA_EXTENSION = "http://www.aiim.org/pdfa/ns/extension/"
XMP_NS_PDFA_PROPERTY = "http://www.aiim.org/pdfa/ns/property#"
XMP_NS_PDFA_SCHEMA = "http://www.aiim.org/pdfa/ns/schema#"
XMP_NS_PDFUA_ID = "http://www.aiim.org/pdfua/ns/id/"
XMP_NS_PDFX_ID = "http://www.npes.org/pdfx/ns/id/"
XMP_NS_PHOTOSHOP = "http://ns.adobe.com/photoshop/1.0/"
XMP_NS_PRISM = "http://prismstandard.org/namespaces/basic/1.0/"
XMP_NS_PRISM2 = "http://prismstandard.org/namespaces/basic/2.0/"
XMP_NS_PRISM3 = "http://prismstandard.org/namespaces/basic/3.0/"
XMP_NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
XMP_NS_XMP = "http://ns.adobe.com/xap/1.0/"
XMP_NS_XMP_MM = "http://ns.adobe.com/xap/1.0/mm/"
XMP_NS_XMP_RIGHTS = "http://ns.adobe.com/xap/1.0/rights/"

# This one should not be registered with lxml, so it is deliberately left
# out of DEFAULT_NAMESPACES below.
XMP_NS_XML = "http://www.w3.org/XML/1998/namespace"

# Each entry is a (namespace URI, preferred prefix) pair -- URI first.
DEFAULT_NAMESPACES: list[tuple[str, str]] = [
    ("adobe:ns:meta/", "x"),
    # Namespaces that have a named constant above.
    (XMP_NS_DC, "dc"),
    (XMP_NS_PDF, "pdf"),
    (XMP_NS_PDFA_ID, "pdfaid"),
    (XMP_NS_PDFA_EXTENSION, "pdfaExtension"),
    (XMP_NS_PDFA_PROPERTY, "pdfaProperty"),
    (XMP_NS_PDFA_SCHEMA, "pdfaSchema"),
    (XMP_NS_PDFUA_ID, "pdfuaid"),
    (XMP_NS_PDFX_ID, "pdfxid"),
    (XMP_NS_PHOTOSHOP, "photoshop"),
    (XMP_NS_PRISM, "prism"),
    (XMP_NS_PRISM2, "prism2"),
    (XMP_NS_PRISM3, "prism3"),
    (XMP_NS_RDF, "rdf"),
    (XMP_NS_XMP, "xmp"),
    (XMP_NS_XMP_MM, "xmpMM"),
    (XMP_NS_XMP_RIGHTS, "xmpRights"),
    # Namespaces that are only ever referred to by their literal URI.
    ("http://crossref.org/crossmark/1.0/", "crossmark"),
    ("http://www.niso.org/schemas/jav/1.0/", "jav"),
    ("http://ns.adobe.com/pdfx/1.3/", "pdfx"),
    ("http://www.niso.org/schemas/ali/1.0/", "ali"),
]

# Import-time side effect: register every (URI, prefix) pair from
# DEFAULT_NAMESPACES with lxml. Note the argument order flips here:
# register_namespace() takes the prefix first, then the URI.
for _uri, _prefix in DEFAULT_NAMESPACES:
    etree.register_namespace(_prefix, _uri)

# XMP packet wrappers. Newlines are spelled out as explicit escapes so the
# exact byte content of each constant is visible at a glance.
XPACKET_BEGIN = b'<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'

# An xmpmeta document whose rdf:RDF element has no children.
XMP_EMPTY = (
    b'<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pikepdf">\n'
    b' <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
    b' </rdf:RDF>\n'
    b'</x:xmpmeta>\n'
)

XPACKET_END = b'\n<?xpacket end="w"?>\n'

class XmpContainer(NamedTuple):
    """Pair an XMP/RDF container kind with the Python container that holds it.

    Each record carries the RDF container name, the Python type used to
    represent that container, and the callable used to add one item to an
    instance of that type.
    """

    # RDF container name (the module's table uses 'Alt', 'Bag' and 'Seq').
    rdf_type: str
    # Python container class standing in for the RDF container.
    py_type: type
    # Callable that inserts a single item, e.g. an unbound ``list.append``.
    insert_fn: Callable[..., None]

class AltList(list):
    """List subclass marking a value as an XMP ``Alt`` (language alternatives).

    It adds no behavior of its own; the distinct type lets an alternatives
    container be told apart from a plain ``list``.
    """

# Table of the supported RDF container kinds: for each one, the Python type
# used to hold its items and the unbound method that adds a single item.
XMP_CONTAINERS = [
    XmpContainer(rdf_type='Alt', py_type=AltList, insert_fn=AltList.append),
    XmpContainer(rdf_type='Bag', py_type=set, insert_fn=set.add),
    XmpContainer(rdf_type='Seq', py_type=list, insert_fn=list.append),
]

# Qualified names (as produced by str(QName(namespace, local_name))) of the
# four properties listed here; kept in a frozenset for membership tests.
LANG_ALTS = frozenset(
    str(QName(namespace, local_name))
    for namespace, local_name in (
        (XMP_NS_DC, 'title'),
        (XMP_NS_DC, 'description'),
        (XMP_NS_DC, 'rights'),
        (XMP_NS_XMP_RIGHTS, 'UsageTerms'),
    )
)

102

# These are the illegal characters in XML 1.0. (XML 1.1 is a bit more permissive,
# but we'll be strict to ensure wider compatibility.)
# Matches any single code point outside the ranges XML 1.0 allows: tab, LF,
# CR, U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF.
re_xml_illegal_chars = re.compile(
    r"(?u)[^\x09\x0A\x0D\x20-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]"
)
# Byte-level counterpart: matches any byte below 0x20 other than tab, LF and
# CR, and also the literal character reference "&#0;" (an escaped NUL, which
# XML 1.0 forbids just like the raw byte). The second alternative must not be
# empty: a dangling "|" would match the empty string at every position, so
# search() would always succeed and findall() would return spurious matches.
re_xml_illegal_bytes = re.compile(rb"[^\x09\x0A\x0D\x20-\xFF]|&#0;")

109

110

def clean(s: str | Iterable[str], joiner: str = '; ') -> str:
    """Return text that is safe to place in an XML tag body.

    A ``str`` is passed straight to the illegal-character filter. Any other
    iterable is first collapsed into one string with *joiner*, after emitting
    a warning, because a non-string at this point is asking for a node
    structure that is not allowed by the spec or not supported here.

    Args:
        s: A string, or an iterable of strings to be merged.
        joiner: Separator placed between merged elements.

    Returns:
        The (possibly merged) string with every character matched by
        ``re_xml_illegal_chars`` removed.

    Raises:
        TypeError: If *s* is neither a string nor an iterable.
    """
    from warnings import warn

    if isinstance(s, str):
        return re_xml_illegal_chars.sub('', s)

    if not isinstance(s, Iterable):
        raise TypeError("object must be a string or iterable of strings")

    warn(f"Merging elements of {s}")
    # Sets have no defined order; sort them so the merged text is stable.
    pieces = sorted(s) if isinstance(s, set) else s
    return re_xml_illegal_chars.sub('', joiner.join(pieces))

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/metadata/_constants.py: 80%

50 statements