Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/metadata/

1# SPDX-FileCopyrightText: 2022 James R. Barlow

2# SPDX-License-Identifier: MPL-2.0

4"""XMP metadata constants, templates, and utilities."""

6from __future__ import annotations

8import re

9from collections.abc import Callable, Iterable

10from typing import Any, NamedTuple

# XMP Namespace URIs
XMP_NS_DC = "http://purl.org/dc/elements/1.1/"
XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
XMP_NS_PDFA_ID = "http://www.aiim.org/pdfa/ns/id/"
XMP_NS_PDFA_EXTENSION = "http://www.aiim.org/pdfa/ns/extension/"
XMP_NS_PDFA_PROPERTY = "http://www.aiim.org/pdfa/ns/property#"
XMP_NS_PDFA_SCHEMA = "http://www.aiim.org/pdfa/ns/schema#"
XMP_NS_PDFUA_ID = "http://www.aiim.org/pdfua/ns/id/"
XMP_NS_PDFX_ID = "http://www.npes.org/pdfx/ns/id/"
XMP_NS_PHOTOSHOP = "http://ns.adobe.com/photoshop/1.0/"
XMP_NS_PRISM = "http://prismstandard.org/namespaces/basic/1.0/"
XMP_NS_PRISM2 = "http://prismstandard.org/namespaces/basic/2.0/"
XMP_NS_PRISM3 = "http://prismstandard.org/namespaces/basic/3.0/"
XMP_NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
XMP_NS_XMP = "http://ns.adobe.com/xap/1.0/"
XMP_NS_XMP_MM = "http://ns.adobe.com/xap/1.0/mm/"
XMP_NS_XMP_RIGHTS = "http://ns.adobe.com/xap/1.0/rights/"

# This one should not be registered with lxml
XMP_NS_XML = "http://www.w3.org/XML/1998/namespace"

# (namespace URI, prefix) pairs. XMP_NS_XML is deliberately absent from this
# list (see the note on that constant).
DEFAULT_NAMESPACES: list[tuple[str, str]] = [
    ('adobe:ns:meta/', 'x'),
    (XMP_NS_DC, 'dc'),
    (XMP_NS_PDF, 'pdf'),
    (XMP_NS_PDFA_ID, 'pdfaid'),
    (XMP_NS_PDFA_EXTENSION, 'pdfaExtension'),
    (XMP_NS_PDFA_PROPERTY, 'pdfaProperty'),
    (XMP_NS_PDFA_SCHEMA, 'pdfaSchema'),
    (XMP_NS_PDFUA_ID, 'pdfuaid'),
    (XMP_NS_PDFX_ID, 'pdfxid'),
    (XMP_NS_PHOTOSHOP, 'photoshop'),
    (XMP_NS_PRISM, 'prism'),
    (XMP_NS_PRISM2, 'prism2'),
    (XMP_NS_PRISM3, 'prism3'),
    (XMP_NS_RDF, 'rdf'),
    (XMP_NS_XMP, 'xmp'),
    (XMP_NS_XMP_MM, 'xmpMM'),
    (XMP_NS_XMP_RIGHTS, 'xmpRights'),
    ('http://crossref.org/crossmark/1.0/', 'crossmark'),
    ('http://www.niso.org/schemas/jav/1.0/', 'jav'),
    ('http://ns.adobe.com/pdfx/1.3/', 'pdfx'),
    ('http://www.niso.org/schemas/ali/1.0/', 'ali'),
]

# XMP packet wrappers
# Opening xpacket processing instruction; the "begin" attribute holds the
# three bytes EF BB BF (the UTF-8 byte order mark).
XPACKET_BEGIN = b"""<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>\n"""

# Minimal XMP document: an x:xmpmeta root wrapping an empty rdf:RDF element.
XMP_EMPTY = b"""<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pikepdf">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
 </rdf:RDF>
</x:xmpmeta>
"""

# Closing xpacket processing instruction.
XPACKET_END = b"""\n<?xpacket end="w"?>\n"""

class XmpContainer(NamedTuple):
    """Map XMP container object to suitable Python container."""

    # Name of the RDF container element (the table below uses 'Alt', 'Bag'
    # and 'Seq').
    rdf_type: str
    # Python class used to hold the container's items.
    py_type: type
    # Unbound method that adds one item: insert_fn(container, item).
    insert_fn: Callable[..., None]

class AltList(list):
    """XMP AltList container for language alternatives.

    A plain ``list`` subclass with no added behaviour; the distinct type lets
    an 'Alt' container be told apart from an ordinary list.
    """

# One entry per RDF container kind: element name, Python type that holds the
# items, and the method used to add an item to that type.
XMP_CONTAINERS = [
    XmpContainer('Alt', AltList, AltList.append),
    XmpContainer('Bag', set, set.add),
    XmpContainer('Seq', list, list.append),
]

# (namespace URI, local name) pairs that are turned into the LANG_ALTS
# frozenset on first access, via the module-level __getattr__.
_LANG_ALTS_LAZY = [
    (XMP_NS_DC, 'title'),
    (XMP_NS_DC, 'description'),
    (XMP_NS_DC, 'rights'),
    (XMP_NS_XMP_RIGHTS, 'UsageTerms'),
]

# Set to True once DEFAULT_NAMESPACES has been registered with lxml.
_LOADED_LXML_NAMESPACES = False

# lxml lazy-loading
def __getattr__(name: str) -> Any:
    """Provide the module attribute ``LANG_ALTS`` on first access.

    lxml is imported only when ``LANG_ALTS`` is requested. The first such
    request also registers every ``DEFAULT_NAMESPACES`` prefix with lxml.

    Args:
        name: The attribute being looked up on this module.

    Returns:
        For ``'LANG_ALTS'``, a frozenset of the string form of an lxml
        ``QName`` for each ``_LANG_ALTS_LAZY`` entry.

    Raises:
        AttributeError: If *name* is anything other than ``'LANG_ALTS'``.
    """
    global _LOADED_LXML_NAMESPACES

    if name == 'LANG_ALTS':
        from lxml.etree import QName

        if not _LOADED_LXML_NAMESPACES:
            from lxml import etree

            # Register all namespaces with lxml
            for _uri, _prefix in DEFAULT_NAMESPACES:
                etree.register_namespace(_prefix, _uri)
            _LOADED_LXML_NAMESPACES = True

        val = frozenset(str(QName(x, y)) for x, y in _LANG_ALTS_LAZY)
        # Store the result as a real module global so later lookups find it
        # directly and do not come back through this function.
        globals()[name] = val

        return val

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

118

119

# These are the illegal characters in XML 1.0. (XML 1.1 is a bit more permissive,
# but we'll be strict to ensure wider compatibility.)
re_xml_illegal_chars = re.compile(
    r"(?u)[^\x09\x0A\x0D\x20-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]"
)
# Bytes counterpart: matches any byte other than tab, LF, CR or 0x20-0xFF, and
# also the literal five-byte text "&#0;" (a numeric character reference to NUL).
re_xml_illegal_bytes = re.compile(rb"[^\x09\x0A\x0D\x20-\xFF]|&#0;")

126

127

def clean(s: str | Iterable[str], joiner: str = '; ') -> str:
    """Ensure an object can safely be inserted in a XML tag body.

    If we still have a non-str object at this point, the best option is to
    join it, because it's apparently calling for a new node in a place that
    isn't allowed in the spec or not supported.

    Args:
        s: A string, or an iterable of strings to be merged into one.
        joiner: Separator placed between the elements of a non-str iterable.

    Returns:
        The (possibly joined) string with every match of
        ``re_xml_illegal_chars`` removed.

    Raises:
        TypeError: If *s* is neither a ``str`` nor an ``Iterable``.
    """
    from warnings import warn

    if not isinstance(s, str):
        if not isinstance(s, Iterable):
            raise TypeError("object must be a string or iterable of strings")
        warn(f"Merging elements of {s}")
        # Set iteration order is arbitrary; sort so the joined text is stable.
        s = joiner.join(sorted(s) if isinstance(s, set) else s)
    return re_xml_illegal_chars.sub('', s)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/metadata/_constants.py: 68%

59 statements