Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/metadata/_constants.py: 68%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

59 statements  

1# SPDX-FileCopyrightText: 2022 James R. Barlow 

2# SPDX-License-Identifier: MPL-2.0 

3 

4"""XMP metadata constants, templates, and utilities.""" 

5 

6from __future__ import annotations 

7 

8import re 

9from collections.abc import Callable, Iterable 

10from typing import Any, NamedTuple 

11 

# XMP namespace URIs, grouped by the host that appears in each URI.
# The short prefix paired with each one is listed in DEFAULT_NAMESPACES.

# ns.adobe.com
XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
XMP_NS_PHOTOSHOP = "http://ns.adobe.com/photoshop/1.0/"
XMP_NS_XMP = "http://ns.adobe.com/xap/1.0/"
XMP_NS_XMP_MM = "http://ns.adobe.com/xap/1.0/mm/"
XMP_NS_XMP_RIGHTS = "http://ns.adobe.com/xap/1.0/rights/"

# www.aiim.org
XMP_NS_PDFA_ID = "http://www.aiim.org/pdfa/ns/id/"
XMP_NS_PDFA_EXTENSION = "http://www.aiim.org/pdfa/ns/extension/"
XMP_NS_PDFA_PROPERTY = "http://www.aiim.org/pdfa/ns/property#"
XMP_NS_PDFA_SCHEMA = "http://www.aiim.org/pdfa/ns/schema#"
XMP_NS_PDFUA_ID = "http://www.aiim.org/pdfua/ns/id/"

# www.npes.org
XMP_NS_PDFX_ID = "http://www.npes.org/pdfx/ns/id/"

# prismstandard.org
XMP_NS_PRISM = "http://prismstandard.org/namespaces/basic/1.0/"
XMP_NS_PRISM2 = "http://prismstandard.org/namespaces/basic/2.0/"
XMP_NS_PRISM3 = "http://prismstandard.org/namespaces/basic/3.0/"

# purl.org
XMP_NS_DC = "http://purl.org/dc/elements/1.1/"

# www.w3.org
XMP_NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

# This one should not be registered with lxml, so it is deliberately absent
# from DEFAULT_NAMESPACES.
XMP_NS_XML = "http://www.w3.org/XML/1998/namespace"

# (namespace URI, prefix) pairs. The order of entries is kept exactly as the
# module has always declared it, since consumers iterate this list in order.
DEFAULT_NAMESPACES: list[tuple[str, str]] = [
    ("adobe:ns:meta/", "x"),
    (XMP_NS_DC, "dc"),
    (XMP_NS_PDF, "pdf"),
    (XMP_NS_PDFA_ID, "pdfaid"),
    (XMP_NS_PDFA_EXTENSION, "pdfaExtension"),
    (XMP_NS_PDFA_PROPERTY, "pdfaProperty"),
    (XMP_NS_PDFA_SCHEMA, "pdfaSchema"),
    (XMP_NS_PDFUA_ID, "pdfuaid"),
    (XMP_NS_PDFX_ID, "pdfxid"),
    (XMP_NS_PHOTOSHOP, "photoshop"),
    (XMP_NS_PRISM, "prism"),
    (XMP_NS_PRISM2, "prism2"),
    (XMP_NS_PRISM3, "prism3"),
    (XMP_NS_RDF, "rdf"),
    (XMP_NS_XMP, "xmp"),
    (XMP_NS_XMP_MM, "xmpMM"),
    (XMP_NS_XMP_RIGHTS, "xmpRights"),
    # Namespaces without a named constant in this module.
    ("http://crossref.org/crossmark/1.0/", "crossmark"),
    ("http://www.niso.org/schemas/jav/1.0/", "jav"),
    ("http://ns.adobe.com/pdfx/1.3/", "pdfx"),
    ("http://www.niso.org/schemas/ali/1.0/", "ali"),
]

56 

57 

# XMP packet wrappers: the xpacket header, an empty xmpmeta/RDF document, and
# the xpacket trailer, all as bytes.
XPACKET_BEGIN = b'<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'

# NOTE(review): the two inner lines are indented by one space, as they appear
# in this view of the file - confirm the exact indentation against the
# original source before relying on byte-for-byte equality.
XMP_EMPTY = (
    b'<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pikepdf">\n'
    b' <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
    b' </rdf:RDF>\n'
    b'</x:xmpmeta>\n'
)

XPACKET_END = b'\n<?xpacket end="w"?>\n'

68 

69 

class XmpContainer(NamedTuple):
    """Describe how one XMP container kind is represented in Python.

    Fields:
        rdf_type: Name of the container kind, as a string.
        py_type: Python type used to hold the container's items.
        insert_fn: Callable that adds an item to an instance of ``py_type``;
            its return value is not used.
    """

    rdf_type: str
    py_type: type
    insert_fn: Callable[..., None]

76 

77 

class AltList(list):
    """Plain ``list`` subclass marking an XMP Alt (language alternatives) value.

    It adds no behaviour of its own; the distinct type is what lets callers
    tell it apart from an ordinary list.
    """

80 

81 

# One entry per container kind: its name, the Python type that holds it, and
# the unbound method used to add an item to that type.
XMP_CONTAINERS = [
    XmpContainer(rdf_type='Alt', py_type=AltList, insert_fn=AltList.append),
    XmpContainer(rdf_type='Bag', py_type=set, insert_fn=set.add),
    XmpContainer(rdf_type='Seq', py_type=list, insert_fn=list.append),
]

# (namespace URI, local name) pairs. They are only turned into lxml qualified
# names when LANG_ALTS is first requested, so lxml is not imported up front.
_LANG_ALTS_LAZY = [
    (XMP_NS_DC, 'title'),
    (XMP_NS_DC, 'description'),
    (XMP_NS_DC, 'rights'),
    (XMP_NS_XMP_RIGHTS, 'UsageTerms'),
]

# Becomes True once DEFAULT_NAMESPACES has been registered with lxml.
_LOADED_LXML_NAMESPACES = False

97 

98# lxml lazy-loading 

def __getattr__(name: str) -> Any:
    """Build the module attribute ``LANG_ALTS`` the first time it is requested.

    lxml is imported inside this hook rather than at module import time. On a
    request for ``LANG_ALTS`` the prefixes in ``DEFAULT_NAMESPACES`` are
    registered with lxml (once), the pairs in ``_LANG_ALTS_LAZY`` are converted
    to qualified-name strings, and the resulting frozenset is stored in the
    module globals and returned.

    Raises:
        AttributeError: for any name other than ``LANG_ALTS``.
    """
    global _LOADED_LXML_NAMESPACES

    if name != 'LANG_ALTS':
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from lxml import etree

    if not _LOADED_LXML_NAMESPACES:
        # Register all namespaces with lxml
        for ns_uri, ns_prefix in DEFAULT_NAMESPACES:
            etree.register_namespace(ns_prefix, ns_uri)
        _LOADED_LXML_NAMESPACES = True

    qualified_names = frozenset(
        str(etree.QName(ns_uri, local_name))
        for ns_uri, local_name in _LANG_ALTS_LAZY
    )
    # Store the result as a real module global so that later lookups find it
    # directly and never reach this hook again.
    globals()[name] = qualified_names
    return qualified_names

118 

119 

120# These are the illegal characters in XML 1.0. (XML 1.1 is a bit more permissive, 

121# but we'll be strict to ensure wider compatibility.) 

122re_xml_illegal_chars = re.compile( 

123 r"(?u)[^\x09\x0A\x0D\x20-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]" 

124) 

125re_xml_illegal_bytes = re.compile(rb"[^\x09\x0A\x0D\x20-\xFF]|&#0;") 

126 

127 

128def clean(s: str | Iterable[str], joiner: str = '; ') -> str: 

129 """Ensure an object can safely be inserted in a XML tag body. 

130 

131 If we still have a non-str object at this point, the best option is to 

132 join it, because it's apparently calling for a new node in a place that 

133 isn't allowed in the spec or not supported. 

134 """ 

135 from warnings import warn 

136 

137 if not isinstance(s, str): 

138 if isinstance(s, Iterable): 

139 warn(f"Merging elements of {s}") 

140 if isinstance(s, set): 

141 s = joiner.join(sorted(s)) 

142 else: 

143 s = joiner.join(s) 

144 else: 

145 raise TypeError("object must be a string or iterable of strings") 

146 return re_xml_illegal_chars.sub('', s)