Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/metadata/_constants.py: 80%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

50 statements  

1# SPDX-FileCopyrightText: 2022 James R. Barlow 

2# SPDX-License-Identifier: MPL-2.0 

3 

4"""XMP metadata constants, templates, and utilities.""" 

5 

6from __future__ import annotations 

7 

8import re 

9from collections.abc import Callable, Iterable 

10from typing import NamedTuple 

11 

12from lxml import etree 

13from lxml.etree import QName 

14 

# XMP Namespace URIs
# Each constant holds the namespace URI string for one metadata schema; the
# short prefixes conventionally paired with them are listed separately in
# DEFAULT_NAMESPACES.
XMP_NS_DC = "http://purl.org/dc/elements/1.1/"
XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
XMP_NS_PDFA_ID = "http://www.aiim.org/pdfa/ns/id/"
XMP_NS_PDFA_EXTENSION = "http://www.aiim.org/pdfa/ns/extension/"
XMP_NS_PDFA_PROPERTY = "http://www.aiim.org/pdfa/ns/property#"
XMP_NS_PDFA_SCHEMA = "http://www.aiim.org/pdfa/ns/schema#"
XMP_NS_PDFUA_ID = "http://www.aiim.org/pdfua/ns/id/"
XMP_NS_PDFX_ID = "http://www.npes.org/pdfx/ns/id/"
XMP_NS_PHOTOSHOP = "http://ns.adobe.com/photoshop/1.0/"
XMP_NS_PRISM = "http://prismstandard.org/namespaces/basic/1.0/"
XMP_NS_PRISM2 = "http://prismstandard.org/namespaces/basic/2.0/"
XMP_NS_PRISM3 = "http://prismstandard.org/namespaces/basic/3.0/"
XMP_NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
XMP_NS_XMP = "http://ns.adobe.com/xap/1.0/"
XMP_NS_XMP_MM = "http://ns.adobe.com/xap/1.0/mm/"
XMP_NS_XMP_RIGHTS = "http://ns.adobe.com/xap/1.0/rights/"

# This one should not be registered with lxml
XMP_NS_XML = "http://www.w3.org/XML/1998/namespace"

35 

# (namespace URI, prefix) pairs. The URI comes first and the short prefix
# second in every entry.
DEFAULT_NAMESPACES: list[tuple[str, str]] = [
    ('adobe:ns:meta/', 'x'),
    (XMP_NS_DC, 'dc'),
    (XMP_NS_PDF, 'pdf'),
    (XMP_NS_PDFA_ID, 'pdfaid'),
    (XMP_NS_PDFA_EXTENSION, 'pdfaExtension'),
    (XMP_NS_PDFA_PROPERTY, 'pdfaProperty'),
    (XMP_NS_PDFA_SCHEMA, 'pdfaSchema'),
    (XMP_NS_PDFUA_ID, 'pdfuaid'),
    (XMP_NS_PDFX_ID, 'pdfxid'),
    (XMP_NS_PHOTOSHOP, 'photoshop'),
    (XMP_NS_PRISM, 'prism'),
    (XMP_NS_PRISM2, 'prism2'),
    (XMP_NS_PRISM3, 'prism3'),
    (XMP_NS_RDF, 'rdf'),
    (XMP_NS_XMP, 'xmp'),
    (XMP_NS_XMP_MM, 'xmpMM'),
    (XMP_NS_XMP_RIGHTS, 'xmpRights'),
    ('http://crossref.org/crossmark/1.0/', 'crossmark'),
    ('http://www.niso.org/schemas/jav/1.0/', 'jav'),
    ('http://ns.adobe.com/pdfx/1.3/', 'pdfx'),
    ('http://www.niso.org/schemas/ali/1.0/', 'ali'),
]

59 

# Register all namespaces with lxml
# Runs once at import time. Entries are stored as (URI, prefix) but
# register_namespace takes (prefix, URI), hence the swapped argument order.
for _uri, _prefix in DEFAULT_NAMESPACES:
    etree.register_namespace(_prefix, _uri)

63 

# XMP packet wrappers
# Opening processing instruction; the begin attribute holds the three bytes
# EF BB BF (the UTF-8 encoded byte order mark).
XPACKET_BEGIN = b"""<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>\n"""

# Minimal XMP document: an x:xmpmeta root wrapping an rdf:RDF element with no
# properties.
# NOTE(review): the single leading space on the rdf:RDF lines is kept as it
# appears in the reviewed text -- confirm against the upstream module.
XMP_EMPTY = b"""<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pikepdf">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
 </rdf:RDF>
</x:xmpmeta>
"""

# Closing processing instruction, with end="w".
XPACKET_END = b"""\n<?xpacket end="w"?>\n"""

74 

75 

class XmpContainer(NamedTuple):
    """Map XMP container object to suitable Python container."""

    # Name of the RDF container element, e.g. 'Alt', 'Bag' or 'Seq'.
    rdf_type: str
    # Python type used to hold the container's items.
    py_type: type
    # Callable that adds one item to an instance of py_type; called as
    # insert_fn(container, item) when given an unbound method such as
    # list.append.
    insert_fn: Callable[..., None]

82 

83 

class AltList(list):
    """XMP AltList container for language alternatives.

    Adds no behavior of its own; it is a distinct ``list`` subclass so that
    values can be told apart from ordinary lists by type.
    """

86 

87 

# One entry per RDF container kind: the RDF element name, the Python type
# that holds its items, and the unbound method that inserts an item.
XMP_CONTAINERS = [
    XmpContainer('Alt', AltList, AltList.append),
    XmpContainer('Bag', set, set.add),
    XmpContainer('Seq', list, list.append),
]

93 

# Fully qualified names of four properties, stored as the string form of an
# lxml QName.
# NOTE(review): judging by the name, these are the properties treated as
# language alternatives -- confirm against the code that consumes this set.
LANG_ALTS = frozenset(
    [
        str(QName(XMP_NS_DC, 'title')),
        str(QName(XMP_NS_DC, 'description')),
        str(QName(XMP_NS_DC, 'rights')),
        str(QName(XMP_NS_XMP_RIGHTS, 'UsageTerms')),
    ]
)

102 

# These are the illegal characters in XML 1.0. (XML 1.1 is a bit more permissive,
# but we'll be strict to ensure wider compatibility.)
# The character class is negated: it matches any code point that is NOT tab,
# LF, CR, or inside one of the listed ranges.
re_xml_illegal_chars = re.compile(
    r"(?u)[^\x09\x0A\x0D\x20-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]"
)
# Bytes counterpart: matches any byte other than tab, LF, CR or 0x20-0xFF, and
# also the literal character reference "&#0;".
re_xml_illegal_bytes = re.compile(rb"[^\x09\x0A\x0D\x20-\xFF]|&#0;")

109 

110 

def clean(s: str | Iterable[str], joiner: str = '; ') -> str:
    """Ensure an object can safely be inserted in a XML tag body.

    If we still have a non-str object at this point, the best option is to
    join it, because it's apparently calling for a new node in a place that
    isn't allowed in the spec or not supported.

    Args:
        s: A string, or an iterable of strings to merge into one string.
        joiner: Separator placed between merged elements.

    Returns:
        The (possibly merged) string with every character matched by
        ``re_xml_illegal_chars`` removed.

    Raises:
        TypeError: If *s* is neither a string nor an iterable.

    Warns:
        UserWarning: Whenever an iterable is merged into a single string.
    """
    from warnings import warn

    if not isinstance(s, str):
        if not isinstance(s, Iterable):
            raise TypeError("object must be a string or iterable of strings")
        warn(f"Merging elements of {s}")
        # Sets (mutable or frozen) have no defined iteration order; sort them
        # so the merged string is reproducible from run to run.
        if isinstance(s, (set, frozenset)):
            s = joiner.join(sorted(s))
        else:
            s = joiner.join(s)
    return re_xml_illegal_chars.sub('', s)