1"""
2<Program Name>
3 formats.py
4
5<Author>
6 Geremy Condra
7 Vladimir Diaz <vladimir.v.diaz@gmail.com>
8
9<Started>
10 Refactored April 30, 2012. -vladimir.v.diaz
11
12<Copyright>
13 2008-2011 The Tor Project, Inc
14 2012-2016 New York University and the TUF contributors
15 2016-2021 Securesystemslib contributors
16 See LICENSE for licensing information.
17
18<Purpose>
19 Implements canonical json (OLPC) encoder.
20
21"""
22
23from typing import Callable, Optional, Union
24
25from securesystemslib import exceptions
26
27
28def _canonical_string_encoder(string: str) -> str:
29 """
30 <Purpose>
31 Encode 'string' to canonical string format. By using the escape sequence ('\')
32 which is mandatory to use for quote and backslash.
33 backslash: \\ translates to \\\\
34 quote: \" translates to \\".
35
36 <Arguments>
37 string:
38 The string to encode.
39
40 <Exceptions>
41 None.
42
43 <Side Effects>
44 None.
45
46 <Returns>
47 A string with the canonical-encoded 'string' embedded.
48 """
49 string = '"{}"'.format(string.replace("\\", "\\\\").replace('"', '\\"'))
50
51 return string
52
53
54def _encode_canonical(
55 object: Union[bool, None, str, int, tuple, list, dict], output_function: Callable
56) -> None:
57 # Helper for encode_canonical. Older versions of json.encoder don't
58 # even let us replace the separators.
59
60 if isinstance(object, str):
61 output_function(_canonical_string_encoder(object))
62 elif object is True:
63 output_function("true")
64 elif object is False:
65 output_function("false")
66 elif object is None:
67 output_function("null")
68 elif isinstance(object, int):
69 output_function(str(object))
70 elif isinstance(object, (tuple, list)):
71 output_function("[")
72 if len(object):
73 for item in object[:-1]:
74 _encode_canonical(item, output_function)
75 output_function(",")
76 _encode_canonical(object[-1], output_function)
77 output_function("]")
78 elif isinstance(object, dict):
79 output_function("{")
80 if len(object):
81 items = sorted(object.items())
82 for key, value in items[:-1]:
83 output_function(_canonical_string_encoder(key))
84 output_function(":")
85 _encode_canonical(value, output_function)
86 output_function(",")
87 key, value = items[-1]
88 output_function(_canonical_string_encoder(key))
89 output_function(":")
90 _encode_canonical(value, output_function)
91 output_function("}")
92 else:
93 raise exceptions.FormatError("I cannot encode " + repr(object))
94
95
def encode_canonical(
    object: Union[bool, None, str, int, tuple, list, dict],
    output_function: Optional[Callable] = None,
) -> Union[str, None]:
    """
    <Purpose>
      Encode an object so that it always has the same string format,
      independent of the original format. This makes it possible to always
      compute the same hash or signature for that object.

      Encode 'object' in canonical JSON form, as specified at
      http://wiki.laptop.org/go/Canonical_JSON . It's a restricted
      dialect of JSON in which keys are always lexically sorted,
      there is no whitespace, floats aren't allowed, and only quote
      and backslash get escaped. The encoded text is produced as a sequence
      of str fragments which are passed to 'output_function' (if provided),
      or joined into a single string and returned.

      Note: This function should be called prior to computing the hash or
      signature of a JSON object in securesystemslib. For example, generating a
      signature of a signing role object such as 'ROOT_SCHEMA' is required to
      ensure repeatable hashes are generated across different json module
      versions and platforms. Code elsewhere is free to dump JSON objects in any
      format they wish (e.g., utilizing indentation and single quotes around
      object keys). These objects are only required to be in "canonical JSON"
      format when their hashes or signatures are needed.

      >>> encode_canonical("")
      '""'
      >>> encode_canonical([1, 2, 3])
      '[1,2,3]'
      >>> encode_canonical([])
      '[]'
      >>> encode_canonical({"A": [99]})
      '{"A":[99]}'
      >>> encode_canonical({"x" : 3, "y" : 2})
      '{"x":3,"y":2}'

    <Arguments>
      object:
        The object to be encoded.

      output_function:
        Optional callable. When given, it is called once for each encoded
        fragment (e.g., output_function('[')) instead of the fragments being
        collected and returned.

    <Exceptions>
      securesystemslib.exceptions.FormatError, if 'object' cannot be encoded or
      'output_function' is not callable. The underlying error is attached as
      the exception's cause.

    <Side Effects>
      The results are fed to 'output_function()' if 'output_function' is set.

    <Returns>
      A string representing the 'object' encoded in canonical JSON form if
      'output_function' is not set; otherwise None.
    """

    result: Union[None, list] = None
    # If 'output_function' is unset, collect the fragments in a list so they
    # can be joined into the return value.
    if output_function is None:
        result = []
        output_function = result.append

    try:
        _encode_canonical(object, output_function)

    # TypeError covers, e.g., a non-callable 'output_function' or dict keys
    # that cannot be ordered; both are reported uniformly as FormatError.
    except (TypeError, exceptions.FormatError) as e:
        message: str = "Could not encode " + repr(object) + ": " + str(e)
        # Chain explicitly so the original error is kept as the cause.
        raise exceptions.FormatError(message) from e

    # Return the encoded 'object' as a string.
    # Note: 'result' is only set when no 'output_function' was supplied;
    # otherwise the fragments have already been sent to 'output_function'.
    if result is not None:
        return "".join(result)
    return None