1"""
2<Program Name>
3 hash.py
4
5<Author>
6 Vladimir Diaz <vladimir.v.diaz@gmail.com>
7
8<Started>
9 February 28, 2012. Based on a previous version of this module.
10
11<Copyright>
12 See LICENSE for licensing information.
13
14<Purpose>
15 Support secure hashing and message digests. Any hash-related routines that
16 securesystemslib requires should be located in this module. Simplifying the
17 creation of digest objects, and providing a central location for hash
18 routines are the main goals of this module. Support routines implemented
19 include functions to create digest objects given a filename or file object.
 The standard hashlib library is always supported; pyca/cryptography is
 supported as well when it is installed.
22"""
23
24import hashlib
25
26from securesystemslib import exceptions
27from securesystemslib.storage import FilesystemBackend
28
# Size passed to each read() call when hashing a file object.
DEFAULT_CHUNK_SIZE = 4096

# Algorithm and library used when the caller does not choose them.
DEFAULT_HASH_ALGORITHM = "sha256"
DEFAULT_HASH_LIBRARY = "hashlib"

# Libraries digest() may dispatch to. Optional backends append their name
# to this list further down in the module when they can be imported.
SUPPORTED_LIBRARIES = [DEFAULT_HASH_LIBRARY]
33
34
# Optional backend: when `cryptography` can be imported, register
# `pyca_crypto` as an additional supported library.
try:
    import binascii

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import hashes as _pyca_hashes

    # Hash algorithm classes available through `pyca/cryptography`, keyed by
    # the algorithm name callers pass to digest().
    PYCA_DIGEST_OBJECTS_CACHE = dict(
        sha224=_pyca_hashes.SHA224,
        sha256=_pyca_hashes.SHA256,
        sha384=_pyca_hashes.SHA384,
        sha512=_pyca_hashes.SHA512,
    )

    SUPPORTED_LIBRARIES.append("pyca_crypto")

    class PycaDiggestWrapper:
        """
        <Purpose>
          Adapt a `cryptography.hazmat.primitives.hashes.Hash` object to the
          digest-object interface this module hands out:

            digest_object.digest_size
            digest_object.hexdigest()
            digest_object.update('data')
            digest_object.digest()

        <Properties>
          algorithm:
            The `algorithm` attribute of the wrapped `Hash` object.

          digest_size:
            The `digest_size` of the wrapped object's algorithm.

        <Methods>
          digest(self) -> bytes:
            Finalizes the wrapped object and returns the digest as bytes.
            NOTE: a `cryptography` `Hash` may be finalized only once, so a
            copy is taken before finalizing and becomes the new wrapped
            object. This keeps the wrapper usable after `digest` is called.

          hexdigest(self) -> str:
            Returns the digest as a hexadecimal string.

          update(self, data) -> None:
            Forwards 'data' to the wrapped object's `update` method.
        """

        def __init__(self, digest_obj):
            self._digest_obj = digest_obj

        @property
        def algorithm(self):
            return self._digest_obj.algorithm

        @property
        def digest_size(self):
            wrapped_algorithm = self._digest_obj.algorithm
            return wrapped_algorithm.digest_size

        def digest(self):
            # Copy first: finalize() consumes the object it is called on.
            spare = self._digest_obj.copy()
            raw_digest = self._digest_obj.finalize()
            self._digest_obj = spare
            return raw_digest

        def hexdigest(self):
            raw_digest = self.digest()
            return binascii.hexlify(raw_digest).decode("utf-8")

        def update(self, data):
            self._digest_obj.update(data)

except ImportError:  # pragma: no cover
    pass
110
111
def digest(algorithm=DEFAULT_HASH_ALGORITHM, hash_library=DEFAULT_HASH_LIBRARY):
    """
    <Purpose>
      Create a digest object for the requested algorithm and library, so
      callers do not deal with the individual crypto libraries themselves.

        # Using the defaults, or naming the algorithm and/or library.
        digest_object = securesystemslib.hash.digest()
        digest_object = securesystemslib.hash.digest('sha384')
        digest_object = securesystemslib.hash.digest('sha256', 'hashlib')

        # Interface offered by the returned object.
        digest_object.digest_size
        digest_object.hexdigest()
        digest_object.update('data')
        digest_object.digest()

        # Related helpers in this module.
        digest_object = securesystemslib.hash.digest_fileobject(file_object)
        digest_object = securesystemslib.hash.digest_filename(filename)

    <Arguments>
      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512'). With 'hashlib', the
        name 'blake2b-256' selects blake2b with a 32-byte digest.

      hash_library:
        The crypto library to use for the given hash algorithm
        (e.g., 'hashlib').

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if the selected
      library cannot create a digest object for 'algorithm'.

      securesystemslib.exceptions.UnsupportedLibraryError, if 'hash_library'
      is not one of the supported libraries.

    <Side Effects>
      None.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    library_is_supported = hash_library in SUPPORTED_LIBRARIES

    # hashlib backend.
    if library_is_supported and hash_library == "hashlib":
        try:
            if algorithm == "blake2b-256":
                hashlib_digest = hashlib.new("blake2b", digest_size=32)
            else:
                hashlib_digest = hashlib.new(algorithm)

        except (ValueError, TypeError):
            # ValueError: the algorithm value was unknown
            # TypeError: unexpected argument digest_size (on old python)
            raise exceptions.UnsupportedAlgorithmError(algorithm)

        return hashlib_digest

    # pyca/cryptography backend (only registered when it could be imported).
    if library_is_supported and hash_library == "pyca_crypto":
        try:
            algorithm_instance = PYCA_DIGEST_OBJECTS_CACHE[algorithm]()
            pyca_hash = _pyca_hashes.Hash(algorithm_instance, default_backend())
            wrapped_digest = PycaDiggestWrapper(pyca_hash)

        except KeyError:
            raise exceptions.UnsupportedAlgorithmError(algorithm)

        return wrapped_digest

    # Neither branch applied: the requested library is not supported.
    raise exceptions.UnsupportedLibraryError(
        "Unsupported library requested. Supported hash libraries: "
        + repr(SUPPORTED_LIBRARIES)
    )
194
195
def digest_fileobject(
    file_object,
    algorithm=DEFAULT_HASH_ALGORITHM,
    hash_library=DEFAULT_HASH_LIBRARY,
    normalize_line_endings=False,
):
    """
    <Purpose>
      Generate a digest object given a file object. The new digest object
      is updated with the contents of 'file_object' prior to returning the
      object to the caller.

    <Arguments>
      file_object:
        File object whose contents are fed into the digest object. It must
        support seek() and read(). Chunks read as bytes are hashed as-is;
        chunks read as str (text-mode file objects) are UTF-8 encoded before
        hashing.

      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').

      hash_library:
        The library providing the hash algorithms (e.g., 'hashlib').

      normalize_line_endings: (default False)
        Whether or not to normalize line endings for cross-platform support:
        every CRLF pair and every remaining lone CR is replaced by LF before
        hashing. Works for both binary and text-mode file objects.
        Note that this results in ambiguous hashes (a file using LF and the
        same file using CRLF produce the same hash), so be careful to only
        apply this to text files (not binary), when that equivalence is
        desirable and cannot result in easily-maliciously-corrupted files
        producing the same hash as a valid file.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported
      hashing algorithm was specified via 'algorithm'.

      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported
      crypto library was specified via 'hash_library'.

    <Side Effects>
      'file_object' is rewound to its beginning and then read to the end, so
      its position is changed.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    # Digest object returned whose hash will be updated using 'file_object'.
    # digest() raises:
    #   securesystemslib.exceptions.UnsupportedAlgorithmError
    #   securesystemslib.exceptions.UnsupportedLibraryError
    digest_object = digest(algorithm, hash_library)

    # Defensively seek to beginning, as there's no case where we don't
    # intend to start from the beginning of the file.
    file_object.seek(0)

    # Read the file in chunks of at most DEFAULT_CHUNK_SIZE, feeding each
    # chunk to the digest object until read() returns nothing.
    while True:
        data = file_object.read(DEFAULT_CHUNK_SIZE)
        if not data:
            break

        if normalize_line_endings:
            # Use literals of the same type as the chunk: comparing or
            # replacing str data with bytes literals never matches and makes
            # str.replace() raise TypeError.
            if isinstance(data, str):
                cr, lf, crlf = "\r", "\n", "\r\n"
            else:
                cr, lf, crlf = b"\r", b"\n", b"\r\n"

            # A chunk ending in CR may be the first half of a CRLF pair that
            # straddles the chunk boundary; pull in one more unit so the pair
            # is collapsed to a single LF rather than two.
            while data[-1:] == cr:
                c = file_object.read(1)
                if not c:
                    break

                data += c

            # First Windows (CRLF), then old Mac (lone CR).
            data = data.replace(crlf, lf).replace(cr, lf)

        if not isinstance(data, bytes):
            data = data.encode("utf-8")

        digest_object.update(data)

    return digest_object
288
289
def digest_filename(
    filename,
    algorithm=DEFAULT_HASH_ALGORITHM,
    hash_library=DEFAULT_HASH_LIBRARY,
    normalize_line_endings=False,
    storage_backend=None,
):
    """
    <Purpose>
      Obtain a file object for 'filename' from a storage backend, hash its
      contents with digest_fileobject(), and return the digest object.

    <Arguments>
      filename:
        The filename belonging to the file object to be used.

      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').

      hash_library:
        The library providing the hash algorithms (e.g., 'hashlib').

      normalize_line_endings:
        Whether or not to normalize line endings for cross-platform support.
        Passed through to digest_fileobject().

      storage_backend:
        An object which implements
        securesystemslib.storage.StorageBackendInterface. When no object is
        passed a FilesystemBackend will be instantiated and used.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if the given
      'algorithm' is unsupported.

      securesystemslib.exceptions.UnsupportedLibraryError, if the given
      'hash_library' is unsupported.

      securesystemslib.exceptions.StorageError, if the file cannot be opened.
      (Expected to come from the storage backend; not raised directly here.)

    <Side Effects>
      None.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    backend = FilesystemBackend() if storage_backend is None else storage_backend

    # The backend's get() is used as a context manager that yields the file
    # object to hash.
    with backend.get(filename) as file_object:
        # digest_fileobject() raises:
        #   securesystemslib.exceptions.UnsupportedAlgorithmError
        #   securesystemslib.exceptions.UnsupportedLibraryError
        return digest_fileobject(
            file_object, algorithm, hash_library, normalize_line_endings
        )