1# Copyright 2024 The Sigstore Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Machinery for computing digests for a single object.
16
17The `Digest` object represents the algorithm used to summarize (hash) an object
18(file, chunk of file, etc.) and the value of the digest. These are the values
19that are stored in signature files.
20
21To support multiple hashing formats, we define an abstract `HashEngine` class
22which can be used in type annotations and is at the root of the hashing classes
23hierarchy.
24
25To support updating the hash of an object when more data is being added, we also
26define a `Streaming` protocol which we then use to define an abstract
27`StreamingHashEngine`.
28
29These two types of hashing engines are used to hash any of the objects that we
30can generate a signature over.
31"""
32
33import abc
34import dataclasses
35from typing import Protocol
36
37
@dataclasses.dataclass(frozen=True)
class Digest:
    """Immutable pairing of a hashing algorithm name with a digest value.

    Instances are produced by a `HashEngine`.

    Attributes:
        algorithm: Name of the algorithm that produced the digest. It is
          either a canonical name (e.g. "sha256" for SHA256) or a
          library-specific name that uniquely encodes the hashing
          configuration (e.g., "sha256-sharded-1024" for SHA256 hashes
          computed over 1024-byte shards of the object). The name can be
          used to autodetect the hashing configuration used during signing,
          so that verification can compute a similar digest.
        digest_value: The raw bytes of the digest.
    """

    algorithm: str
    digest_value: bytes

    @property
    def digest_size(self) -> int:
        """Length of `digest_value`, in bytes."""
        raw = self.digest_value
        return len(raw)

    @property
    def digest_hex(self) -> str:
        """Human readable, hexadecimal rendering of `digest_value`."""
        raw = self.digest_value
        return raw.hex()
65
66
class HashEngine(metaclass=abc.ABCMeta):
    """Abstract root of the hashing class hierarchy.

    Concrete engines must implement `compute`, `digest_name` and
    `digest_size`.
    """

    @property
    @abc.abstractmethod
    def digest_size(self) -> int:
        """Size, in bytes, of every digest this engine produces.

        The value must match the `digest_size` reported by the `Digest`
        object that the engine returns.
        """

    @property
    @abc.abstractmethod
    def digest_name(self) -> str:
        """Canonical name of the algorithm used to compute the hash.

        Subclasses MUST encode in this name every parameter that influences
        the hash output. For example, when a file is split into shards that
        are hashed separately and the final digest is obtained by
        aggregating those hashes, the shard size must appear in the
        returned string.

        The name is carried over to the `algorithm` field of the `Digest`
        computed by the hashing engine.
        """

    @abc.abstractmethod
    def compute(self) -> Digest:
        """Returns the digest of the data passed to the engine."""
97
98
class Streaming(Protocol):
    """Protocol for feeding data incrementally to `HashEngine` objects."""

    @abc.abstractmethod
    def reset(self, data: bytes = b"") -> None:
        """Replaces the data to be hashed with the given argument.

        Args:
            data: Optional initial data to hash; defaults to no data.
        """

    @abc.abstractmethod
    def update(self, data: bytes) -> None:
        """Adds more bytes at the end of the data to be hashed.

        Args:
            data: The additional data that should be hashed.
        """
117
118
class StreamingHashEngine(Streaming, HashEngine):
    """A `HashEngine` that also supports the `Streaming` protocol.

    It combines the two base interfaces and adds no members of its own.
    """