Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/model_signing/_signing/signing.py: 78%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

87 statements  

1# Copyright 2024 The Sigstore Authors 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Machinery for signing and verification of ML models. 

16 

17The serialization API produces a manifest representation of the models, and we 

18use that to implement integrity checking of models in different computational 

19patterns. This means that all manifests need to be kept only in memory. 

20 

21Since we need to support multiple signing methods (e.g., Sigstore, key, 

22certificate, etc.), we provide a `Signer` abstract class with a single `sign` 

23method that takes a signing payload and converts it to a signature in the 

24supported format. 

25 

26Finally, every signature needs to be verified. We pair every `Signer` subclass 

27with a `Verifier` which takes a signature, verifies the authenticity of the 

28payload and then expands that to a manifest. 

29 

30Regarding the data formats, for signing, we need to convert the manifest to the 

31signing payload. We only support manifests serialized to in-toto formats 

32described by https://github.com/in-toto/attestation/tree/main/spec/v1. The 

33envelope format is DSSE, as described in 

34https://github.com/secure-systems-lab/dsse. The signature is in a Sigstore 

35bundle format over the DSSE payload. The format is described at 

36https://docs.sigstore.dev/about/bundle/. 

37""" 

38 

39import abc 

40import json 

41import pathlib 

42import sys 

43from typing import Any 

44 

45from in_toto_attestation.v1 import statement 

46 

47from model_signing import manifest 

48from model_signing._hashing import hashing 

49from model_signing._hashing import memory 

50 

51 

52if sys.version_info >= (3, 11): 

53 from typing import Self 

54else: 

55 from typing_extensions import Self 

56 

57 

# The expected in-toto payload type for the signature. `Verifier.verify`
# rejects any signed content whose payload type differs from this value.
_IN_TOTO_JSON_PAYLOAD_TYPE: str = "application/vnd.in-toto+json"

# The expected in-toto statement type for the signature. `Verifier.verify`
# compares it against the `_type` field of the decoded payload.
_IN_TOTO_STATEMENT_TYPE: str = "https://in-toto.io/Statement/v1"

# The expected model signature predicate type. `Payload` writes it into new
# statements and `dsse_payload_to_manifest` requires it when reading them.
_PREDICATE_TYPE: str = "https://model_signing/signature/v1.0"

# The expected model signature predicate type for v0.2 (compat). Payloads
# carrying it are handled by `dsse_payload_to_manifest_compat`.
_PREDICATE_TYPE_COMPAT: str = "https://model_signing/Digests/v0.1"

72 

73 

def dsse_payload_to_manifest(dsse_payload: dict[str, Any]) -> manifest.Manifest:
    """Builds a manifest from the DSSE payload read from a signature.

    The payload here is a dictionary that represents the payload part of the
    DSSE envelope contained in the Sigstore bundle.

    Payloads that carry the v0.2 compatibility predicate type are handed over
    to `dsse_payload_to_manifest_compat`. For the v1.0 predicate type, the
    SHA256 over all resource digests (in the order they are listed) must match
    the digest recorded in the single subject.

    Args:
        dsse_payload: The in-toto DSSE envelope to convert to manifest.

    Returns:
        A manifest representing the signed model.

    Raises:
        ValueError: The payload cannot be deserialized to a manifest. This
          covers an unexpected predicate type, a subject list that does not
          hold exactly one entry, missing or wrongly typed fields, and a root
          digest that does not match the listed resources.
    """
    # Everything below is driven by the (untrusted) payload contents, so any
    # lookup or type failure is reported as the documented `ValueError`
    # instead of leaking a `KeyError`/`IndexError`/`TypeError` to the caller.
    try:
        obtained_predicate_type = dsse_payload["predicateType"]
        if obtained_predicate_type != _PREDICATE_TYPE:
            if obtained_predicate_type == _PREDICATE_TYPE_COMPAT:
                return dsse_payload_to_manifest_compat(dsse_payload)
            raise ValueError(
                f"Predicate type mismatch, expected {_PREDICATE_TYPE}, "
                f"got {obtained_predicate_type}"
            )

        subjects = dsse_payload["subject"]
        if len(subjects) != 1:
            raise ValueError(f"Expected only one subject, got {subjects}")

        model_name = subjects[0]["name"]
        expected_digest = subjects[0]["digest"]["sha256"]

        predicate = dsse_payload["predicate"]
        serialization = manifest.SerializationType.from_args(
            predicate["serialization"]
        )

        # Recompute the root digest while rebuilding the manifest items, so
        # that the resource list can be checked against the subject.
        hasher = memory.SHA256()
        items = []
        for resource in predicate["resources"]:
            name = resource["name"]
            algorithm = resource["algorithm"]
            digest_value = resource["digest"]
            digest = hashing.Digest(algorithm, bytes.fromhex(digest_value))
            hasher.update(digest.digest_value)
            items.append(serialization.new_item(name, digest))
    except (KeyError, IndexError, TypeError) as e:
        raise ValueError(f"Malformed DSSE payload: {e!r}") from e

    obtained_digest = hasher.compute().digest_hex
    if obtained_digest != expected_digest:
        raise ValueError(
            f"Manifest is inconsistent. Root digest is {expected_digest}, "
            f"but the included resources hash to {obtained_digest}"
        )

    return manifest.Manifest(model_name, items, serialization)

127 

128 

def dsse_payload_to_manifest_compat(
    dsse_payload: dict[str, Any],
) -> manifest.Manifest:
    """Builds a manifest from a v0.2-format DSSE payload read from a signature.

    This is the same as `dsse_payload_to_manifest` but using a DSSE payload as
    defined at the v0.2 release. That format was experimental but got used in
    production before v1.0, so it remains supported while verifiers migrate to
    the forward compatible format defined by v1.0.

    Args:
        dsse_payload: The in-toto DSSE envelope to convert to manifest.

    Returns:
        A manifest representing the signed model. Its model name is a fixed
        placeholder and its serialization description is synthesized, since
        neither is read from the payload.

    Raises:
        ValueError: The payload cannot be deserialized to a manifest, for
          example because a subject lacks its name or its SHA256 digest.
    """
    # Model name is not defined, use a constant.
    model_name = "compat-undefined-not-present"

    # Serialization format is not present, build a fake one.
    # NOTE(review): `allow_symlinks` is the string "false", not the boolean
    # `False`; a non-empty string is truthy, so confirm `from_args` and its
    # consumers interpret this value as intended.
    serialization = manifest.SerializationType.from_args(
        {"method": "files", "hash_type": "sha256", "allow_symlinks": "false"}
    )

    # The only field with actual content is the subject.
    algorithm = "sha256"  # hardcoded, the only supported one
    items = []
    try:
        for subject in dsse_payload["subject"]:
            name = subject["name"]
            digest_value = subject["digest"][algorithm]
            digest = hashing.Digest(algorithm, bytes.fromhex(digest_value))
            items.append(serialization.new_item(name, digest))
    except (KeyError, TypeError) as e:
        # Report missing or wrongly typed fields as the documented error type.
        raise ValueError(f"Malformed DSSE payload: {e!r}") from e

    # This format carries no root digest to compare against, so there is no
    # verification that the manifest is missing items at this point.
    return manifest.Manifest(model_name, items, serialization)

167 

168 

class Payload:
    """In-toto payload used to represent a model for signing.

    The payload lists every object of the model (files, shards, etc.) together
    with its hash, so it can be seen as a serialization of a manifest. All
    hashes live under the predicate, because the subject is limited in which
    hashes it may carry
    (https://github.com/sigstore/sigstore-python/issues/1018). Each entry of
    the predicate's `"resources"` list follows the format of a
    ResourceDescriptor: an object holding a name for the model object, the
    hashing algorithm, and the digest value.

    The subject is a name for the model (taken from the model's directory) and
    a global digest over all the computed digests. This is SHA256 computed over
    all the digests, in the order they show up in the predicate (we
    canonicalize this to be in alphabetical order). Other metadata documents
    can use this digest to refer to the model without carrying the entire set
    of resource descriptors around.

    To ensure backwards compatibility, the predicate contains a
    `"serialization"` section which describes the method used to serialize a
    model to the manifest used to generate this payload. The section includes
    a method name and a list of all relevant values needed to recompute the
    serialization.

    Future extensions to the model signature (e.g., incorporating model cards,
    etc.) can be added as part of the predicate. For v1.0 of the predicate the
    only supported fields in the predicate are `"serialization"` and
    `"resources"`. Any other field should be ignored by verifiers adhering to
    v1.0 version.

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "name": "sample_model",
          "digest": {
            "sha256": "143cc6..."
          }
        }
      ],
      "predicateType": "https://model_signing/signature/v1.0",
      "predicate": {
        "serialization": {
          "method": "files",
          "hash_type": "sha256",
          "allow_symlinks": true,
          "ignore_paths": [
            "model.sig",
            ".git",
            ".gitattributes",
            ".github",
            ".gitignore"
          ]
        },
        "resources": [
          {
            "algorithm": "sha256",
            "digest": "fdd892...",
            "name": "d0/f00"
          },
          {
            "algorithm": "sha256",
            "digest": "e16940...",
            "name": "d0/f01"
          },
          {
            "algorithm": "sha256",
            "digest": "407822...",
            "name": "d0/f02"
          },
          ...
          {
            "algorithm": "sha256",
            "digest": "912bcf...",
            "name": "f3"
          }
        ]
      }
    }
    ```
    """

    def __init__(self, manifest: manifest.Manifest):
        """Builds an instance of this in-toto payload.

        The resulting in-toto statement is stored in `self.statement`.

        Args:
            manifest: the manifest to convert to signing payload.
        """
        # Feed every resource digest into the root hasher in listing order
        # while collecting the per-resource entries for the predicate.
        root_hasher = memory.SHA256()
        entries = []
        for item in manifest.resource_descriptors():
            root_hasher.update(item.digest.digest_value)
            entry = {
                "name": item.identifier,
                "algorithm": item.digest.algorithm,
                "digest": item.digest.digest_hex,
            }
            entries.append(entry)

        # The subject carries the model name and the digest over all digests.
        root_hex = root_hasher.compute().digest_hex
        descriptor = statement.ResourceDescriptor(
            name=manifest.model_name, digest={"sha256": root_hex}
        )

        self.statement = statement.Statement(
            subjects=[descriptor.pb],
            predicate_type=_PREDICATE_TYPE,
            predicate={
                "serialization": manifest.serialization_type,
                "resources": entries,
                # other properties can go here
            },
        )

288 

289 

290class Signature(metaclass=abc.ABCMeta): 

291 """Signature class, wrapping a sigstore bundle. 

292 

293 We only support sigstore bundle signature formats, but we need to have two 

294 separate classes for this given the need to support traditional signing as 

295 well as Sigstore one. One class wraps around `sigstore_models.Bundle` and 

296 the other around the bundle as defined by `sigstore_protobuf_specs`. 

297 """ 

298 

299 @abc.abstractmethod 

300 def write(self, path: pathlib.Path) -> None: 

301 """Writes the signature to disk, to the given path. 

302 

303 Args: 

304 path: The path to write the signature to. 

305 """ 

306 

307 @classmethod 

308 @abc.abstractmethod 

309 def read(cls, path: pathlib.Path) -> Self: 

310 """Reads the signature from disk. 

311 

312 Does not perform any signature verification, except what is needed to 

313 parse the signature file. 

314 

315 Args: 

316 path: The path to read the signature from. 

317 

318 Returns: 

319 An instance of the class which can be passed to a `Verifier` for 

320 signature and integrity verification. 

321 """ 

322 

323 

class Signer(metaclass=abc.ABCMeta):
    """Abstract base for all signers.

    How the key material is managed is left to each concrete signer.
    """

    @abc.abstractmethod
    def sign(self, payload: Payload) -> Signature:
        """Produces a signature over the given signing payload.

        Args:
            payload: The `Payload` instance that should be signed.

        Returns:
            A valid signature.
        """

340 

341 

class Verifier(metaclass=abc.ABCMeta):
    """Generic signature verifier.

    Every subclass of `Verifier` is paired with a subclass of `Signer`. This is
    to ensure that they support the same signature formats as well as have
    similar key materials.

    If the signature is valid, the payload is expanded to a `Manifest` instance
    which can then be used to check the model integrity.
    """

    def verify(self, signature: Signature) -> manifest.Manifest:
        """Verifies the signature.

        The signed content is first checked by `_verify_signed_content`. The
        payload it returns must then be an in-toto JSON statement of the
        expected type before it is expanded to a manifest.

        Args:
            signature: The signature to verify.

        Returns:
            A `manifest.Manifest` instance that represents the model.

        Raises:
            ValueError: Signature verification fails. This includes a payload
              of an unexpected type, a payload that is not valid JSON or not
              a JSON object, and a statement of an unexpected type.
        """
        payload_type, payload = self._verify_signed_content(signature)

        if payload_type != _IN_TOTO_JSON_PAYLOAD_TYPE:
            raise ValueError(
                f"Expected DSSE payload {_IN_TOTO_JSON_PAYLOAD_TYPE}, "
                f"but got {payload_type}"
            )

        # Decoding errors already surface as `ValueError`, since
        # `json.JSONDecodeError` is a subclass of it.
        payload = json.loads(payload)

        # Valid JSON does not have to be an object; reject anything else here
        # rather than failing with a `TypeError` on the lookup below.
        if not isinstance(payload, dict):
            raise ValueError(
                f"Expected in-toto {_IN_TOTO_STATEMENT_TYPE} payload, "
                f"but got a JSON value of type {type(payload).__name__}"
            )

        # A missing `_type` is reported like a mismatching one.
        statement_type = payload.get("_type")
        if statement_type != _IN_TOTO_STATEMENT_TYPE:
            raise ValueError(
                f"Expected in-toto {_IN_TOTO_STATEMENT_TYPE} payload, "
                f"but got {statement_type}"
            )

        return dsse_payload_to_manifest(payload)

    @abc.abstractmethod
    def _verify_signed_content(self, signature: Signature) -> tuple[str, bytes]:
        """Verifies the signed content and extracts payload type and payload.

        Subclasses only need to implement this method.

        Args:
            signature: The signature to verify.

        Returns:
            A tuple containing the payload type and the payload. The payload
            is the serialized JSON document; `verify` takes care of decoding
            it.
        """