1# Copyright 2024 The Sigstore Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Machinery for serializing ML models."""
16
17import abc
18from collections.abc import Iterable
19import pathlib
20
21from model_signing import manifest
22
23
def check_file_or_directory(
    path: pathlib.Path, *, allow_symlinks: bool = False
) -> None:
    """Validates that the given path is a regular file or a directory.

    Sockets, pipes, and any other operating system concept abstracted as a
    file are not supported.

    The check also fails for a broken symlink, for a path that does not exist,
    and when the path cannot be inspected because of permission errors.

    Args:
        path: The path to validate.
        allow_symlinks: Whether symbolic links are accepted. When `False`
          (the default), a path that is itself a symlink is rejected.

    Raises:
        ValueError: The path is neither a file nor a directory, or the path
          is a symlink and `allow_symlinks` is false.
    """
    # Only query the link status when symlinks are disallowed, so that no
    # extra filesystem lookup happens when the caller opted into symlinks.
    if not allow_symlinks:
        if path.is_symlink():
            raise ValueError(
                f"Cannot use '{path}' because it is a symlink. This"
                " behavior can be changed with `allow_symlinks`."
            )

    # `is_file` and `is_dir` follow symlinks, so an allowed symlink is
    # accepted only when its target is a file or a directory.
    is_supported = path.is_file() or path.is_dir()
    if is_supported:
        return

    raise ValueError(
        f"Cannot use '{path}' as file or directory. It could be a"
        " special file, it could be missing, or there might be a"
        " permission issue."
    )
56
57
def should_ignore(
    path: pathlib.Path, ignore_paths: Iterable[pathlib.Path]
) -> bool:
    """Reports whether a path is excluded from serialization.

    A path is excluded when it is equal to, or located underneath, at least
    one of the entries in `ignore_paths`. The comparison is done on path
    components, not on string prefixes.

    Args:
        path: The path to test.
        ignore_paths: The paths to ignore while serializing a model.

    Returns:
        `True` if the path falls under any ignored path, `False` otherwise
        (including when `ignore_paths` is empty).
    """
    for ignored in ignore_paths:
        # Stop at the first match; the remaining entries cannot change
        # the answer.
        if path.is_relative_to(ignored):
            return True
    return False
71
72
class Serializer(metaclass=abc.ABCMeta):
    """Abstract interface for serializers of ML model formats.

    Concrete serializers must implement `serialize`.
    """

    @abc.abstractmethod
    def serialize(
        self,
        model_path: pathlib.Path,
        *,
        ignore_paths: Iterable[pathlib.Path] = frozenset(),
    ) -> manifest.Manifest:
        """Produces the manifest for the model located at `model_path`.

        Args:
            model_path: The path to the model.
            ignore_paths: Paths to leave out of the serialization. When an
              entry is a directory, everything underneath that directory is
              left out as well. Defaults to ignoring nothing.

        Returns:
            The model's serialized manifest.
        """