1# hash.py -- Object format abstraction layer for Git
2# Copyright (C) 2024 The Dulwich contributors
3#
4# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6# General Public License as published by the Free Software Foundation; version 2.0
7# or (at your option) any later version. You can redistribute it and/or
8# modify it under the terms of either of these two licenses.
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# You should have received a copy of the licenses; if not, see
17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19# License, Version 2.0.
20#
21
22"""Object format abstraction for Git objects.
23
24This module provides an abstraction layer for different object formats
25used in Git repositories (SHA-1 and SHA-256).
26"""
27
28from collections.abc import Callable
29from hashlib import sha1, sha256
30from typing import TYPE_CHECKING
31
32if TYPE_CHECKING:
33 from _hashlib import HASH
34
35
class ObjectFormat:
    """Describes one Git object format, i.e. the hash algorithm in use."""

    def __init__(
        self,
        name: str,
        type_num: int,
        oid_length: int,
        hex_length: int,
        hash_func: Callable[[], "HASH"],
    ) -> None:
        """Record the parameters that describe an object format.

        Args:
          name: Name of the format (e.g., "sha1", "sha256")
          type_num: Format type number used in Git
          oid_length: Length of the binary object ID in bytes
          hex_length: Length of the hexadecimal object ID in characters
          hash_func: Zero-argument callable returning a fresh hash object
            (e.g. a constructor from hashlib)
        """
        # Identification of the format.
        self.name, self.type_num = name, type_num
        # Sizes of an object ID in its binary and hexadecimal forms.
        self.oid_length, self.hex_length = oid_length, hex_length
        # Factory used by new_hash() to create hash objects.
        self.hash_func = hash_func

    def __str__(self) -> str:
        """Return the format name."""
        return self.name

    def __repr__(self) -> str:
        """Return a debugging representation that shows the format name."""
        return "ObjectFormat(" + repr(self.name) + ")"

    def new_hash(self) -> "HASH":
        """Create a new, empty hash object for this format."""
        make_hash = self.hash_func
        return make_hash()

    def hash_object(self, data: bytes) -> bytes:
        """Hash data and return the raw digest.

        Args:
          data: Data to hash

        Returns:
          Binary digest
        """
        hasher = self.new_hash()
        hasher.update(data)
        raw_digest = hasher.digest()
        return raw_digest

    def hash_object_hex(self, data: bytes) -> bytes:
        """Hash data and return the digest in hexadecimal form.

        Args:
          data: Data to hash

        Returns:
          Hexadecimal digest as ASCII-encoded bytes
        """
        hasher = self.new_hash()
        hasher.update(data)
        # hexdigest() yields text; the result is handed back as bytes.
        hex_text = hasher.hexdigest()
        return hex_text.encode("ascii")
99
100
# Define the supported object formats.
#
# ``type_num`` is the numeric hash-function identifier.  Git's binary file
# formats (e.g. commit-graph, multi-pack-index) use 1 for SHA-1 and 2 for
# SHA-256, which is also how OBJECT_FORMAT_TYPE_NUMS below is keyed.
SHA1 = ObjectFormat("sha1", type_num=1, oid_length=20, hex_length=40, hash_func=sha1)
SHA256 = ObjectFormat(
    "sha256", type_num=2, oid_length=32, hex_length=64, hash_func=sha256
)

# Map of format names to ObjectFormat instances
OBJECT_FORMATS = {
    "sha1": SHA1,
    "sha256": SHA256,
}

# Map of format numbers to ObjectFormat instances.  Built from each format's
# own ``type_num`` so the table can never disagree with the instances.
OBJECT_FORMAT_TYPE_NUMS = {fmt.type_num: fmt for fmt in (SHA1, SHA256)}

# Default format for backward compatibility
DEFAULT_OBJECT_FORMAT = SHA1
121
122
def get_object_format(name: str | None = None) -> ObjectFormat:
    """Get an object format by name.

    The lookup is case-insensitive: the name is lower-cased before it is
    looked up in ``OBJECT_FORMATS``.

    Args:
      name: Format name ("sha1" or "sha256"). If None, returns default.

    Returns:
      ObjectFormat instance

    Raises:
      ValueError: If the format name is not supported
    """
    if name is None:
        return DEFAULT_OBJECT_FORMAT
    try:
        return OBJECT_FORMATS[name.lower()]
    except KeyError:
        # The KeyError adds nothing beyond the name already in the message;
        # suppress it so the traceback shows only the documented ValueError
        # instead of "During handling of the above exception...".
        raise ValueError(f"Unsupported object format: {name}") from None
141
142
def verify_same_object_format(*formats: ObjectFormat) -> ObjectFormat:
    """Check that every given object format matches the first one.

    Args:
      *formats: Object format instances to compare

    Returns:
      The first format, which all the others were found to match

    Raises:
      ValueError: If no formats were given, or if any format differs from
        the first one
    """
    if len(formats) < 1:
        raise ValueError("At least one object format must be provided")

    # Take the first format as the reference and walk the remaining ones.
    candidates = iter(formats)
    expected = next(candidates)
    for candidate in candidates:
        if candidate != expected:
            message = f"Object format mismatch: {expected.name} != {candidate.name}"
            raise ValueError(message)

    return expected