# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: https://opensource.org/license/bsd-3-clause/
from contextlib import suppress

from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)

from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isfile,
    remove,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys
import time

__all__ = ('LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # On Windows we need to keep the file writable, otherwise it cannot be
    # removed either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        super().__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional flags - might be set to 0 after the first failure.
        # Depending on the root, this might work for some mounts and not for
        # others, which is why it is kept per instance.
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])
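
    # Illustrative note (not in the original module): the first two hex digits
    # select the fan-out directory, the remaining 38 form the file name, e.g.
    #   db.object_path("aa" + "b" * 38)  ->  join("aa", "b" * 38)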

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        with suppress(KeyError):
            return self._hexsha_to_file[hexsha]
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName:
        :raise BadObject: """
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                # a second match must be a different object, as sha_iter never
                # yields the same object twice
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate
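
    # A minimal sketch of a lookup by abbreviated sha (hypothetical values,
    # not part of the original module):
    #   binsha = db.partial_to_complete_sha_hex("deadbe")
    #   assert bin_to_hex(binsha).startswith(b"deadbe")
    # Note that resolution is a linear scan over sha_iter(), so its cost grows
    # with the number of loose objects.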

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno != ENOENT:
                # try again without noatime
                try:
                    contents = file_contents_ro_filepath(db_path)
                except OSError as new_e:
                    raise BadObject(sha) from new_e
                # the first open failed because of our flag - don't try it again
                self._fd_open_flags = 0
                return contents
            else:
                raise BadObject(sha) from e
            # END handle error
        # END exception handling
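
    # Note (added for clarity, not in the original module): O_NOATIME suppresses
    # access-time updates on read, but Linux only permits it for the file owner;
    # on other mounts the open raises EPERM, which the fallback above absorbs by
    # clearing the flag for all subsequent reads.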

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super().set_ostream(stream)

    def info(self, sha):
        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
        m = self._map_loose_object(sha)
        typ, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, stream)

    def has_object(self, sha):
        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            os.makedirs(obj_dir, exist_ok=True)
            # END handle destination directory
            # rename onto existing doesn't work on NTFS
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed

            # Ensure the rename is actually done and the file is stable.
            # Retry up to 14 times with growing (quadratic) waits in ms; the
            # total maximum wait is 1000ms, which should give the OS ample time
            # to commit the file to disk.
            for backoff_ms in [1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 181]:
                with suppress(PermissionError):
                    # make sure it is readable for all!  It started out as a
                    # rw------- tmp file but needs to be r--r--r--
                    chmod(obj_path, self.new_objects_mode)
                    break
                time.sleep(backoff_ms / 1000.0)
            else:
                raise PermissionError(
                    "Failed to apply `chmod` to file {}".format(obj_path)
                )

        # END handle dry_run

        istream.binsha = hex_to_bin(hexsha)
        return istream
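
    # A minimal sketch of storing a new blob (assumes gitdb.base.IStream;
    # not part of the original module):
    #   from io import BytesIO
    #   from gitdb.base import IStream
    #   data = b"my data"
    #   istream = db.store(IStream(b"blob", len(data), BytesIO(data)))
    #   istream.binsha  # 20-byte binary sha, filled in by store()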

    def sha_iter(self):
        # find all files which look like an object, extract the sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            # fan-out directories carry the first 2 hex digits of the sha
            if len(root_base) != 2:
                continue

            for f in files:
                # file names carry the remaining 38 hex digits (2 + 38 = 40)
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        return len(tuple(self.sha_iter()))
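

# A minimal, self-contained usage sketch (not part of the original module). It
# assumes gitdb.base.IStream and writes a single blob into a throwaway database
# root; paths and data are illustrative only.
if __name__ == '__main__':
    from io import BytesIO
    from tempfile import mkdtemp

    from gitdb.base import IStream

    db = LooseObjectDB(mkdtemp(prefix='loosedb'))

    # store() compresses the data, renames the temp file to
    # <root>/<sha[:2]>/<sha[2:]> and fills in istream.binsha
    data = b"hello loose object"
    istream = db.store(IStream(b"blob", len(data), BytesIO(data)))
    assert db.has_object(istream.binsha)

    # info() parses just the object header; stream() decompresses the payload
    oinfo = db.info(istream.binsha)
    print(oinfo.type, oinfo.size)  # b'blob' 18
    assert db.stream(istream.binsha).read() == data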