Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/gitdb/db/loose.py: 60%


# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: https://opensource.org/license/bsd-3-clause/
from contextlib import suppress

from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)

from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isfile,
    remove,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys
import time


__all__ = ('LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # On Windows the file must be kept writable, otherwise it cannot be
    # removed either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        super().__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional flags - might be set to 0 after the first failure.
        # Depending on the root, this might work for some mounts but not for
        # others, which is why it is tracked per instance.
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])
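    # For illustration (hypothetical name): an object with hexsha b'aabbcc...'
    # would live at 'aa/bbcc...' under the database root, mirroring git's
    # two-character fan-out directory layout.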

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        with suppress(KeyError):
            return self._hexsha_to_file[hexsha]
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName:
        :raise BadObject: """
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                # sha_iter never yields the same object twice, so a second
                # match means the prefix is ambiguous
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate
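    # Usage sketch (hypothetical prefix; assumes exactly one object matches):
    #
    #   binsha = db.partial_to_complete_sha_hex(b'ab12')
    #   hexsha = bin_to_hex(binsha)   # the full 40-character hex name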

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno != ENOENT:
                # the open likely failed because of our extra flag (O_NOATIME);
                # drop it for future reads and try again without it
                self._fd_open_flags = 0
                try:
                    return file_contents_ro_filepath(db_path)
                except OSError as new_e:
                    raise BadObject(sha) from new_e
            else:
                raise BadObject(sha) from e
            # END handle error
        # END exception handling

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super().set_ostream(stream)

    def info(self, sha):
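        """:return: OInfo for the object identified by the given binary sha,
            read from the loose object's header only"""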

        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
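        """:return: OStream of the object identified by the given binary sha;
            its data is decompressed on the fly while reading"""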

        m = self._map_loose_object(sha)
        typ, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, stream)

    def has_object(self, sha):
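        """:return: True if the object identified by the given binary sha
            exists in this database"""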

        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            os.makedirs(obj_dir, exist_ok=True)
            # END handle destination directory
            # rename onto existing doesn't work on NTFS
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed

            # Ensure the rename is actually done and the file is stable.
            # Retry up to 14 times with quadratically growing waits in ms; the
            # waits sum to 1000ms, which should be vastly enough for the OS to
            # return and commit the file to disk.
            for backoff_ms in [1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 181]:
                with suppress(PermissionError):
                    # make sure it's readable for all! It started out as an
                    # rw------- tmp file but needs to be r--r--r--
                    chmod(obj_path, self.new_objects_mode)
                    break
                time.sleep(backoff_ms / 1000.0)
            else:
                raise PermissionError(
                    "Impossible to apply `chmod` to file {}".format(obj_path)
                )

        # END handle tmp_path

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        # find all files which look like an object and extract the sha from
        # the path: 2 hex chars from the directory name + 38 from the file
        # name give the full 40-character hexsha
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        return len(tuple(self.sha_iter()))
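

# A minimal usage sketch (assumes gitdb's IStream API from gitdb.base): store
# a blob into a scratch database, then stream it back and verify the
# round-trip.
if __name__ == '__main__':
    from io import BytesIO
    from gitdb.base import IStream

    with tempfile.TemporaryDirectory() as scratch_root:
        db = LooseObjectDB(scratch_root)
        data = b'hello loose objects'
        # store() computes the sha and sets istream.binsha on return
        istream = db.store(IStream(b'blob', len(data), BytesIO(data)))
        # read the object back through a decompressing stream
        ostream = db.stream(istream.binsha)
        assert ostream.read() == data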