Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/gitdb/db/loose.py: 60%


# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: https://opensource.org/license/bsd-3-clause/
from contextlib import suppress

from gitdb.db.base import (
    FileDBBase,
    ObjectDBR,
    ObjectDBW
)

from gitdb.exc import (
    BadObject,
    AmbiguousObjectName
)

from gitdb.stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    FDStream,
    Sha1Writer
)

from gitdb.base import (
    OStream,
    OInfo
)

from gitdb.util import (
    file_contents_ro_filepath,
    ENOENT,
    hex_to_bin,
    bin_to_hex,
    exists,
    chmod,
    isfile,
    remove,
    rename,
    dirname,
    basename,
    join
)

from gitdb.fun import (
    chunk_size,
    loose_object_header_info,
    write_object,
    stream_copy
)

from gitdb.utils.encoding import force_bytes

import tempfile
import os
import sys
import time


__all__ = ('LooseObjectDB', )


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):

    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    # On Windows the file must be kept writable, otherwise it cannot be
    # removed either
    new_objects_mode = int("444", 8)
    if os.name == 'nt':
        new_objects_mode = int("644", 8)

    def __init__(self, root_path):
        super().__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional flags - might be set to 0 after the first failure.
        # Depending on the root, this might work for some mounts but not for
        # others, which is why it is tracked per instance.
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])
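    # For illustration (hypothetical name): an object with hexsha b'aabbcc...'
    # would live at 'aa/bbcc...' under the database root, mirroring git's
    # two-character fan-out directory layout.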

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        with suppress(KeyError):
            return self._hexsha_to_file[hexsha]
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: 20 byte binary sha1 string which matches the given name uniquely
        :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
        :raise AmbiguousObjectName:
        :raise BadObject: """
        candidate = None
        for binsha in self.sha_iter():
            if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
                # sha_iter never yields the same object twice, so a second
                # match means the prefix is ambiguous
                if candidate is not None:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = binsha
        # END for each object
        if candidate is None:
            raise BadObject(partial_hexsha)
        return candidate
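    # Usage sketch (hypothetical prefix; assumes exactly one object matches):
    #
    #   binsha = db.partial_to_complete_sha_hex(b'ab12')
    #   hexsha = bin_to_hex(binsha)   # the full 40-character hex name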

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(bin_to_hex(sha)))
        try:
            return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
        except OSError as e:
            if e.errno != ENOENT:
                # the open likely failed because of our extra flag (O_NOATIME);
                # drop it for future reads and try again without it
                self._fd_open_flags = 0
                try:
                    return file_contents_ro_filepath(db_path)
                except OSError as new_e:
                    raise BadObject(sha) from new_e
            else:
                raise BadObject(sha) from e
            # END handle error
        # END exception handling

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super().set_ostream(stream)

    def info(self, sha):
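        """:return: OInfo for the object identified by the given binary sha,
            read from the loose object's header only"""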

        m = self._map_loose_object(sha)
        try:
            typ, size = loose_object_header_info(m)
            return OInfo(sha, typ, size)
        finally:
            if hasattr(m, 'close'):
                m.close()
        # END assure release of system resources

    def stream(self, sha):
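        """:return: OStream of the object identified by the given binary sha;
            its data is decompressed on the fly while reading"""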

        m = self._map_loose_object(sha)
        typ, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, typ, size, stream)

    def has_object(self, sha):
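        """:return: True if the object identified by the given binary sha
            exists in this database"""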

        try:
            self.readable_db_object_path(bin_to_hex(sha))
            return True
        except BadObject:
            return False
        # END check existence

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(istream.type, istream.size, istream.read, writer.write,
                                 chunk_size=self.stream_chunk_size)
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            os.makedirs(obj_dir, exist_ok=True)
            # END handle destination directory
            # rename onto existing doesn't work on NTFS
            if isfile(obj_path):
                remove(tmp_path)
            else:
                rename(tmp_path, obj_path)
            # end rename only if needed

            # Ensure the rename is actually done and the file is stable.
            # Retry up to 14 times with quadratically growing waits in ms; the
            # waits sum to 1000ms, which should be vastly enough for the OS to
            # return and commit the file to disk.
            for backoff_ms in [1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 181]:
                with suppress(PermissionError):
                    # make sure it's readable for all! It started out as an
                    # rw------- tmp file but needs to be r--r--r--
                    chmod(obj_path, self.new_objects_mode)
                    break
                time.sleep(backoff_ms / 1000.0)
            else:
                raise PermissionError(
                    "Impossible to apply `chmod` to file {}".format(obj_path)
                )

        # END handle tmp_path

        istream.binsha = hex_to_bin(hexsha)
        return istream

    def sha_iter(self):
        # find all files which look like an object and extract the sha from
        # the path: 2 hex chars from the directory name + 38 from the file
        # name give the full 40-character hexsha
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
            # END for each file
        # END for each walk iteration

    def size(self):
        return len(tuple(self.sha_iter()))
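

# A minimal usage sketch (assumes gitdb's IStream API from gitdb.base): store
# a blob into a scratch database, then stream it back and verify the
# round-trip.
if __name__ == '__main__':
    from io import BytesIO
    from gitdb.base import IStream

    with tempfile.TemporaryDirectory() as scratch_root:
        db = LooseObjectDB(scratch_root)
        data = b'hello loose objects'
        # store() computes the sha and sets istream.binsha on return
        istream = db.store(IStream(b'blob', len(data), BytesIO(data)))
        # read the object back through a decompressing stream
        ostream = db.stream(istream.binsha)
        assert ostream.read() == data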