Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/gitdb/util.py: 57%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

169 statements  

1# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors 

2# 

3# This module is part of GitDB and is released under 

4# the New BSD License: https://opensource.org/license/bsd-3-clause/ 

5import binascii 

6import os 

7import mmap 

8import sys 

9import time 

10import errno 

11 

12from io import BytesIO 

13 

14from smmap import ( 

15 StaticWindowMapManager, 

16 SlidingWindowMapManager, 

17 SlidingWindowMapBuffer 

18) 

19 

# Module-wide memory map manager instance.
# Callers can use it to free cached mappings that are no longer in use.
mman = SlidingWindowMapManager()

24 

25import hashlib 

26 

try:
    from struct import unpack_from
except ImportError:
    # struct.unpack_from is unavailable: emulate it with unpack on a slice
    from struct import unpack, calcsize
    __calcsize_cache = dict()

    def unpack_from(fmt, data, offset=0):
        """Unpack ``fmt`` from ``data`` starting at ``offset``.

        The byte size of each format is computed once and remembered in
        ``__calcsize_cache``."""
        size = __calcsize_cache.get(fmt)
        if size is None:
            size = __calcsize_cache[fmt] = calcsize(fmt)
        # END handle cache miss
        return unpack(fmt, data[offset:offset + size])
    # END own unpack_from implementation

42 

43 

44#{ Aliases 

45 

# The names below are re-exported as module-level aliases of the stdlib callables.

# hex <-> binary conversion
from binascii import a2b_hex as hex_to_bin
from binascii import b2a_hex as bin_to_hex

# errors
from errno import ENOENT

# os shortcuts
from os import chmod, close, fsync, mkdir, read, rename, write
from os.path import basename, dirname, exists, isdir, isfile, join

66 

67 

def _retry(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and return its result.

    On "Windows" the OS or someone else sometimes still has a handle to the
    file, so failing calls are repeated there: up to 10 attempts, each followed
    by a 0.1s pause when it raises, then one final attempt whose exception is
    passed on to the caller. On every other platform the call is made once."""
    if sys.platform != "win32":
        return func(*args, **kwargs)
    # END handle platforms without the problem

    attempts_left = 10
    while attempts_left:
        attempts_left -= 1
        try:
            return func(*args, **kwargs)
        except Exception:
            time.sleep(0.1)
        # END swallow failure and wait
    # END for each attempt

    # last attempt, errors are not caught anymore
    return func(*args, **kwargs)


def remove(*args, **kwargs):
    """``os.remove`` routed through ``_retry``; arguments are passed on unchanged"""
    return _retry(os.remove, *args, **kwargs)

84 

85 

86# Backwards compatibility imports 

87from gitdb.const import ( 

88 NULL_BIN_SHA, 

89 NULL_HEX_SHA 

90) 

91 

92#} END Aliases 

93 

94#{ compatibility stuff ... 

95 

96 

class _RandomAccessBytesIO:

    """Wrapper to provide required functionality in case memory maps cannot or may
    not be used.

    It holds an ``io.BytesIO`` and adds ``len()`` as well as index and slice access
    to it. Any other attribute is looked up on the wrapped ``BytesIO``."""
    __slots__ = '_sio'

    def __init__(self, buf=b''):
        """:param buf: initial content. It must be bytes-like as ``BytesIO`` rejects
            text, which is why the default is an empty bytes object."""
        self._sio = BytesIO(buf)

    def __getattr__(self, attr):
        # Only called if the normal lookup failed. If that happens for '_sio'
        # itself, the slot was never assigned (instance created without
        # __init__): raise instead of recursing endlessly via self._sio
        if attr == '_sio':
            raise AttributeError(attr)
        return getattr(self._sio, attr)

    def __len__(self):
        """:return: size of the whole buffer in bytes, independent of the stream position"""
        return len(self.getvalue())

    def __getitem__(self, i):
        """:return: item or slice ``i`` of the whole buffer content"""
        return self.getvalue()[i]

    def __getslice__(self, start, end):
        # Python 2 slicing hook. Python 3 passes slices to __getitem__, it is
        # kept for callers invoking it explicitly.
        return self.getvalue()[start:end]

117 

118 

def byte_ord(b):
    """
    Return the integer representation of the byte string. This supports Python
    3 byte arrays as well as standard strings.

    Whatever ``ord`` rejects with a TypeError, like an item which already is an
    integer, is handed back unchanged.
    """
    try:
        value = ord(b)
    except TypeError:
        value = b
    # END handle non-character input
    return value

128 

129#} END compatibility stuff ... 

130 

131#{ Routines 

132 

133 

def make_sha(source=b''):
    """:return: a new ``hashlib.sha1`` object which was fed with ``source``

    :param source: bytes to hash initially, more can be added using ``update``

    **Note** the former fallback to the ``sha`` module was removed: it could only
    trigger on a NameError, which cannot happen as ``hashlib`` is imported
    unconditionally by this module."""
    return hashlib.sha1(source)

144 

145 

def allocate_memory(size):
    """:return: a file-protocol accessible memory block of the given size

    :param size: amount of bytes to allocate. A size of 0 yields an empty
        ``_RandomAccessBytesIO`` without trying to map anything."""
    if size != 0:
        try:
            # anonymous map, read-write by default
            return mmap.mmap(-1, size)
        except OSError:
            # Use real memory instead. This may fail as well if the amount of
            # memory is not available in one chunk.
            return _RandomAccessBytesIO(b"\0" * size)
        # END handle memory allocation
    # END handle non-empty chunks

    # empty chunks are handled gracefully
    return _RandomAccessBytesIO(b'')

161 

162 

def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, its allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False

    **Note** if mapping fails, the contents are read manually instead. This includes
    empty files, for which ``mmap`` raises a ValueError instead of an OSError."""
    if allow_mmap:
        try:
            # supports stream and random access
            try:
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except OSError:
                # retry with the actual size in place of 0
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
            # END handle explicit size
        except (OSError, ValueError):
            # mapping is not possible, fall through to reading manually
            pass
        # END exception handling
    # END handle mmap

    # read manually
    contents = os.read(fd, os.fstat(fd).st_size)
    if stream:
        return _RandomAccessBytesIO(contents)
    return contents

190 

191 

def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Get the file contents at filepath as fast as possible

    The file is opened read-only (and binary where ``os.O_BINARY`` exists), handed
    to ``file_contents_ro`` and closed again in any case.

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly as the right value needs to be
    shared per database in fact. It only makes a real difference for loose object
    databases anyway, and they use it with the help of the ``flags`` parameter"""
    open_flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags
    fd = os.open(filepath, open_flags)
    try:
        contents = file_contents_ro(fd, stream, allow_mmap)
    finally:
        close(fd)
    # END assure file is closed
    return contents

210 

211 

def sliding_ro_buffer(filepath, flags=0):
    """
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath
    :param flags: passed on to the ``SlidingWindowMapBuffer`` as is"""
    cursor = mman.make_cursor(filepath)
    return SlidingWindowMapBuffer(cursor, flags=flags)

217 

218 

def to_hex_sha(sha):
    """:return: hexified version of sha

    An input of length 40 is taken to be hexadecimal already and returned as is."""
    return sha if len(sha) == 40 else bin_to_hex(sha)

224 

225 

def to_bin_sha(sha):
    """:return: binary version of sha

    An input of length 20 is taken to be binary already and returned as is."""
    return sha if len(sha) == 20 else hex_to_bin(sha)

230 

231 

232#} END routines 

233 

234 

235#{ Utilities 

236 

class LazyMixin:

    """
    Base class for types whose attribute values are retrieved lazily, upon first
    access. With slots in use, memory for an attribute is only reserved once it
    was actually accessed and retrieved. Every later access returns the cached
    value as stored in the instance's dict or slot.
    """

    __slots__ = ()

    def __getattr__(self, attr):
        """
        Called for attributes which could not be found the normal way. The
        instance gets the chance to create and set it, so the next request for
        the same attribute is answered from our dict/slots directly. """
        self._set_cache_(attr)
        # will raise in case the cache was not created
        return object.__getattribute__(self, attr)

    def _set_cache_(self, attr):
        """
        To be overridden in the derived class, this implementation does nothing.
        An override should check whether the attribute named by attr can be
        created, and cache it by setting it on the instance. Do nothing if you
        do not know the attribute, or call your subclass.

        The derived class may create as many additional attributes as it deems
        necessary in case a git command returns more information than represented
        in the single attribute."""

267 

268 

class LockedFD:

    """
    This class facilitates a safe read and write operation to a file on disk.
    The lock is a file at 'file.lock' which is created exclusively when opening.

    When writing, the returned descriptor refers to the lock file, which receives
    the data instead of 'file'. Upon commit, the lock file is renamed to overwrite
    the original file, upon rollback it is deleted.

    When reading, the lock file is obtained as well, but only to prevent other
    writers from succeeding while we are reading the file.

    This type assures a consistent state on destruction, as a descriptor which is
    still open will be rolled back.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        self._fd = None
        self._write = None          # None until opened, then the write flag passed to open

    def __del__(self):
        # nothing to do if the file descriptor was never opened or is closed already
        if self._fd is None:
            return
        self.rollback()

    def _lockfilepath(self):
        """:return: path of the lock file belonging to our filepath"""
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Open the file descriptor for reading or writing, both in binary mode.

        :param write: if True, the file descriptor will be opened for writing. Other
            wise it will be opened read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise OSError: if the lock could not be retrieved, or if the actual file
            could not be opened for reading
        :raise AssertionError: if called more than once

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # try to create the lock file, O_EXCL makes this fail if it exists already
        binary = getattr(os, 'O_BINARY', 0)
        lock_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
        try:
            lock_fd = os.open(self._lockfilepath(), lock_flags, 0o600)
            if write:
                # the lock file is the one being written to
                self._fd = lock_fd
            else:
                os.close(lock_fd)
            # END handle file descriptor
        except OSError as e:
            raise OSError("Lock at %r could not be obtained" % self._lockfilepath()) from e
        # END handle lock retrieval

        # open actual file if required, which is when reading
        if self._fd is None:
            # we could specify exclusive here, as we obtained the lock anyway
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | binary)
            except BaseException:
                # assure we release our lockfile
                remove(self._lockfilepath())
                raise
            # END handle lockfile
        # END open descriptor for reading

        if not stream:
            return self._fd
        # END handle plain descriptor

        # need delayed import
        from gitdb.stream import FDStream
        return FDStream(self._fd)

    def commit(self):
        """When done writing, call this function to commit your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Close our descriptor and handle the lock according to the write mode

        :param successful: if True and we are writing, the lock file replaces the
            actual file. Otherwise the lock file is deleted.
        :raise AssertionError: if ``open`` was not called yet"""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        if self._fd is None:
            # closed already, or never opened successfully
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if not (self._write and successful):
            # just delete the file so far, we failed or have only been reading
            remove(lockfile)
            return
        # END handle lock release

        # on windows, rename does not silently overwrite the existing one
        if sys.platform == "win32" and isfile(self._filepath):
            remove(self._filepath)
        # END win32 special handling
        os.rename(lockfile, self._filepath)

        # assure others can at least read the file - the tmpfile left it at rw--
        # We may also write that file, on windows that boils down to a remove-
        # protection as well
        chmod(self._filepath, 0o644)

397 

398#} END utilities