Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/gitdb/util.py: 57%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
2#
3# This module is part of GitDB and is released under
4# the New BSD License: https://opensource.org/license/bsd-3-clause/
5import binascii
6import os
7import mmap
8import sys
9import time
10import errno
12from io import BytesIO
14from smmap import (
15 StaticWindowMapManager,
16 SlidingWindowMapManager,
17 SlidingWindowMapBuffer
18)
# Initialize our global memory manager instance.
# Use it to free cached (and unused) resources via mman.collect().
mman = SlidingWindowMapManager()
# END handle mman
25import hashlib
# struct.unpack_from is available on every supported interpreter; the
# fallback below re-creates it from unpack()/calcsize() for ancient ones.
try:
    from struct import unpack_from
except ImportError:
    from struct import unpack, calcsize

    # cache of fmt -> struct size, so calcsize runs once per format string
    __calcsize_cache = {}

    def unpack_from(fmt, data, offset=0):
        """Emulate ``struct.unpack_from`` using ``unpack`` and a cached ``calcsize``."""
        size = __calcsize_cache.get(fmt)
        if size is None:
            size = calcsize(fmt)
            __calcsize_cache[fmt] = size
        # END size cache handling
        return unpack(fmt, data[offset:offset + size])
    # END own unpack_from implementation
#{ Aliases

# binary <-> ascii-hex SHA conversion helpers
hex_to_bin = binascii.a2b_hex
bin_to_hex = binascii.b2a_hex

# errors
ENOENT = errno.ENOENT

# os shortcuts, bound once for brevity and slightly faster lookup
exists = os.path.exists
mkdir = os.mkdir
chmod = os.chmod
isdir = os.path.isdir
isfile = os.path.isfile
rename = os.rename
dirname = os.path.dirname
basename = os.path.basename
join = os.path.join
read = os.read
write = os.write
close = os.close
fsync = os.fsync
68def _retry(func, *args, **kwargs):
69 # Wrapper around functions, that are problematic on "Windows". Sometimes
70 # the OS or someone else has still a handle to the file
71 if sys.platform == "win32":
72 for _ in range(10):
73 try:
74 return func(*args, **kwargs)
75 except Exception:
76 time.sleep(0.1)
77 return func(*args, **kwargs)
78 else:
79 return func(*args, **kwargs)
def remove(*args, **kwargs):
    """Delete a file like ``os.remove``, retrying on Windows where a
    lingering handle may cause transient failures."""
    return _retry(os.remove, *args, **kwargs)
86# Backwards compatibility imports
87from gitdb.const import (
88 NULL_BIN_SHA,
89 NULL_HEX_SHA
90)
92#} END Aliases
94#{ compatibility stuff ...
97class _RandomAccessBytesIO:
99 """Wrapper to provide required functionality in case memory maps cannot or may
100 not be used. This is only really required in python 2.4"""
101 __slots__ = '_sio'
103 def __init__(self, buf=''):
104 self._sio = BytesIO(buf)
106 def __getattr__(self, attr):
107 return getattr(self._sio, attr)
109 def __len__(self):
110 return len(self.getvalue())
112 def __getitem__(self, i):
113 return self.getvalue()[i]
115 def __getslice__(self, start, end):
116 return self.getvalue()[start:end]
def byte_ord(b):
    """
    Return the integer representation of the byte string. This supports Python
    3 byte arrays as well as standard strings.
    """
    try:
        value = ord(b)
    except TypeError:
        # b is already an int, e.g. obtained by iterating a bytes object
        value = b
    return value
129#} END compatibility stuff ...
131#{ Routines
def make_sha(source=b''):
    """Create a sha1 hash object initialized with ``source``.

    Historically this was a python2.4 workaround for the sha/hashlib module
    fiasco (**Note** from the dulwich project). The old ``except NameError``
    fallback was dead code: ``hashlib`` is imported at module level so the
    name always resolves, and the fallback ``sha`` module does not exist on
    Python 3 at all.

    :param source: initial bytes fed into the hash
    :return: a ``hashlib`` sha1 object"""
    return hashlib.sha1(source)
def allocate_memory(size):
    """:return: a file-protocol accessible memory block of the given size"""
    if size == 0:
        # mmap cannot map zero bytes - fall back to an in-memory stream
        return _RandomAccessBytesIO(b'')
    # END handle empty chunks gracefully

    try:
        # anonymous, read-write memory map
        return mmap.mmap(-1, size)
    except OSError:
        # The request could not be satisfied as one mapped chunk - would only
        # be the case in python 2.4, being more likely on 32 bit systems.
        # Emulate the buffer with plain process memory instead.
        return _RandomAccessBytesIO(b"\0" * size)
    # END handle memory allocation
def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, its allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False"""
    if allow_mmap:
        # a memory map supports both the stream and the random access interface
        try:
            try:
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except OSError:
                # python 2.4 issue: 0 wants to be the actual size
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
            # END handle python 2.4
        except OSError:
            # mapping failed entirely - fall through to the manual read below
            pass
        # END exception handling
    # END mmap handling

    # read manually
    contents = os.read(fd, os.fstat(fd).st_size)
    if stream:
        return _RandomAccessBytesIO(contents)
    return contents
def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Get the file contents at filepath as fast as possible

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly as the right value needs to be
    shared per database in fact. It only makes a real difference for loose object
    databases anyway, and they use it with the help of the ``flags`` parameter"""
    open_flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags
    fd = os.open(filepath, open_flags)
    try:
        return file_contents_ro(fd, stream, allow_mmap)
    finally:
        # the mapping (if any) keeps its own reference; our descriptor can go
        os.close(fd)
    # END assure file is closed
def sliding_ro_buffer(filepath, flags=0):
    """
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath"""
    cursor = mman.make_cursor(filepath)
    return SlidingWindowMapBuffer(cursor, flags=flags)
def to_hex_sha(sha):
    """:return: hexified version of sha"""
    # a 40-byte value is already ascii-hex; anything else is treated as binary
    return sha if len(sha) == 40 else bin_to_hex(sha)
def to_bin_sha(sha):
    """:return: binary version of sha; a 20-byte value is passed through,
        anything else is treated as ascii-hex and decoded"""
    return sha if len(sha) == 20 else hex_to_bin(sha)
232#} END routines
235#{ Utilities
class LazyMixin:

    """
    Base class providing an interface to lazily retrieve attribute values upon
    first access. If slots are used, memory will only be reserved once the attribute
    is actually accessed and retrieved the first time. All future accesses will
    return the cached value as stored in the Instance's dict or slot.
    """

    # empty slots so subclasses may declare their own without inheriting a __dict__
    __slots__ = tuple()

    def __getattr__(self, attr):
        """
        Whenever an attribute is requested that we do not know, we allow it
        to be created and set. Next time the same attribute is requested, it is simply
        returned from our dict/slots. """
        # only reached when normal lookup failed, i.e. the attribute is unset
        self._set_cache_(attr)
        # will raise in case the cache was not created by _set_cache_
        return object.__getattribute__(self, attr)

    def _set_cache_(self, attr):
        """
        This method should be overridden in the derived class.
        It should check whether the attribute named by attr can be created
        and cached. Do nothing if you do not know the attribute or call your subclass

        The derived class may create as many additional attributes as it deems
        necessary in case a git command returns more information than represented
        in the single attribute."""
        # base implementation intentionally does nothing - the subsequent
        # __getattribute__ in __getattr__ will then raise AttributeError
        pass
class LockedFD:

    """
    This class facilitates a safe read and write operation to a file on disk.
    If we write to 'file', we obtain a lock file at 'file.lock' and write to
    that instead. If we succeed, the lock file will be renamed to overwrite
    the original file.

    When reading, we obtain a lock file, but to prevent other writers from
    succeeding while we are reading the file.

    This type handles error correctly in that it will assure a consistent state
    on destruction.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        self._fd = None     # descriptor of the lock file (write) or target (read)
        self._write = None  # if True, we write a file; None until open() was called

    def __del__(self):
        # will do nothing if the file descriptor is already closed;
        # an unfinished operation is rolled back to release the lock
        if self._fd is not None:
            self.rollback()

    def _lockfilepath(self):
        # the lock is a sibling file with a '.lock' suffix
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Open the file descriptor for reading or writing, both in binary mode.

        :param write: if True, the file descriptor will be opened for writing. Other
            wise it will be opened read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise IOError: if the lock could not be retrieved
        :raise OSError: If the actual file could not be opened for reading

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # try to open the lock file - O_EXCL makes creation atomic, so an
        # existing lock file causes OSError and thus signals a held lock
        binary = getattr(os, 'O_BINARY', 0)
        lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
        try:
            fd = os.open(self._lockfilepath(), lockmode, int("600", 8))
            if not write:
                # readers only need the lock's existence, not its descriptor
                os.close(fd)
            else:
                # writers write into the lock file and rename it on commit
                self._fd = fd
            # END handle file descriptor
        except OSError as e:
            raise OSError("Lock at %r could not be obtained" % self._lockfilepath()) from e
        # END handle lock retrieval

        # open actual file if required
        if self._fd is None:
            # we could specify exclusive here, as we obtained the lock anyway
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | binary)
            except:
                # assure we release our lockfile
                remove(self._lockfilepath())
                raise
            # END handle lockfile
        # END open descriptor for reading

        if stream:
            # need delayed import to avoid a cycle with gitdb.stream
            from gitdb.stream import FDStream
            return FDStream(self._fd)
        else:
            return self._fd
        # END handle stream

    def commit(self):
        """When done writing, call this function to commit your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Handle the lock according to the write mode """
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        # already finished - commit/rollback are allowed to be called repeatedly
        if self._fd is None:
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if self._write and successful:
            # on windows, rename does not silently overwrite the existing one
            if sys.platform == "win32":
                if isfile(self._filepath):
                    remove(self._filepath)
                # END remove if exists
            # END win32 special handling
            os.rename(lockfile, self._filepath)

            # assure others can at least read the file - the tmpfile left it at rw--
            # We may also write that file, on windows that boils down to a remove-
            # protection as well
            chmod(self._filepath, int("644", 8))
        else:
            # just delete the file so far, we failed
            remove(lockfile)
        # END successful handling
398#} END utilities