Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dask/hashing.py: 61%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

46 statements  

1from __future__ import annotations 

2 

3import binascii 

4import hashlib 

5 

# Registry of the hash functions available in this environment.
# hash_buffer() walks this list front to back, so entries are kept
# in decreasing performance order.
hashers = []  # In decreasing performance order

7 

8 

9# Timings on a largish array: 

10# - CityHash is 2x faster than MurmurHash 

11# - xxHash is slightly slower than CityHash 

12# - MurmurHash is 8x faster than SHA1 

13# - SHA1 is significantly faster than all other hashlib algorithms 

14 

def _cityhash_version_tuple(version):
    """
    Return the leading numeric components of *version* as a tuple of ints.

    Parsing stops at the first component that is not purely numeric, so
    ``"0.2.3.post1"`` gives ``(0, 2, 3)`` and ``"0.2.2rc1"`` gives
    ``(0, 2, 2)``.  Tuples compare numerically, unlike raw version strings
    (as strings, ``"0.10.0" < "0.2.2"``).
    """
    numbers = []
    for component in str(version).split("."):
        digits = ""
        for char in component:
            if char not in "0123456789":
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
        if len(digits) != len(component):
            # A suffix such as "rc1" ends the numeric part of the version.
            break
    return tuple(numbers)


try:
    import cityhash  # `python -m pip install cityhash`
except ImportError:
    # CityHash is optional; the other entries of `hashers` are used instead.
    pass
else:
    # CityHash disabled unless the reference leak in
    # https://github.com/escherba/python-cityhash/pull/16
    # is fixed.
    # Compare numerically: a plain string comparison would wrongly reject
    # releases such as "0.10.0".
    if _cityhash_version_tuple(cityhash.__version__) >= (0, 2, 2):

        def _hash_cityhash(buf):
            """
            Produce a 16-bytes hash of *buf* using CityHash.
            """
            h = cityhash.CityHash128(buf)
            # CityHash128 yields an int; serialize it as 16 little-endian bytes.
            return h.to_bytes(16, "little")

        hashers.append(_hash_cityhash)

33 

try:
    import xxhash  # `python -m pip install xxhash`
except ImportError:
    # xxHash is optional; the other entries of `hashers` are used instead.
    pass
else:

    def _hash_xxhash(buf):
        """
        Return the 8-byte xxHash (xxh64) digest of *buf*.
        """
        state = xxhash.xxh64(buf)
        return state.digest()

    hashers.append(_hash_xxhash)

47 

try:
    import mmh3  # `python -m pip install mmh3`
except ImportError:
    # MurmurHash is optional; the other entries of `hashers` are used instead.
    pass
else:

    def _hash_murmurhash(buf):
        """
        Return the 16-byte MurmurHash digest of *buf*.
        """
        digest = mmh3.hash_bytes(buf)
        return digest

    hashers.append(_hash_murmurhash)

61 

62 

def _hash_sha1(buf):
    """
    Return the 20-byte SHA1 digest of the bytes-like object *buf*.
    """
    state = hashlib.sha1()
    state.update(buf)
    return state.digest()

68 

69 

# SHA1 comes from the standard library's hashlib, so it is registered
# unconditionally; being appended last, it is the final entry hash_buffer()
# tries.
hashers.append(_hash_sha1)

71 

72 

def hash_buffer(buf, hasher=None):
    """
    Hash a bytes-like (buffer-compatible) object and return the digest
    as a fixed-length bytes object.

    The hash is of good quality but is not cryptographically secure.
    If *hasher* is given it is tried first.  If it raises TypeError or
    OverflowError, or if no *hasher* is given, the entries of ``hashers``
    are tried in order and the first successful result is returned.

    Raises TypeError when no hasher accepts *buf*.
    """

    def _candidates():
        # Lazily yield the caller's hasher (if any), then the registered ones.
        if hasher is not None:
            yield hasher
        yield from hashers

    for candidate in _candidates():
        try:
            return candidate(buf)
        except (TypeError, OverflowError):
            # Some hash libraries may have overly-strict type checking,
            # not accepting all buffers; move on to the next candidate.
            continue
    raise TypeError(f"unsupported type for hashing: {type(buf)}")

92 

93 

def hash_buffer_hex(buf, hasher=None):
    """
    Hex-encoded variant of hash_buffer: hash *buf* (optionally trying
    *hasher* first) and return the digest as a str of hexadecimal digits.
    """
    digest = hash_buffer(buf, hasher)
    return binascii.hexlify(digest).decode()