Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/fastavro/io/binary_decoder.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

60 statements  

1from struct import unpack 

2 

3 

class BinaryDecoder:
    """Decoder for the avro binary format.

    NOTE: All attributes and methods on this class should be considered
    private.

    Parameters
    ----------
    fo: file-like
        Input stream. Only its ``read(size)`` method is used, and it must
        return ``bytes``.

    """

    def __init__(self, fo):
        self.fo = fo

    def read_null(self):
        """null is written as zero bytes, so nothing is consumed."""
        return None

    def read_boolean(self):
        """A boolean is written as a single byte whose value is either 0
        (false) or 1 (true).

        Raises ``struct.error`` if the stream has no byte left.
        """

        # technically 0x01 == true and 0x00 == false, but many languages will
        # cast anything other than 0 to True and only 0 to False
        return unpack("B", self.fo.read(1))[0] != 0

    def read_long(self):
        """int and long values are written using variable-length, zig-zag
        coding.

        Returns the decoded signed integer.

        Raises ``EOFError`` if the stream is exhausted, either before the
        first byte or in the middle of a multi-byte value.
        """
        c = self.fo.read(1)

        # EOF on the very first byte is the normal "no more data" signal,
        # since most readers start here.
        if not c:
            raise EOFError

        b = ord(c)
        n = b & 0x7F
        shift = 7

        # The high bit of each byte flags that another 7-bit group follows.
        while (b & 0x80) != 0:
            c = self.fo.read(1)
            if not c:
                # Without this check ord(b"") would raise TypeError on a
                # truncated stream; report it as EOF like the other readers.
                raise EOFError(
                    "Unexpected end of stream inside a variable-length integer"
                )
            b = ord(c)
            n |= (b & 0x7F) << shift
            shift += 7

        # Undo the zig-zag mapping (0, -1, 1, -2, ... <- 0, 1, 2, 3, ...).
        return (n >> 1) ^ -(n & 1)

    read_int = read_long

    def read_float(self):
        """A float is written as 4 bytes.

        The float is converted into a 32-bit integer using a method equivalent
        to Java's floatToIntBits and then encoded in little-endian format.

        Raises ``struct.error`` if fewer than 4 bytes are available.
        """
        return unpack("<f", self.fo.read(4))[0]

    def read_double(self):
        """A double is written as 8 bytes.

        The double is converted into a 64-bit integer using a method equivalent
        to Java's doubleToLongBits and then encoded in little-endian format.

        Raises ``struct.error`` if fewer than 8 bytes are available.
        """
        return unpack("<d", self.fo.read(8))[0]

    def read_bytes(self):
        """Bytes are encoded as a long followed by that many bytes of data.

        Raises ``EOFError`` if the length prefix is negative or if the stream
        ends before the announced number of bytes has been read.
        """
        size = self.read_long()
        if size < 0:
            # A negative size passed to read() means "read until EOF" for
            # file-like objects, so a corrupt length prefix would pull the
            # whole remaining stream into memory before failing. Reject it
            # up front; EOFError is kept because that is what this case
            # raised before the guard existed.
            raise EOFError(f"Invalid negative byte length: {size}")
        out = self.fo.read(size)
        if len(out) != size:
            raise EOFError(f"Expected {size} bytes, read {len(out)}")
        return out

    def read_utf8(self, handle_unicode_errors="strict"):
        """A string is encoded as a long followed by that many bytes of UTF-8
        encoded character data.

        ``handle_unicode_errors`` is forwarded as the ``errors`` argument of
        ``bytes.decode``.
        """
        return self.read_bytes().decode(errors=handle_unicode_errors)

    def read_fixed(self, size):
        """Fixed instances are encoded using the number of bytes declared in the
        schema.

        Raises ``EOFError`` if fewer than ``size`` bytes could be read.
        """
        out = self.fo.read(size)
        if len(out) < size:
            raise EOFError(f"Expected {size} bytes, read {len(out)}")
        return out

    def read_enum(self):
        """An enum is encoded by a int, representing the zero-based position of the
        symbol in the schema.
        """
        return self.read_long()

    def read_array_start(self):
        """Arrays are encoded as a series of blocks.

        Reads the first block count and stores it for ``iter_array``.
        """
        self._block_count = self.read_long()

    def read_array_end(self):
        """Nothing to do: the terminating zero block count is consumed by
        ``iter_array``."""
        pass

    def _iter_array_or_map(self):
        """Each block consists of a long count value, followed by that many
        array items. A block with count zero indicates the end of the array.
        Each item is encoded per the array's item schema.

        If a block's count is negative, then the count is followed immediately
        by a long block size, indicating the number of bytes in the block.
        The actual count in this case is the absolute value of the count
        written.

        Yields ``None`` once per item. This method does not decode items
        itself: the next block count is read from the stream only after the
        current block's yields are done, so the items must have been consumed
        from the stream by then (presumably by the caller between yields).
        ``read_array_start`` or ``read_map_start`` must be called first to
        set the initial block count.
        """
        while self._block_count != 0:
            if self._block_count < 0:
                self._block_count = -self._block_count
                # Read block size, unused
                self.read_long()

            for _ in range(self._block_count):
                yield
            self._block_count = self.read_long()

    iter_array = _iter_array_or_map
    iter_map = _iter_array_or_map

    def read_map_start(self):
        """Maps are encoded as a series of blocks.

        Reads the first block count and stores it for ``iter_map``.
        """
        self._block_count = self.read_long()

    def read_map_end(self):
        """Nothing to do: the terminating zero block count is consumed by
        ``iter_map``."""
        pass

    def read_index(self):
        """A union is encoded by first writing a long value indicating the
        zero-based position within the union of the schema of its value.

        The value is then encoded per the indicated schema within the union.
        """
        return self.read_long()