1import hashlib
2
3
# Names of the Avro primitive types, listed in the order the Avro
# specification introduces them.  Kept as a plain set for fast membership
# tests.
PRIMITIVES = {
    "null",
    "boolean",
    "int",
    "long",
    "float",
    "double",
    "bytes",
    "string",
}
14
# Schema-level keys that carry a defined meaning in a schema declaration.
# NOTE(review): presumably any other key is treated as custom metadata --
# the consumers of this set are not visible here; confirm against callers.
RESERVED_PROPERTIES = {
    # Keys that are not tied to one particular complex type
    "type",
    "name",
    "namespace",
    "doc",
    # Keys that belong to one specific complex type
    "fields",  # Record
    "symbols",  # Enum
    "items",  # Array
    "values",  # Map
    "size",  # Fixed
}
26
# Keys a field declaration may carry in addition to "type" and "name".
OPTIONAL_FIELD_PROPERTIES = {"doc", "aliases", "default"}

# Every key with a defined meaning on a field: "type" and "name" plus the
# optional ones above.
RESERVED_FIELD_PROPERTIES = {"type", "name", *OPTIONAL_FIELD_PROPERTIES}
34
# Name of the 64-bit Rabin fingerprint algorithm.
RABIN_64 = "CRC-64-AVRO"

# Maps the Java-style digest names onto the corresponding hashlib names.
JAVA_FINGERPRINT_MAPPING = {
    "SHA-256": "sha256",
    "MD5": "md5",
}

# Every accepted algorithm name: whatever hashlib guarantees on all
# platforms, the Java-style aliases, and the Rabin fingerprint.
FINGERPRINT_ALGORITHMS = {
    *hashlib.algorithms_guaranteed,
    *JAVA_FINGERPRINT_MAPPING,
    RABIN_64,
}
40
41
class UnknownType(ValueError):
    """ValueError subclass that records the offending type name.

    The name is both the exception's sole argument (so ``str(exc)`` is the
    name) and available directly as the ``name`` attribute.
    """

    def __init__(self, name):
        self.name = name
        super().__init__(name)
46
47
class SchemaParseException(Exception):
    """Exception type for schema parsing failures; adds nothing to Exception."""
50
51
# Seed value of the fingerprint; also used as the polynomial that is XORed
# in while building the lookup table.
_RABIN_EMPTY_64 = 0xC15D213AA4D7A795


def _build_rabin_table():
    """Return the 256-entry byte lookup table used by rabin_fingerprint."""
    table = []
    for i in range(256):
        fp = i
        for _ in range(8):
            # -(fp & 1) is 0 or -1 (all bits set), so the polynomial is
            # XORed in only when the bit being shifted out is 1.
            mask = -(fp & 1)
            fp = (fp >> 1) ^ (_RABIN_EMPTY_64 & mask)
        table.append(fp)
    return tuple(table)


# The table does not depend on the input, so it is built once at import
# time instead of on every rabin_fingerprint() call.
_RABIN_FP_TABLE = _build_rabin_table()


def rabin_fingerprint(data):
    """Return the 64-bit Rabin fingerprint of *data* as a hex string.

    Args:
        data: An iterable of integers in ``range(256)``, e.g. ``bytes`` or
            ``bytearray``.

    Returns:
        A 16-character lowercase hex string holding the 8 fingerprint bytes
        in little-endian order.  Empty input yields the seed value itself.
    """
    table = _RABIN_FP_TABLE
    result = _RABIN_EMPTY_64
    for byte in data:
        # Consume one input byte: drop the low 8 bits of the running value
        # and fold in the precomputed contribution of (low byte XOR input).
        result = (result >> 8) ^ table[(result ^ byte) & 0xFF]

    # Although not mentioned in the Avro specification, the Java
    # implementation gives fingerprint bytes in little-endian order
    return result.to_bytes(length=8, byteorder="little", signed=False).hex()