/src/duckdb/extension/parquet/include/parquet_crypto.hpp
Line | Count | Source |
1 | | //===----------------------------------------------------------------------===// |
2 | | // DuckDB |
3 | | // |
4 | | // parquet_crypto.hpp |
5 | | // |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #pragma once |
10 | | |
11 | | #include "parquet_types.h" |
12 | | #include "duckdb/common/allocator.hpp" |
13 | | #include "duckdb/common/encryption_state.hpp" |
14 | | #include "duckdb/common/encryption_functions.hpp" |
15 | | #include "duckdb/storage/object_cache.hpp" |
16 | | |
17 | | namespace duckdb { |
18 | | class ParquetAdditionalAuthenticatedData; |
19 | | |
20 | | using duckdb_apache::thrift::TBase; |
21 | | using duckdb_apache::thrift::protocol::TProtocol; |
22 | | using duckdb_parquet::ColumnChunk; |
23 | | using duckdb_parquet::PageType; |
24 | | class Allocator; |
25 | | class BufferedFileWriter; |
26 | | |
//! Object-cache entry (derives from ObjectCacheEntry) that holds a name -> key string map (`keys`).
27 | | class ParquetKeys : public ObjectCacheEntry { |
28 | | public: |
//! Returns a reference to the ParquetKeys instance for `context`.
//! NOTE(review): defined out of line; presumably fetched from (or created in) the object cache - confirm.
29 | | static ParquetKeys &Get(ClientContext &context); |
30 | | |
31 | | public: |
//! Adds `key` under the name `key_name` (presumably into `keys`).
//! NOTE(review): handling of an already-registered name is not visible in this header.
32 | | void AddKey(const string &key_name, const string &key); |
//! Const query: is there a key registered under `key_name`?
33 | | bool HasKey(const string &key_name) const; |
//! Returns a const reference to the key registered under `key_name`.
//! NOTE(review): behavior for an unknown name is decided by the out-of-line definition - confirm before
//! calling without a prior HasKey().
34 | | const string &GetKey(const string &key_name) const; |
35 | | |
36 | | public: |
//! Static / virtual accessors for this entry's object-type string (the virtual one overrides the base class).
37 | | static string ObjectType(); |
38 | | string GetObjectType() override; |
//! Always returns a default-constructed optional_idx (presumably meaning "no memory estimate available").
39 | 0 | optional_idx GetEstimatedCacheMemory() const override { |
40 | 0 | return optional_idx {}; |
41 | 0 | } |
42 | | |
43 | | private: |
//! Key storage: key name -> key value.
44 | | unordered_map<string, string> keys; |
45 | | }; |
46 | | |
//! Bundles the values that identify one encrypted Parquet module (file identifier, module type and
//! row-group / column / page ordinals) together with the owned additional-authenticated-data object.
47 | | struct CryptoMetaData { |
//! NOTE(review): single-argument constructor is not `explicit`; left unchanged because callers are not visible.
48 | | CryptoMetaData(Allocator &allocator); |
//! Sets the file identifier and, optionally, the ordinals and module type; every optional argument defaults to -1.
49 | | void Initialize(const std::string &unique_file_identifier_p, int16_t row_group_ordinal = -1, |
50 | | int16_t column_ordinal = -1, int8_t module = -1, int16_t page_ordinal = -1); |
51 | | void ClearAdditionalAuthenticatedData(); |
//! Sets the module type (values are presumably the ParquetCrypto module constants - confirm).
52 | | void SetModule(int8_t module_p); |
//! NOTE(review): the exact emptiness criterion lives in the out-of-line definition.
53 | | bool IsEmpty() const; |
54 | | |
55 | | public: |
56 | | string unique_file_identifier = ""; |
//! Default member initializers: -1 mirrors the defaults of Initialize(), so these fields never hold an
//! indeterminate value even if a constructor does not assign them.
57 | | int8_t module = -1; |
58 | | int16_t row_group_ordinal = -1; |
59 | | int16_t column_ordinal = -1; |
60 | | int16_t page_ordinal = -1; |
61 | | |
62 | | public: |
//! Owned AAD object; its type is only forward-declared at this point in the header.
63 | | unique_ptr<ParquetAdditionalAuthenticatedData> additional_authenticated_data; |
64 | | }; |
65 | | |
//! Parquet-specific additional authenticated data (AAD); derives from AdditionalAuthenticatedData.
//! The private helpers suggest the AAD is written as a prefix followed by a suffix derived from CryptoMetaData.
66 | | class ParquetAdditionalAuthenticatedData : public AdditionalAuthenticatedData { |
67 | | public: |
//! Explicit to prevent implicit conversion from an Allocator.
68 | | explicit ParquetAdditionalAuthenticatedData(Allocator &allocator); |
69 | | ~ParquetAdditionalAuthenticatedData() override; |
70 | | |
71 | | public: |
//! Returns the AAD prefix size (presumably the value cached in additional_authenticated_data_prefix_size;
//! behavior when no prefix has been written yet is not visible here).
72 | | idx_t GetPrefixSize() const; |
//! NOTE(review): declared const although the name implies repositioning - confirm what state it touches.
73 | | void Rewind() const; |
//! Writes the AAD for the module described by `crypto_meta_data`
//! (presumably via WritePrefix + WriteSuffix - confirm in the definition).
74 | | void WriteParquetAAD(const CryptoMetaData &crypto_meta_data); |
75 | | |
76 | | private: |
77 | | void WritePrefix(const std::string &prefix); |
78 | | void WriteSuffix(const CryptoMetaData &crypto_meta_data); |
79 | | |
80 | | private: |
//! Size of the AAD prefix; optional_idx, so it can be unset.
81 | | optional_idx additional_authenticated_data_prefix_size; |
82 | | }; |
83 | | |
//! Encryption settings for a Parquet file: the footer key plus a column-name -> key-name mapping.
//! Can be serialized and deserialized (see Serialize / Deserialize below).
84 | | class ParquetEncryptionConfig { |
85 | | public: |
86 | | explicit ParquetEncryptionConfig(); |
//! Builds the configuration from a user-supplied Value (the expected layout of `arg` is not visible here).
87 | | ParquetEncryptionConfig(ClientContext &context, const Value &arg); |
//! NOTE(review): single-argument constructor is not `explicit`, so a string converts implicitly - confirm intended.
88 | | ParquetEncryptionConfig(string footer_key); |
89 | | |
90 | | public: |
//! Factory returning a shared_ptr; takes the same arguments as the (context, arg) constructor.
91 | | static shared_ptr<ParquetEncryptionConfig> Create(ClientContext &context, const Value &arg); |
//! Read-only access to the footer key.
92 | | const string &GetFooterKey() const; |
93 | | |
94 | | public: |
95 | | void Serialize(Serializer &serializer) const; |
96 | | static shared_ptr<ParquetEncryptionConfig> Deserialize(Deserializer &deserializer); |
97 | | |
98 | | private: |
99 | | //! The encryption key used for the footer |
100 | | string footer_key; |
101 | | //! Mapping from column name to key name |
102 | | unordered_map<string, string> column_keys; |
103 | | }; |
104 | | |
//! Static-only helper class: constants and entry points for reading/writing encrypted Parquet modules.
105 | | class ParquetCrypto { |
106 | | public: |
107 | | //! Encrypted modules |
//! Byte sizes of the length field, nonce and tag (presumably the framing around each encrypted
//! module's ciphertext - see the Parquet modular encryption specification).
108 | | static constexpr idx_t LENGTH_BYTES = 4; |
109 | | static constexpr idx_t NONCE_BYTES = 12; |
110 | | static constexpr idx_t TAG_BYTES = 16; |
111 | | |
112 | | //! Block size we encrypt/decrypt |
113 | | static constexpr idx_t CRYPTO_BLOCK_SIZE = 4096; |
114 | | static constexpr idx_t BLOCK_SIZE = 16; |
115 | | |
116 | | // Module types for encryption |
//! These are the int8_t values stored in CryptoMetaData::module.
117 | | static constexpr int8_t FOOTER = 0; |
118 | | static constexpr int8_t COLUMN_METADATA = 1; |
119 | | static constexpr int8_t DATA_PAGE = 2; |
120 | | static constexpr int8_t DICTIONARY_PAGE = 3; |
121 | | static constexpr int8_t DATA_PAGE_HEADER = 4; |
122 | | static constexpr int8_t DICTIONARY_PAGE_HEADER = 5; |
123 | | static constexpr int8_t COLUMN_INDEX = 6; |
124 | | static constexpr int8_t OFFSET_INDEX = 7; |
125 | | static constexpr int8_t BLOOM_FILTER_HEADER = 8; |
126 | | static constexpr int8_t BLOOM_FILTER_BITSET = 9; |
127 | | |
128 | | // Standard AAD length for file |
129 | | static constexpr int32_t UNIQUE_FILE_ID_LEN = 8; |
130 | | // Maximum Parquet AAD suffix bytes |
//! 7 equals 1 byte (int8_t module) + 3 * 2 bytes (int16_t ordinals), the field widths used by
//! CryptoMetaData - presumably that is how the suffix is laid out; confirm in WriteSuffix.
131 | | static constexpr int32_t AAD_MAX_SUFFIX_BYTES = 7; |
132 | | |
133 | | public: |
134 | | //! Decrypt and read a Thrift object from the transport protocol |
135 | | static uint32_t Read(TBase &object, TProtocol &iprot, const string &key, const EncryptionUtil &encryption_util_p, |
136 | | const CryptoMetaData &crypto_meta_data); |
137 | | //! Encrypt and write a Thrift object to the transport protocol |
138 | | static uint32_t Write(const TBase &object, TProtocol &oprot, const string &key, |
139 | | const EncryptionUtil &encryption_util_p); |
140 | | //! Decrypt and read a buffer |
141 | | static uint32_t ReadData(TProtocol &iprot, const data_ptr_t buffer, const uint32_t buffer_size, const string &key, |
142 | | const EncryptionUtil &encryption_util_p, const CryptoMetaData &crypto_meta_data); |
143 | | //! Encrypt and write a buffer to a file |
144 | | static uint32_t WriteData(TProtocol &oprot, const const_data_ptr_t buffer, const uint32_t buffer_size, |
145 | | const string &key, const EncryptionUtil &encryption_util_p); |
146 | | |
147 | | public: |
//! Adds a key from function parameters (the parameter layout is defined by the out-of-line implementation).
//! The second parameter is `&parameters`; the listing had it garbled into a pilcrow by HTML-entity decoding.
148 | | static void AddKey(ClientContext &context, const FunctionParameters &parameters); |
//! Returns whether `key` is acceptable as an encryption key (the criteria are not visible in this header).
149 | | static bool ValidKey(const std::string &key); |
150 | | |
151 | | public: |
//! Helpers that derive the module type / page ordinal for a column chunk and build the AAD.
152 | | static int8_t GetModuleHeader(const ColumnChunk &chunk, uint16_t page_ordinal); |
153 | | static int8_t GetModule(const ColumnChunk &chunk, PageType::type page_type, uint16_t page_ordinal); |
//! NOTE(review): `module` is uint8_t here but int8_t everywhere else in this header; the signature is
//! left untouched because the definition is not visible.
154 | | static int16_t GetFinalPageOrdinal(const ColumnChunk &chunk, uint8_t module, uint16_t page_ordinal); |
155 | | static void GenerateAdditionalAuthenticatedData(Allocator &allocator, CryptoMetaData &aad_crypto_metadata); |
156 | | static unique_ptr<ParquetAdditionalAuthenticatedData> GenerateFooterAAD(Allocator &allocator, |
157 | | const std::string &unique_file_identifier); |
158 | | }; |
159 | | |
160 | | } // namespace duckdb |