/src/arrow/cpp/src/parquet/encryption/encryption.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
#include <cassert>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <utility>
25 | | |
26 | | #include "arrow/util/secure_string.h" |
27 | | #include "parquet/exception.h" |
28 | | #include "parquet/schema.h" |
29 | | #include "parquet/types.h" |
30 | | |
31 | | namespace parquet { |
32 | | |
33 | | static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm = |
34 | | ParquetCipher::AES_GCM_V1; |
35 | | static constexpr int32_t kMaximalAadMetadataLength = 256; |
36 | | static constexpr bool kDefaultEncryptedFooter = true; |
37 | | static constexpr bool kDefaultCheckSignature = true; |
38 | | static constexpr bool kDefaultAllowPlaintextFiles = false; |
39 | | static constexpr int32_t kAadFileUniqueLength = 8; |
40 | | |
41 | | class ColumnDecryptionProperties; |
42 | | using ColumnPathToDecryptionPropertiesMap = |
43 | | std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>; |
44 | | |
45 | | class ColumnEncryptionProperties; |
46 | | using ColumnPathToEncryptionPropertiesMap = |
47 | | std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>; |
48 | | |
49 | | class PARQUET_EXPORT DecryptionKeyRetriever { |
50 | | public: |
51 | | /// \brief Retrieve a key. |
52 | | virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0; |
53 | | |
54 | 22.7k | virtual ~DecryptionKeyRetriever() {} |
55 | | }; |
56 | | |
57 | | /// Simple integer key retriever |
58 | | class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { |
59 | | public: |
60 | | void PutKey(uint32_t key_id, ::arrow::util::SecureString key); |
61 | | |
62 | 0 | ::arrow::util::SecureString GetKey(const std::string& key_id_string) override { |
63 | 0 | // key_id_string is string but for IntegerKeyIdRetriever it encodes |
64 | 0 | // a native-endian 32 bit unsigned integer key_id |
65 | 0 | uint32_t key_id; |
66 | 0 | assert(key_id_string.size() == sizeof(key_id)); |
67 | 0 | memcpy(&key_id, key_id_string.data(), sizeof(key_id)); |
68 | 0 |
|
69 | 0 | return key_map_.at(key_id); |
70 | 0 | } |
71 | | |
72 | | private: |
73 | | std::map<uint32_t, ::arrow::util::SecureString> key_map_; |
74 | | }; |
75 | | |
76 | | // Simple string key retriever |
77 | | class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { |
78 | | public: |
79 | | void PutKey(std::string key_id, ::arrow::util::SecureString key); |
80 | | ::arrow::util::SecureString GetKey(const std::string& key_id) override; |
81 | | |
82 | | private: |
83 | | std::map<std::string, ::arrow::util::SecureString> key_map_; |
84 | | }; |
85 | | |
86 | | class PARQUET_EXPORT HiddenColumnException : public ParquetException { |
87 | | public: |
88 | | explicit HiddenColumnException(const std::string& columnPath) |
89 | 0 | : ParquetException(columnPath.c_str()) {} |
90 | | }; |
91 | | |
92 | | class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException { |
93 | | public: |
94 | | explicit KeyAccessDeniedException(const std::string& columnPath) |
95 | 0 | : ParquetException(columnPath.c_str()) {} |
96 | | }; |
97 | | |
98 | 31.7k | inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) { |
99 | 31.7k | if (str.empty()) { |
100 | 0 | return {}; |
101 | 0 | } |
102 | | |
103 | 31.7k | return {reinterpret_cast<const uint8_t*>(str.data()), str.size()}; |
104 | 31.7k | } |
105 | | |
106 | | class PARQUET_EXPORT ColumnEncryptionProperties { |
107 | | public: |
108 | | class PARQUET_EXPORT Builder { |
109 | | public: |
110 | | PARQUET_DEPRECATED("name argument is ignored, use default constructor instead") |
111 | 0 | explicit Builder(const std::string& name) : encrypted_(true) {} |
112 | | |
113 | | PARQUET_DEPRECATED("path argument is ignored, use default constructor instead") |
114 | 0 | explicit Builder(const schema::ColumnPath& path) : encrypted_(true) {} |
115 | | |
116 | | Builder() = default; |
117 | | |
118 | | /// Set a column-specific key. |
119 | | /// If key is not set on an encrypted column, the column will |
120 | | /// be encrypted with the footer key. |
121 | | /// keyBytes Key length must be either 16, 24 or 32 bytes. |
122 | | /// Caller is responsible for wiping out the input key array. |
123 | | Builder* key(::arrow::util::SecureString column_key); |
124 | | |
125 | | /// Set a key retrieval metadata. |
126 | | /// use either key_metadata() or key_id(), not both |
127 | | Builder* key_metadata(std::string key_metadata); |
128 | | |
129 | | /// A convenience function to set key metadata using a string id. |
130 | | /// Set a key retrieval metadata (converted from String). |
131 | | /// use either key_metadata() or key_id(), not both |
132 | | /// key_id will be converted to metadata (UTF-8 array). |
133 | | Builder* key_id(std::string key_id); |
134 | | |
135 | 0 | std::shared_ptr<ColumnEncryptionProperties> build() { |
136 | 0 | return std::shared_ptr<ColumnEncryptionProperties>( |
137 | 0 | new ColumnEncryptionProperties(encrypted_, key_, key_metadata_)); |
138 | 0 | } |
139 | | |
140 | | private: |
141 | | bool encrypted_ = true; |
142 | | ::arrow::util::SecureString key_; |
143 | | std::string key_metadata_; |
144 | | }; |
145 | | |
146 | 0 | bool is_encrypted() const { return encrypted_; } |
147 | 0 | bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; } |
148 | 0 | const ::arrow::util::SecureString& key() const { return key_; } |
149 | 0 | const std::string& key_metadata() const { return key_metadata_; } |
150 | | |
151 | | static std::shared_ptr<ColumnEncryptionProperties> Unencrypted(); |
152 | | static std::shared_ptr<ColumnEncryptionProperties> WithFooterKey(); |
153 | | static std::shared_ptr<ColumnEncryptionProperties> WithColumnKey( |
154 | | ::arrow::util::SecureString key, std::string key_metadata = ""); |
155 | | |
156 | | private: |
157 | | bool encrypted_; |
158 | | bool encrypted_with_footer_key_; |
159 | | ::arrow::util::SecureString key_; |
160 | | std::string key_metadata_; |
161 | | explicit ColumnEncryptionProperties(bool encrypted, ::arrow::util::SecureString key, |
162 | | std::string key_metadata); |
163 | | }; |
164 | | |
165 | | class PARQUET_EXPORT ColumnDecryptionProperties { |
166 | | public: |
167 | | class PARQUET_EXPORT Builder { |
168 | | public: |
169 | 0 | explicit Builder(std::string name) : column_path_(std::move(name)) {} |
170 | | |
171 | 0 | explicit Builder(const schema::ColumnPath& path) : Builder(path.ToDotString()) {} |
172 | | |
173 | | /// Set an explicit column key. If applied on a file that contains |
174 | | /// key metadata for this column the metadata will be ignored, |
175 | | /// the column will be decrypted with this key. |
176 | | /// key length must be either 16, 24 or 32 bytes. |
177 | | Builder* key(::arrow::util::SecureString key); |
178 | | |
179 | | std::shared_ptr<ColumnDecryptionProperties> build(); |
180 | | |
181 | | private: |
182 | | std::string column_path_; |
183 | | ::arrow::util::SecureString key_; |
184 | | }; |
185 | | |
186 | 0 | const std::string& column_path() const { return column_path_; } |
187 | 0 | const ::arrow::util::SecureString& key() const { return key_; } |
188 | | |
189 | | private: |
190 | | std::string column_path_; |
191 | | ::arrow::util::SecureString key_; |
192 | | |
193 | | /// This class is only required for setting explicit column decryption keys - |
194 | | /// to override key retriever (or to provide keys when key metadata and/or |
195 | | /// key retriever are not available) |
196 | | explicit ColumnDecryptionProperties(std::string column_path, |
197 | | ::arrow::util::SecureString key); |
198 | | }; |
199 | | |
200 | | class PARQUET_EXPORT AADPrefixVerifier { |
201 | | public: |
202 | | /// Verifies identity (AAD Prefix) of individual file, |
203 | | /// or of file collection in a data set. |
204 | | /// Throws exception if an AAD prefix is wrong. |
205 | | /// In a data set, AAD Prefixes should be collected, |
206 | | /// and then checked for missing files. |
207 | | virtual void Verify(const std::string& aad_prefix) = 0; |
208 | 0 | virtual ~AADPrefixVerifier() {} |
209 | | }; |
210 | | |
211 | | class PARQUET_EXPORT FileDecryptionProperties { |
212 | | public: |
213 | | class PARQUET_EXPORT Builder { |
214 | | public: |
215 | 22.7k | Builder() { |
216 | 22.7k | check_plaintext_footer_integrity_ = kDefaultCheckSignature; |
217 | 22.7k | plaintext_files_allowed_ = kDefaultAllowPlaintextFiles; |
218 | 22.7k | } |
219 | | |
220 | | /// Set an explicit footer key. If applied on a file that contains |
221 | | /// footer key metadata the metadata will be ignored, the footer |
222 | | /// will be decrypted/verified with this key. |
223 | | /// If explicit key is not set, footer key will be fetched from |
224 | | /// key retriever. |
225 | | /// With explicit keys or AAD prefix, new encryption properties object must be |
226 | | /// created for each encrypted file. |
227 | | /// Explicit encryption keys (footer and column) are cloned. |
228 | | /// Upon completion of file reading, the cloned encryption keys in the properties |
229 | | /// will be wiped out (array values set to 0). |
230 | | /// Caller is responsible for wiping out the input key array. |
231 | | /// param footerKey Key length must be either 16, 24 or 32 bytes. |
232 | | Builder* footer_key(::arrow::util::SecureString footer_key); |
233 | | |
234 | | /// Set explicit column keys (decryption properties). |
235 | | /// Its also possible to set a key retriever on this property object. |
236 | | /// Upon file decryption, availability of explicit keys is checked before |
237 | | /// invocation of the retriever callback. |
238 | | /// If an explicit key is available for a footer or a column, |
239 | | /// its key metadata will be ignored. |
240 | | Builder* column_keys( |
241 | | ColumnPathToDecryptionPropertiesMap column_decryption_properties); |
242 | | |
243 | | /// Set a key retriever callback. Its also possible to |
244 | | /// set explicit footer or column keys on this file property object. |
245 | | /// Upon file decryption, availability of explicit keys is checked before |
246 | | /// invocation of the retriever callback. |
247 | | /// If an explicit key is available for a footer or a column, |
248 | | /// its key metadata will be ignored. |
249 | | Builder* key_retriever(std::shared_ptr<DecryptionKeyRetriever> key_retriever); |
250 | | |
251 | | /// Skip integrity verification of plaintext footers. |
252 | | /// If not called, integrity of plaintext footers will be checked in runtime, |
253 | | /// and an exception will be thrown in the following situations: |
254 | | /// - footer signing key is not available |
255 | | /// (not passed, or not found by key retriever) |
256 | | /// - footer content and signature don't match |
257 | 0 | Builder* disable_footer_signature_verification() { |
258 | 0 | check_plaintext_footer_integrity_ = false; |
259 | 0 | return this; |
260 | 0 | } |
261 | | |
262 | | /// Explicitly supply the file AAD prefix. |
263 | | /// A must when a prefix is used for file encryption, but not stored in file. |
264 | | /// If AAD prefix is stored in file, it will be compared to the explicitly |
265 | | /// supplied value and an exception will be thrown if they differ. |
266 | | Builder* aad_prefix(std::string aad_prefix); |
267 | | |
268 | | /// Set callback for verification of AAD Prefixes stored in file. |
269 | | Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier); |
270 | | |
271 | | /// By default, reading plaintext (unencrypted) files is not |
272 | | /// allowed when using a decryptor |
273 | | /// - in order to detect files that were not encrypted by mistake. |
274 | | /// However, the default behavior can be overridden by calling this method. |
275 | | /// The caller should use then a different method to ensure encryption |
276 | | /// of files with sensitive data. |
277 | 22.7k | Builder* plaintext_files_allowed() { |
278 | 22.7k | plaintext_files_allowed_ = true; |
279 | 22.7k | return this; |
280 | 22.7k | } |
281 | | |
282 | 22.7k | std::shared_ptr<FileDecryptionProperties> build() { |
283 | 22.7k | return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties( |
284 | 22.7k | footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_, |
285 | 22.7k | aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_)); |
286 | 22.7k | } |
287 | | |
288 | | private: |
289 | | ::arrow::util::SecureString footer_key_; |
290 | | std::string aad_prefix_; |
291 | | std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_; |
292 | | ColumnPathToDecryptionPropertiesMap column_decryption_properties_; |
293 | | |
294 | | std::shared_ptr<DecryptionKeyRetriever> key_retriever_; |
295 | | bool check_plaintext_footer_integrity_; |
296 | | bool plaintext_files_allowed_; |
297 | | }; |
298 | | |
299 | | const ::arrow::util::SecureString& column_key(const std::string& column_path) const; |
300 | | |
301 | 233 | const ::arrow::util::SecureString& footer_key() const { return footer_key_; } |
302 | | |
303 | 251 | const std::string& aad_prefix() const { return aad_prefix_; } |
304 | | |
305 | 48.8k | const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const { |
306 | 48.8k | return key_retriever_; |
307 | 48.8k | } |
308 | | |
309 | 33 | bool check_plaintext_footer_integrity() const { |
310 | 33 | return check_plaintext_footer_integrity_; |
311 | 33 | } |
312 | | |
313 | 19.4k | bool plaintext_files_allowed() const { return plaintext_files_allowed_; } |
314 | | |
315 | 244 | const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const { |
316 | 244 | return aad_prefix_verifier_; |
317 | 244 | } |
318 | | |
319 | | private: |
320 | | ::arrow::util::SecureString footer_key_; |
321 | | std::string aad_prefix_; |
322 | | std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_; |
323 | | ColumnPathToDecryptionPropertiesMap column_decryption_properties_; |
324 | | std::shared_ptr<DecryptionKeyRetriever> key_retriever_; |
325 | | bool check_plaintext_footer_integrity_; |
326 | | bool plaintext_files_allowed_; |
327 | | |
328 | | FileDecryptionProperties( |
329 | | ::arrow::util::SecureString footer_key, |
330 | | std::shared_ptr<DecryptionKeyRetriever> key_retriever, |
331 | | bool check_plaintext_footer_integrity, std::string aad_prefix, |
332 | | std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier, |
333 | | ColumnPathToDecryptionPropertiesMap column_decryption_properties, |
334 | | bool plaintext_files_allowed); |
335 | | }; |
336 | | |
337 | | class PARQUET_EXPORT FileEncryptionProperties { |
338 | | public: |
339 | | class PARQUET_EXPORT Builder { |
340 | | public: |
341 | | explicit Builder(::arrow::util::SecureString footer_key) |
342 | | : parquet_cipher_(kDefaultEncryptionAlgorithm), |
343 | | encrypted_footer_(kDefaultEncryptedFooter), |
344 | 0 | footer_key_(std::move(footer_key)) { |
345 | 0 | store_aad_prefix_in_file_ = false; |
346 | 0 | } |
347 | | |
348 | | /// Create files with plaintext footer. |
349 | | /// If not called, the files will be created with encrypted footer (default). |
350 | 0 | Builder* set_plaintext_footer() { |
351 | 0 | encrypted_footer_ = false; |
352 | 0 | return this; |
353 | 0 | } |
354 | | |
355 | | /// Set encryption algorithm. |
356 | | /// If not called, files will be encrypted with AES_GCM_V1 (default). |
357 | 0 | Builder* algorithm(ParquetCipher::type parquet_cipher) { |
358 | 0 | parquet_cipher_ = parquet_cipher; |
359 | 0 | return this; |
360 | 0 | } |
361 | | |
362 | | /// Set a key retrieval metadata (converted from String). |
363 | | /// use either footer_key_metadata or footer_key_id, not both. |
364 | | Builder* footer_key_id(std::string key_id); |
365 | | |
366 | | /// Set a key retrieval metadata. |
367 | | /// use either footer_key_metadata or footer_key_id, not both. |
368 | | Builder* footer_key_metadata(std::string footer_key_metadata); |
369 | | |
370 | | /// Set the file AAD Prefix. |
371 | | Builder* aad_prefix(std::string aad_prefix); |
372 | | |
373 | | /// Skip storing AAD Prefix in file. |
374 | | /// If not called, and if AAD Prefix is set, it will be stored. |
375 | | Builder* disable_aad_prefix_storage(); |
376 | | |
377 | | /// Set the list of encrypted columns and their properties (keys etc). |
378 | | /// If not called, all columns will be encrypted with the footer key. |
379 | | /// If called, the file columns not in the list will be left unencrypted. |
380 | | Builder* encrypted_columns(ColumnPathToEncryptionPropertiesMap encrypted_columns); |
381 | | |
382 | 0 | std::shared_ptr<FileEncryptionProperties> build() { |
383 | 0 | return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties( |
384 | 0 | parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_, |
385 | 0 | aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_)); |
386 | 0 | } |
387 | | |
388 | | private: |
389 | | ParquetCipher::type parquet_cipher_; |
390 | | bool encrypted_footer_; |
391 | | ::arrow::util::SecureString footer_key_; |
392 | | std::string footer_key_metadata_; |
393 | | |
394 | | std::string aad_prefix_; |
395 | | bool store_aad_prefix_in_file_; |
396 | | ColumnPathToEncryptionPropertiesMap encrypted_columns_; |
397 | | }; |
398 | | |
399 | 0 | bool encrypted_footer() const { return encrypted_footer_; } |
400 | | |
401 | 0 | EncryptionAlgorithm algorithm() const { return algorithm_; } |
402 | | |
403 | 0 | const ::arrow::util::SecureString& footer_key() const { return footer_key_; } |
404 | | |
405 | 0 | const std::string& footer_key_metadata() const { return footer_key_metadata_; } |
406 | | |
407 | 0 | const std::string& file_aad() const { return file_aad_; } |
408 | | |
409 | | std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties( |
410 | | const std::string& column_path); |
411 | | |
412 | 0 | const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const { |
413 | 0 | return encrypted_columns_; |
414 | 0 | } |
415 | | |
416 | | private: |
417 | | EncryptionAlgorithm algorithm_; |
418 | | ::arrow::util::SecureString footer_key_; |
419 | | std::string footer_key_metadata_; |
420 | | bool encrypted_footer_; |
421 | | std::string file_aad_; |
422 | | std::string aad_prefix_; |
423 | | bool store_aad_prefix_in_file_; |
424 | | ColumnPathToEncryptionPropertiesMap encrypted_columns_; |
425 | | |
426 | | FileEncryptionProperties(ParquetCipher::type cipher, |
427 | | ::arrow::util::SecureString footer_key, |
428 | | std::string footer_key_metadata, bool encrypted_footer, |
429 | | std::string aad_prefix, bool store_aad_prefix_in_file, |
430 | | ColumnPathToEncryptionPropertiesMap encrypted_columns); |
431 | | }; |
432 | | |
433 | | } // namespace parquet |