Coverage Report

Created: 2026-03-10 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/arrow/cpp/src/parquet/encryption/encryption.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cassert>
21
#include <map>
22
#include <memory>
23
#include <string>
24
#include <utility>
25
26
#include "arrow/util/secure_string.h"
27
#include "parquet/exception.h"
28
#include "parquet/schema.h"
29
#include "parquet/types.h"
30
31
namespace parquet {
32
33
// Defaults and limits shared by the encryption/decryption property classes
// declared later in this header.

/// Cipher that FileEncryptionProperties::Builder starts with; it stays in
/// effect unless Builder::algorithm() is called.
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
34
    ParquetCipher::AES_GCM_V1;
35
/// NOTE(review): not referenced elsewhere in this header; presumably an upper
/// bound on the length of AAD metadata - confirm against its users.
static constexpr int32_t kMaximalAadMetadataLength = 256;
36
/// Initial value of FileEncryptionProperties::Builder's encrypted-footer flag
/// (cleared by Builder::set_plaintext_footer()).
static constexpr bool kDefaultEncryptedFooter = true;
37
/// Initial value of FileDecryptionProperties::Builder's plaintext footer
/// integrity check flag (cleared by disable_footer_signature_verification()).
static constexpr bool kDefaultCheckSignature = true;
38
/// Initial value of FileDecryptionProperties::Builder's "plaintext files
/// allowed" flag (set by Builder::plaintext_files_allowed()).
static constexpr bool kDefaultAllowPlaintextFiles = false;
39
/// NOTE(review): not referenced elsewhere in this header; presumably the
/// length of the per-file unique part of the AAD - confirm against its users.
static constexpr int32_t kAadFileUniqueLength = 8;
40
41
// Forward declaration so the map alias below can name the class before its
// definition.
class ColumnDecryptionProperties;
42
/// Map from a column path string to that column's decryption properties.
using ColumnPathToDecryptionPropertiesMap =
43
    std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;
44
45
// Forward declaration so the map alias below can name the class before its
// definition.
class ColumnEncryptionProperties;
46
/// Map from a column path string to that column's encryption properties.
using ColumnPathToEncryptionPropertiesMap =
47
    std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
48
49
class PARQUET_EXPORT DecryptionKeyRetriever {
50
 public:
51
  /// \brief Retrieve a key.
52
  virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0;
53
54
22.7k
  virtual ~DecryptionKeyRetriever() {}
55
};
56
57
/// Simple integer key retriever
58
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
59
 public:
60
  void PutKey(uint32_t key_id, ::arrow::util::SecureString key);
61
62
0
  ::arrow::util::SecureString GetKey(const std::string& key_id_string) override {
63
0
    // key_id_string is string but for IntegerKeyIdRetriever it encodes
64
0
    // a native-endian 32 bit unsigned integer key_id
65
0
    uint32_t key_id;
66
0
    assert(key_id_string.size() == sizeof(key_id));
67
0
    memcpy(&key_id, key_id_string.data(), sizeof(key_id));
68
0
69
0
    return key_map_.at(key_id);
70
0
  }
71
72
 private:
73
  std::map<uint32_t, ::arrow::util::SecureString> key_map_;
74
};
75
76
/// Simple string key retriever: a DecryptionKeyRetriever whose key ids are
/// plain strings. Both member functions are defined out of line.
77
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
78
 public:
79
  /// NOTE(review): presumably stores `key` in key_map_ under `key_id` -
  /// confirm against the implementation file.
  void PutKey(std::string key_id, ::arrow::util::SecureString key);
80
  /// Implements DecryptionKeyRetriever::GetKey for string key ids.
  /// NOTE(review): behavior for an unknown `key_id` is not visible here.
  ::arrow::util::SecureString GetKey(const std::string& key_id) override;
81
82
 private:
83
  // Key storage, ordered by key id string.
  std::map<std::string, ::arrow::util::SecureString> key_map_;
84
};
85
86
/// Exception type derived from ParquetException that is constructed from a
/// column path.
/// NOTE(review): presumably raised when a column cannot be accessed by the
/// reader - confirm against the throw sites, which are not in this header.
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
87
 public:
88
  /// \param columnPath handed to the ParquetException constructor as a C
  ///   string (via c_str()).
  explicit HiddenColumnException(const std::string& columnPath)
89
0
      : ParquetException(columnPath.c_str()) {}
90
};
91
92
/// Exception type derived from ParquetException that is constructed from a
/// column path.
/// NOTE(review): presumably raised when the key for a column is not
/// accessible - confirm against the throw sites, which are not in this header.
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
93
 public:
94
  /// \param columnPath handed to the ParquetException constructor as a C
  ///   string (via c_str()).
  explicit KeyAccessDeniedException(const std::string& columnPath)
95
0
      : ParquetException(columnPath.c_str()) {}
96
};
97
98
31.7k
/// \brief Expose the bytes of `str` as a span of `const uint8_t`.
///
/// An empty string produces a default-constructed span. Otherwise the span is
/// built from `str.data()` and `str.size()`, i.e. it refers to the string's
/// own buffer rather than to a copy.
inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
  if (!str.empty()) {
    const auto* bytes = reinterpret_cast<const uint8_t*>(str.data());
    return {bytes, str.size()};
  }

  return {};
}
105
106
class PARQUET_EXPORT ColumnEncryptionProperties {
107
 public:
108
  class PARQUET_EXPORT Builder {
109
   public:
110
    PARQUET_DEPRECATED("name argument is ignored, use default constructor instead")
111
0
    explicit Builder(const std::string& name) : encrypted_(true) {}
112
113
    PARQUET_DEPRECATED("path argument is ignored, use default constructor instead")
114
0
    explicit Builder(const schema::ColumnPath& path) : encrypted_(true) {}
115
116
    Builder() = default;
117
118
    /// Set a column-specific key.
119
    /// If key is not set on an encrypted column, the column will
120
    /// be encrypted with the footer key.
121
    /// keyBytes Key length must be either 16, 24 or 32 bytes.
122
    /// Caller is responsible for wiping out the input key array.
123
    Builder* key(::arrow::util::SecureString column_key);
124
125
    /// Set a key retrieval metadata.
126
    /// use either key_metadata() or key_id(), not both
127
    Builder* key_metadata(std::string key_metadata);
128
129
    /// A convenience function to set key metadata using a string id.
130
    /// Set a key retrieval metadata (converted from String).
131
    /// use either key_metadata() or key_id(), not both
132
    /// key_id will be converted to metadata (UTF-8 array).
133
    Builder* key_id(std::string key_id);
134
135
0
    std::shared_ptr<ColumnEncryptionProperties> build() {
136
0
      return std::shared_ptr<ColumnEncryptionProperties>(
137
0
          new ColumnEncryptionProperties(encrypted_, key_, key_metadata_));
138
0
    }
139
140
   private:
141
    bool encrypted_ = true;
142
    ::arrow::util::SecureString key_;
143
    std::string key_metadata_;
144
  };
145
146
0
  bool is_encrypted() const { return encrypted_; }
147
0
  bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
148
0
  const ::arrow::util::SecureString& key() const { return key_; }
149
0
  const std::string& key_metadata() const { return key_metadata_; }
150
151
  static std::shared_ptr<ColumnEncryptionProperties> Unencrypted();
152
  static std::shared_ptr<ColumnEncryptionProperties> WithFooterKey();
153
  static std::shared_ptr<ColumnEncryptionProperties> WithColumnKey(
154
      ::arrow::util::SecureString key, std::string key_metadata = "");
155
156
 private:
157
  bool encrypted_;
158
  bool encrypted_with_footer_key_;
159
  ::arrow::util::SecureString key_;
160
  std::string key_metadata_;
161
  explicit ColumnEncryptionProperties(bool encrypted, ::arrow::util::SecureString key,
162
                                      std::string key_metadata);
163
};
164
165
class PARQUET_EXPORT ColumnDecryptionProperties {
166
 public:
167
  class PARQUET_EXPORT Builder {
168
   public:
169
0
    explicit Builder(std::string name) : column_path_(std::move(name)) {}
170
171
0
    explicit Builder(const schema::ColumnPath& path) : Builder(path.ToDotString()) {}
172
173
    /// Set an explicit column key. If applied on a file that contains
174
    /// key metadata for this column the metadata will be ignored,
175
    /// the column will be decrypted with this key.
176
    /// key length must be either 16, 24 or 32 bytes.
177
    Builder* key(::arrow::util::SecureString key);
178
179
    std::shared_ptr<ColumnDecryptionProperties> build();
180
181
   private:
182
    std::string column_path_;
183
    ::arrow::util::SecureString key_;
184
  };
185
186
0
  const std::string& column_path() const { return column_path_; }
187
0
  const ::arrow::util::SecureString& key() const { return key_; }
188
189
 private:
190
  std::string column_path_;
191
  ::arrow::util::SecureString key_;
192
193
  /// This class is only required for setting explicit column decryption keys -
194
  /// to override key retriever (or to provide keys when key metadata and/or
195
  /// key retriever are not available)
196
  explicit ColumnDecryptionProperties(std::string column_path,
197
                                      ::arrow::util::SecureString key);
198
};
199
200
class PARQUET_EXPORT AADPrefixVerifier {
201
 public:
202
  /// Verifies identity (AAD Prefix) of individual file,
203
  /// or of file collection in a data set.
204
  /// Throws exception if an AAD prefix is wrong.
205
  /// In a data set, AAD Prefixes should be collected,
206
  /// and then checked for missing files.
207
  virtual void Verify(const std::string& aad_prefix) = 0;
208
0
  virtual ~AADPrefixVerifier() {}
209
};
210
211
class PARQUET_EXPORT FileDecryptionProperties {
212
 public:
213
  class PARQUET_EXPORT Builder {
214
   public:
215
22.7k
    Builder() {
216
22.7k
      check_plaintext_footer_integrity_ = kDefaultCheckSignature;
217
22.7k
      plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
218
22.7k
    }
219
220
    /// Set an explicit footer key. If applied on a file that contains
221
    /// footer key metadata the metadata will be ignored, the footer
222
    /// will be decrypted/verified with this key.
223
    /// If explicit key is not set, footer key will be fetched from
224
    /// key retriever.
225
    /// With explicit keys or AAD prefix, new encryption properties object must be
226
    /// created for each encrypted file.
227
    /// Explicit encryption keys (footer and column) are cloned.
228
    /// Upon completion of file reading, the cloned encryption keys in the properties
229
    /// will be wiped out (array values set to 0).
230
    /// Caller is responsible for wiping out the input key array.
231
    /// param footerKey Key length must be either 16, 24 or 32 bytes.
232
    Builder* footer_key(::arrow::util::SecureString footer_key);
233
234
    /// Set explicit column keys (decryption properties).
235
    /// Its also possible to set a key retriever on this property object.
236
    /// Upon file decryption, availability of explicit keys is checked before
237
    /// invocation of the retriever callback.
238
    /// If an explicit key is available for a footer or a column,
239
    /// its key metadata will be ignored.
240
    Builder* column_keys(
241
        ColumnPathToDecryptionPropertiesMap column_decryption_properties);
242
243
    /// Set a key retriever callback. Its also possible to
244
    /// set explicit footer or column keys on this file property object.
245
    /// Upon file decryption, availability of explicit keys is checked before
246
    /// invocation of the retriever callback.
247
    /// If an explicit key is available for a footer or a column,
248
    /// its key metadata will be ignored.
249
    Builder* key_retriever(std::shared_ptr<DecryptionKeyRetriever> key_retriever);
250
251
    /// Skip integrity verification of plaintext footers.
252
    /// If not called, integrity of plaintext footers will be checked in runtime,
253
    /// and an exception will be thrown in the following situations:
254
    /// - footer signing key is not available
255
    /// (not passed, or not found by key retriever)
256
    /// - footer content and signature don't match
257
0
    Builder* disable_footer_signature_verification() {
258
0
      check_plaintext_footer_integrity_ = false;
259
0
      return this;
260
0
    }
261
262
    /// Explicitly supply the file AAD prefix.
263
    /// A must when a prefix is used for file encryption, but not stored in file.
264
    /// If AAD prefix is stored in file, it will be compared to the explicitly
265
    /// supplied value and an exception will be thrown if they differ.
266
    Builder* aad_prefix(std::string aad_prefix);
267
268
    /// Set callback for verification of AAD Prefixes stored in file.
269
    Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
270
271
    /// By default, reading plaintext (unencrypted) files is not
272
    /// allowed when using a decryptor
273
    /// - in order to detect files that were not encrypted by mistake.
274
    /// However, the default behavior can be overridden by calling this method.
275
    /// The caller should use then a different method to ensure encryption
276
    /// of files with sensitive data.
277
22.7k
    Builder* plaintext_files_allowed() {
278
22.7k
      plaintext_files_allowed_ = true;
279
22.7k
      return this;
280
22.7k
    }
281
282
22.7k
    std::shared_ptr<FileDecryptionProperties> build() {
283
22.7k
      return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
284
22.7k
          footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
285
22.7k
          aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
286
22.7k
    }
287
288
   private:
289
    ::arrow::util::SecureString footer_key_;
290
    std::string aad_prefix_;
291
    std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
292
    ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
293
294
    std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
295
    bool check_plaintext_footer_integrity_;
296
    bool plaintext_files_allowed_;
297
  };
298
299
  const ::arrow::util::SecureString& column_key(const std::string& column_path) const;
300
301
233
  const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
302
303
251
  const std::string& aad_prefix() const { return aad_prefix_; }
304
305
48.8k
  const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
306
48.8k
    return key_retriever_;
307
48.8k
  }
308
309
33
  bool check_plaintext_footer_integrity() const {
310
33
    return check_plaintext_footer_integrity_;
311
33
  }
312
313
19.4k
  bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
314
315
244
  const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
316
244
    return aad_prefix_verifier_;
317
244
  }
318
319
 private:
320
  ::arrow::util::SecureString footer_key_;
321
  std::string aad_prefix_;
322
  std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
323
  ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
324
  std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
325
  bool check_plaintext_footer_integrity_;
326
  bool plaintext_files_allowed_;
327
328
  FileDecryptionProperties(
329
      ::arrow::util::SecureString footer_key,
330
      std::shared_ptr<DecryptionKeyRetriever> key_retriever,
331
      bool check_plaintext_footer_integrity, std::string aad_prefix,
332
      std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
333
      ColumnPathToDecryptionPropertiesMap column_decryption_properties,
334
      bool plaintext_files_allowed);
335
};
336
337
class PARQUET_EXPORT FileEncryptionProperties {
338
 public:
339
  class PARQUET_EXPORT Builder {
340
   public:
341
    explicit Builder(::arrow::util::SecureString footer_key)
342
        : parquet_cipher_(kDefaultEncryptionAlgorithm),
343
          encrypted_footer_(kDefaultEncryptedFooter),
344
0
          footer_key_(std::move(footer_key)) {
345
0
      store_aad_prefix_in_file_ = false;
346
0
    }
347
348
    /// Create files with plaintext footer.
349
    /// If not called, the files will be created with encrypted footer (default).
350
0
    Builder* set_plaintext_footer() {
351
0
      encrypted_footer_ = false;
352
0
      return this;
353
0
    }
354
355
    /// Set encryption algorithm.
356
    /// If not called, files will be encrypted with AES_GCM_V1 (default).
357
0
    Builder* algorithm(ParquetCipher::type parquet_cipher) {
358
0
      parquet_cipher_ = parquet_cipher;
359
0
      return this;
360
0
    }
361
362
    /// Set a key retrieval metadata (converted from String).
363
    /// use either footer_key_metadata or footer_key_id, not both.
364
    Builder* footer_key_id(std::string key_id);
365
366
    /// Set a key retrieval metadata.
367
    /// use either footer_key_metadata or footer_key_id, not both.
368
    Builder* footer_key_metadata(std::string footer_key_metadata);
369
370
    /// Set the file AAD Prefix.
371
    Builder* aad_prefix(std::string aad_prefix);
372
373
    /// Skip storing AAD Prefix in file.
374
    /// If not called, and if AAD Prefix is set, it will be stored.
375
    Builder* disable_aad_prefix_storage();
376
377
    /// Set the list of encrypted columns and their properties (keys etc).
378
    /// If not called, all columns will be encrypted with the footer key.
379
    /// If called, the file columns not in the list will be left unencrypted.
380
    Builder* encrypted_columns(ColumnPathToEncryptionPropertiesMap encrypted_columns);
381
382
0
    std::shared_ptr<FileEncryptionProperties> build() {
383
0
      return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
384
0
          parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
385
0
          aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
386
0
    }
387
388
   private:
389
    ParquetCipher::type parquet_cipher_;
390
    bool encrypted_footer_;
391
    ::arrow::util::SecureString footer_key_;
392
    std::string footer_key_metadata_;
393
394
    std::string aad_prefix_;
395
    bool store_aad_prefix_in_file_;
396
    ColumnPathToEncryptionPropertiesMap encrypted_columns_;
397
  };
398
399
0
  bool encrypted_footer() const { return encrypted_footer_; }
400
401
0
  EncryptionAlgorithm algorithm() const { return algorithm_; }
402
403
0
  const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
404
405
0
  const std::string& footer_key_metadata() const { return footer_key_metadata_; }
406
407
0
  const std::string& file_aad() const { return file_aad_; }
408
409
  std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
410
      const std::string& column_path);
411
412
0
  const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const {
413
0
    return encrypted_columns_;
414
0
  }
415
416
 private:
417
  EncryptionAlgorithm algorithm_;
418
  ::arrow::util::SecureString footer_key_;
419
  std::string footer_key_metadata_;
420
  bool encrypted_footer_;
421
  std::string file_aad_;
422
  std::string aad_prefix_;
423
  bool store_aad_prefix_in_file_;
424
  ColumnPathToEncryptionPropertiesMap encrypted_columns_;
425
426
  FileEncryptionProperties(ParquetCipher::type cipher,
427
                           ::arrow::util::SecureString footer_key,
428
                           std::string footer_key_metadata, bool encrypted_footer,
429
                           std::string aad_prefix, bool store_aad_prefix_in_file,
430
                           ColumnPathToEncryptionPropertiesMap encrypted_columns);
431
};
432
433
}  // namespace parquet