1# Copyright (c) 2022, exiledkingcc
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright notice,
9# this list of conditions and the following disclaimer.
10# * Redistributions in binary form must reproduce the above copyright notice,
11# this list of conditions and the following disclaimer in the documentation
12# and/or other materials provided with the distribution.
13# * The name of the author may not be used to endorse or promote products
14# derived from this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26# POSSIBILITY OF SUCH DAMAGE.
27import hashlib
28import secrets
29import struct
30from enum import Enum, IntEnum
31from typing import Any, Dict, Optional, Tuple, Union, cast
32
33from pypdf._crypt_providers import (
34 CryptAES,
35 CryptBase,
36 CryptIdentity,
37 CryptRC4,
38 aes_cbc_decrypt,
39 aes_cbc_encrypt,
40 aes_ecb_decrypt,
41 aes_ecb_encrypt,
42 rc4_decrypt,
43 rc4_encrypt,
44)
45
46from ._utils import logger_warning
47from .generic import (
48 ArrayObject,
49 ByteStringObject,
50 DictionaryObject,
51 NameObject,
52 NumberObject,
53 PdfObject,
54 StreamObject,
55 TextStringObject,
56 create_string_object,
57)
58
59
60class CryptFilter:
61 def __init__(
62 self,
63 stm_crypt: CryptBase,
64 str_crypt: CryptBase,
65 ef_crypt: CryptBase,
66 ) -> None:
67 self.stm_crypt = stm_crypt
68 self.str_crypt = str_crypt
69 self.ef_crypt = ef_crypt
70
71 def encrypt_object(self, obj: PdfObject) -> PdfObject:
72 if isinstance(obj, ByteStringObject):
73 data = self.str_crypt.encrypt(obj.original_bytes)
74 obj = ByteStringObject(data)
75 elif isinstance(obj, TextStringObject):
76 data = self.str_crypt.encrypt(obj.get_encoded_bytes())
77 obj = ByteStringObject(data)
78 elif isinstance(obj, StreamObject):
79 obj2 = StreamObject()
80 obj2.update(obj)
81 obj2.set_data(self.stm_crypt.encrypt(obj._data))
82 for key, value in obj.items(): # Dont forget the Stream dict.
83 obj2[key] = self.encrypt_object(value)
84 obj = obj2
85 elif isinstance(obj, DictionaryObject):
86 obj2 = DictionaryObject() # type: ignore
87 for key, value in obj.items():
88 obj2[key] = self.encrypt_object(value)
89 obj = obj2
90 elif isinstance(obj, ArrayObject):
91 obj = ArrayObject(self.encrypt_object(x) for x in obj)
92 return obj
93
94 def decrypt_object(self, obj: PdfObject) -> PdfObject:
95 if isinstance(obj, (ByteStringObject, TextStringObject)):
96 data = self.str_crypt.decrypt(obj.original_bytes)
97 obj = create_string_object(data)
98 elif isinstance(obj, StreamObject):
99 obj._data = self.stm_crypt.decrypt(obj._data)
100 for key, value in obj.items(): # Dont forget the Stream dict.
101 obj[key] = self.decrypt_object(value)
102 elif isinstance(obj, DictionaryObject):
103 for key, value in obj.items():
104 obj[key] = self.decrypt_object(value)
105 elif isinstance(obj, ArrayObject):
106 for i in range(len(obj)):
107 obj[i] = self.decrypt_object(obj[i])
108 return obj
109
110
111_PADDING = (
112 b"\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
113 b"\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c\xa9\xfe\x64\x53\x69\x7a"
114)
115
116
117def _padding(data: bytes) -> bytes:
118 return (data + _PADDING)[:32]
119
120
121class AlgV4:
122 @staticmethod
123 def compute_key(
124 password: bytes,
125 rev: int,
126 key_size: int,
127 o_entry: bytes,
128 P: int,
129 id1_entry: bytes,
130 metadata_encrypted: bool,
131 ) -> bytes:
132 """
133 Algorithm 2: Computing an encryption key.
134
135 a) Pad or truncate the password string to exactly 32 bytes. If the
136 password string is more than 32 bytes long,
137 use only its first 32 bytes; if it is less than 32 bytes long, pad it
138 by appending the required number of
139 additional bytes from the beginning of the following padding string:
140 < 28 BF 4E 5E 4E 75 8A 41 64 00 4E 56 FF FA 01 08
141 2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A >
142 That is, if the password string is n bytes long, append
143 the first 32 - n bytes of the padding string to the end
144 of the password string. If the password string is empty
145 (zero-length), meaning there is no user password,
146 substitute the entire padding string in its place.
147
148 b) Initialize the MD5 hash function and pass the result of step (a)
149 as input to this function.
150 c) Pass the value of the encryption dictionary’s O entry to the
151 MD5 hash function. ("Algorithm 3: Computing
152 the encryption dictionary’s O (owner password) value" shows how the
153 O value is computed.)
154 d) Convert the integer value of the P entry to a 32-bit unsigned binary
155 number and pass these bytes to the
156 MD5 hash function, low-order byte first.
157 e) Pass the first element of the file’s file identifier array (the value
158 of the ID entry in the document’s trailer
159 dictionary; see Table 15) to the MD5 hash function.
160 f) (Security handlers of revision 4 or greater) If document metadata is
161 not being encrypted, pass 4 bytes with
162 the value 0xFFFFFFFF to the MD5 hash function.
163 g) Finish the hash.
164 h) (Security handlers of revision 3 or greater) Do the following
165 50 times: Take the output from the previous
166 MD5 hash and pass the first n bytes of the output as input into a new
167 MD5 hash, where n is the number of
168 bytes of the encryption key as defined by the value of the encryption
169 dictionary’s Length entry.
170 i) Set the encryption key to the first n bytes of the output from the
171 final MD5 hash, where n shall always be 5
172 for security handlers of revision 2 but, for security handlers of
173 revision 3 or greater, shall depend on the
174 value of the encryption dictionary’s Length entry.
175
176 Args:
177 password: The encryption secret as a bytes-string
178 rev: The encryption revision (see PDF standard)
179 key_size: The size of the key in bytes
180 o_entry: The owner entry
181 P: A set of flags specifying which operations shall be permitted
182 when the document is opened with user access. If bit 2 is set to 1,
183 all other bits are ignored and all operations are permitted.
184 If bit 2 is set to 0, permission for operations are based on the
185 values of the remaining flags defined in Table 24.
186 id1_entry:
187 metadata_encrypted: A boolean indicating if the metadata is encrypted.
188
189 Returns:
190 The u_hash digest of length key_size
191
192 """
193 a = _padding(password)
194 u_hash = hashlib.md5(a)
195 u_hash.update(o_entry)
196 u_hash.update(struct.pack("<I", P))
197 u_hash.update(id1_entry)
198 if rev >= 4 and not metadata_encrypted:
199 u_hash.update(b"\xff\xff\xff\xff")
200 u_hash_digest = u_hash.digest()
201 length = key_size // 8
202 if rev >= 3:
203 for _ in range(50):
204 u_hash_digest = hashlib.md5(u_hash_digest[:length]).digest()
205 return u_hash_digest[:length]
206
207 @staticmethod
208 def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes:
209 """
210 Algorithm 3: Computing the encryption dictionary’s O (owner password) value.
211
212 a) Pad or truncate the owner password string as described in step (a)
213 of "Algorithm 2: Computing an encryption key".
214 If there is no owner password, use the user password instead.
215 b) Initialize the MD5 hash function and pass the result of step (a) as
216 input to this function.
217 c) (Security handlers of revision 3 or greater) Do the following 50 times:
218 Take the output from the previous
219 MD5 hash and pass it as input into a new MD5 hash.
220 d) Create an RC4 encryption key using the first n bytes of the output
221 from the final MD5 hash, where n shall
222 always be 5 for security handlers of revision 2 but, for security
223 handlers of revision 3 or greater, shall
224 depend on the value of the encryption dictionary’s Length entry.
225 e) Pad or truncate the user password string as described in step (a) of
226 "Algorithm 2: Computing an encryption key".
227 f) Encrypt the result of step (e), using an RC4 encryption function with
228 the encryption key obtained in step (d).
229 g) (Security handlers of revision 3 or greater) Do the following 19 times:
230 Take the output from the previous
231 invocation of the RC4 function and pass it as input to a new
232 invocation of the function; use an encryption
233 key generated by taking each byte of the encryption key obtained in
234 step (d) and performing an XOR
235 (exclusive or) operation between that byte and the single-byte value
236 of the iteration counter (from 1 to 19).
237 h) Store the output from the final invocation of the RC4 function as
238 the value of the O entry in the encryption dictionary.
239
240 Args:
241 owner_password:
242 rev: The encryption revision (see PDF standard)
243 key_size: The size of the key in bytes
244
245 Returns:
246 The RC4 key
247
248 """
249 a = _padding(owner_password)
250 o_hash_digest = hashlib.md5(a).digest()
251
252 if rev >= 3:
253 for _ in range(50):
254 o_hash_digest = hashlib.md5(o_hash_digest).digest()
255
256 return o_hash_digest[: key_size // 8]
257
258 @staticmethod
259 def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes:
260 """
261 See :func:`compute_O_value_key`.
262
263 Args:
264 rc4_key:
265 user_password:
266 rev: The encryption revision (see PDF standard)
267
268 Returns:
269 The RC4 encrypted
270
271 """
272 a = _padding(user_password)
273 rc4_enc = rc4_encrypt(rc4_key, a)
274 if rev >= 3:
275 for i in range(1, 20):
276 key = bytes(x ^ i for x in rc4_key)
277 rc4_enc = rc4_encrypt(key, rc4_enc)
278 return rc4_enc
279
280 @staticmethod
281 def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes:
282 """
283 Algorithm 4: Computing the encryption dictionary’s U (user password) value.
284
285 (Security handlers of revision 2)
286
287 a) Create an encryption key based on the user password string, as
288 described in "Algorithm 2: Computing an encryption key".
289 b) Encrypt the 32-byte padding string shown in step (a) of
290 "Algorithm 2: Computing an encryption key", using an RC4 encryption
291 function with the encryption key from the preceding step.
292 c) Store the result of step (b) as the value of the U entry in the
293 encryption dictionary.
294
295 Args:
296 key:
297 rev: The encryption revision (see PDF standard)
298 id1_entry:
299
300 Returns:
301 The value
302
303 """
304 if rev <= 2:
305 return rc4_encrypt(key, _PADDING)
306
307 """
308 Algorithm 5: Computing the encryption dictionary’s U (user password) value.
309
310 (Security handlers of revision 3 or greater)
311
312 a) Create an encryption key based on the user password string, as
313 described in "Algorithm 2: Computing an encryption key".
314 b) Initialize the MD5 hash function and pass the 32-byte padding string
315 shown in step (a) of "Algorithm 2:
316 Computing an encryption key" as input to this function.
317 c) Pass the first element of the file’s file identifier array (the value
318 of the ID entry in the document’s trailer
319 dictionary; see Table 15) to the hash function and finish the hash.
320 d) Encrypt the 16-byte result of the hash, using an RC4 encryption
321 function with the encryption key from step (a).
322 e) Do the following 19 times: Take the output from the previous
323 invocation of the RC4 function and pass it as input to a new
324 invocation of the function; use an encryption key generated by
325 taking each byte of the original encryption key obtained in
326 step (a) and performing an XOR (exclusive or) operation between that
327 byte and the single-byte value of the iteration counter (from 1 to 19).
328 f) Append 16 bytes of arbitrary padding to the output from the final
329 invocation of the RC4 function and store the 32-byte result as the
330 value of the U entry in the encryption dictionary.
331 """
332 u_hash = hashlib.md5(_PADDING)
333 u_hash.update(id1_entry)
334 rc4_enc = rc4_encrypt(key, u_hash.digest())
335 for i in range(1, 20):
336 rc4_key = bytes(x ^ i for x in key)
337 rc4_enc = rc4_encrypt(rc4_key, rc4_enc)
338 return _padding(rc4_enc)
339
340 @staticmethod
341 def verify_user_password(
342 user_password: bytes,
343 rev: int,
344 key_size: int,
345 o_entry: bytes,
346 u_entry: bytes,
347 P: int,
348 id1_entry: bytes,
349 metadata_encrypted: bool,
350 ) -> bytes:
351 """
352 Algorithm 6: Authenticating the user password.
353
354 a) Perform all but the last step of "Algorithm 4: Computing the
355 encryption dictionary’s U (user password) value (Security handlers of
356 revision 2)" or "Algorithm 5: Computing the encryption dictionary’s U
357 (user password) value (Security handlers of revision 3 or greater)"
358 using the supplied password string.
359 b) If the result of step (a) is equal to the value of the encryption
360 dictionary’s U entry (comparing on the first 16 bytes in the case of
361 security handlers of revision 3 or greater), the password supplied is
362 the correct user password. The key obtained in step (a) (that is, in
363 the first step of "Algorithm 4: Computing the encryption
364 dictionary’s U (user password) value
365 (Security handlers of revision 2)" or
366 "Algorithm 5: Computing the encryption dictionary’s U (user password)
367 value (Security handlers of revision 3 or greater)") shall be used
368 to decrypt the document.
369
370 Args:
371 user_password: The user password as a bytes stream
372 rev: The encryption revision (see PDF standard)
373 key_size: The size of the key in bytes
374 o_entry: The owner entry
375 u_entry: The user entry
376 P: A set of flags specifying which operations shall be permitted
377 when the document is opened with user access. If bit 2 is set to 1,
378 all other bits are ignored and all operations are permitted.
379 If bit 2 is set to 0, permission for operations are based on the
380 values of the remaining flags defined in Table 24.
381 id1_entry:
382 metadata_encrypted: A boolean indicating if the metadata is encrypted.
383
384 Returns:
385 The key
386
387 """
388 key = AlgV4.compute_key(
389 user_password, rev, key_size, o_entry, P, id1_entry, metadata_encrypted
390 )
391 u_value = AlgV4.compute_U_value(key, rev, id1_entry)
392 if rev >= 3:
393 u_value = u_value[:16]
394 u_entry = u_entry[:16]
395 if u_value != u_entry:
396 key = b""
397 return key
398
399 @staticmethod
400 def verify_owner_password(
401 owner_password: bytes,
402 rev: int,
403 key_size: int,
404 o_entry: bytes,
405 u_entry: bytes,
406 P: int,
407 id1_entry: bytes,
408 metadata_encrypted: bool,
409 ) -> bytes:
410 """
411 Algorithm 7: Authenticating the owner password.
412
413 a) Compute an encryption key from the supplied password string, as
414 described in steps (a) to (d) of
415 "Algorithm 3: Computing the encryption dictionary’s O (owner password)
416 value".
417 b) (Security handlers of revision 2 only) Decrypt the value of the
418 encryption dictionary’s O entry, using an RC4
419 encryption function with the encryption key computed in step (a).
420 (Security handlers of revision 3 or greater) Do the following 20 times:
421 Decrypt the value of the encryption dictionary’s O entry (first iteration)
422 or the output from the previous iteration (all subsequent iterations),
423 using an RC4 encryption function with a different encryption key at
424 each iteration. The key shall be generated by taking the original key
425 (obtained in step (a)) and performing an XOR (exclusive or) operation
426 between each byte of the key and the single-byte value of the
427 iteration counter (from 19 to 0).
428 c) The result of step (b) purports to be the user password.
429 Authenticate this user password using
430 "Algorithm 6: Authenticating the user password".
431 If it is correct, the password supplied is the correct owner password.
432
433 Args:
434 owner_password:
435 rev: The encryption revision (see PDF standard)
436 key_size: The size of the key in bytes
437 o_entry: The owner entry
438 u_entry: The user entry
439 P: A set of flags specifying which operations shall be permitted
440 when the document is opened with user access. If bit 2 is set to 1,
441 all other bits are ignored and all operations are permitted.
442 If bit 2 is set to 0, permission for operations are based on the
443 values of the remaining flags defined in Table 24.
444 id1_entry:
445 metadata_encrypted: A boolean indicating if the metadata is encrypted.
446
447 Returns:
448 bytes
449
450 """
451 rc4_key = AlgV4.compute_O_value_key(owner_password, rev, key_size)
452
453 if rev <= 2:
454 user_password = rc4_decrypt(rc4_key, o_entry)
455 else:
456 user_password = o_entry
457 for i in range(19, -1, -1):
458 key = bytes(x ^ i for x in rc4_key)
459 user_password = rc4_decrypt(key, user_password)
460 return AlgV4.verify_user_password(
461 user_password,
462 rev,
463 key_size,
464 o_entry,
465 u_entry,
466 P,
467 id1_entry,
468 metadata_encrypted,
469 )
470
471
472class AlgV5:
473 @staticmethod
474 def verify_owner_password(
475 R: int, password: bytes, o_value: bytes, oe_value: bytes, u_value: bytes
476 ) -> bytes:
477 """
478 Algorithm 3.2a Computing an encryption key.
479
480 To understand the algorithm below, it is necessary to treat the O and U
481 strings in the Encrypt dictionary as made up of three sections.
482 The first 32 bytes are a hash value (explained below). The next 8 bytes
483 are called the Validation Salt. The final 8 bytes are called the Key Salt.
484
485 1. The password string is generated from Unicode input by processing the
486 input string with the SASLprep (IETF RFC 4013) profile of
487 stringprep (IETF RFC 3454), and then converting to a UTF-8
488 representation.
489 2. Truncate the UTF-8 representation to 127 bytes if it is longer than
490 127 bytes.
491 3. Test the password against the owner key by computing the SHA-256 hash
492 of the UTF-8 password concatenated with the 8 bytes of owner
493 Validation Salt, concatenated with the 48-byte U string. If the
494 32-byte result matches the first 32 bytes of the O string, this is
495 the owner password.
496 Compute an intermediate owner key by computing the SHA-256 hash of
497 the UTF-8 password concatenated with the 8 bytes of owner Key Salt,
498 concatenated with the 48-byte U string. The 32-byte result is the
499 key used to decrypt the 32-byte OE string using AES-256 in CBC mode
500 with no padding and an initialization vector of zero.
501 The 32-byte result is the file encryption key.
502 4. Test the password against the user key by computing the SHA-256 hash
503 of the UTF-8 password concatenated with the 8 bytes of user
504 Validation Salt. If the 32 byte result matches the first 32 bytes of
505 the U string, this is the user password.
506 Compute an intermediate user key by computing the SHA-256 hash of the
507 UTF-8 password concatenated with the 8 bytes of user Key Salt.
508 The 32-byte result is the key used to decrypt the 32-byte
509 UE string using AES-256 in CBC mode with no padding and an
510 initialization vector of zero. The 32-byte result is the file
511 encryption key.
512 5. Decrypt the 16-byte Perms string using AES-256 in ECB mode with an
513 initialization vector of zero and the file encryption key as the key.
514 Verify that bytes 9-11 of the result are the characters ‘a’, ‘d’, ‘b’.
515 Bytes 0-3 of the decrypted Perms entry, treated as a little-endian
516 integer, are the user permissions.
517 They should match the value in the P key.
518
519 Args:
520 R: A number specifying which revision of the standard security
521 handler shall be used to interpret this dictionary
522 password: The owner password
523 o_value: A 32-byte string, based on both the owner and user passwords,
524 that shall be used in computing the encryption key and in
525 determining whether a valid owner password was entered
526 oe_value:
527 u_value: A 32-byte string, based on the user password, that shall be
528 used in determining whether to prompt the user for a password and,
529 if so, whether a valid user or owner password was entered.
530
531 Returns:
532 The key
533
534 """
535 password = password[:127]
536 if (
537 AlgV5.calculate_hash(R, password, o_value[32:40], u_value[:48])
538 != o_value[:32]
539 ):
540 return b""
541 iv = bytes(0 for _ in range(16))
542 tmp_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_value[:48])
543 return aes_cbc_decrypt(tmp_key, iv, oe_value)
544
545 @staticmethod
546 def verify_user_password(
547 R: int, password: bytes, u_value: bytes, ue_value: bytes
548 ) -> bytes:
549 """
550 See :func:`verify_owner_password`.
551
552 Args:
553 R: A number specifying which revision of the standard security
554 handler shall be used to interpret this dictionary
555 password: The user password
556 u_value: A 32-byte string, based on the user password, that shall be
557 used in determining whether to prompt the user for a password
558 and, if so, whether a valid user or owner password was entered.
559 ue_value:
560
561 Returns:
562 bytes
563
564 """
565 password = password[:127]
566 if AlgV5.calculate_hash(R, password, u_value[32:40], b"") != u_value[:32]:
567 return b""
568 iv = bytes(0 for _ in range(16))
569 tmp_key = AlgV5.calculate_hash(R, password, u_value[40:48], b"")
570 return aes_cbc_decrypt(tmp_key, iv, ue_value)
571
572 @staticmethod
573 def calculate_hash(R: int, password: bytes, salt: bytes, udata: bytes) -> bytes:
574 # https://github.com/qpdf/qpdf/blob/main/libqpdf/QPDF_encryption.cc
575 k = hashlib.sha256(password + salt + udata).digest()
576 if R < 6:
577 return k
578 count = 0
579 while True:
580 count += 1
581 k1 = password + k + udata
582 e = aes_cbc_encrypt(k[:16], k[16:32], k1 * 64)
583 hash_fn = (
584 hashlib.sha256,
585 hashlib.sha384,
586 hashlib.sha512,
587 )[sum(e[:16]) % 3]
588 k = hash_fn(e).digest()
589 if count >= 64 and e[-1] <= count - 32:
590 break
591 return k[:32]
592
593 @staticmethod
594 def verify_perms(
595 key: bytes, perms: bytes, p: int, metadata_encrypted: bool
596 ) -> bool:
597 """
598 See :func:`verify_owner_password` and :func:`compute_perms_value`.
599
600 Args:
601 key: The owner password
602 perms:
603 p: A set of flags specifying which operations shall be permitted
604 when the document is opened with user access.
605 If bit 2 is set to 1, all other bits are ignored and all
606 operations are permitted.
607 If bit 2 is set to 0, permission for operations are based on
608 the values of the remaining flags defined in Table 24.
609 metadata_encrypted:
610
611 Returns:
612 A boolean
613
614 """
615 b8 = b"T" if metadata_encrypted else b"F"
616 p1 = struct.pack("<I", p) + b"\xff\xff\xff\xff" + b8 + b"adb"
617 p2 = aes_ecb_decrypt(key, perms)
618 return p1 == p2[:12]
619
620 @staticmethod
621 def generate_values(
622 R: int,
623 user_password: bytes,
624 owner_password: bytes,
625 key: bytes,
626 p: int,
627 metadata_encrypted: bool,
628 ) -> Dict[Any, Any]:
629 user_password = user_password[:127]
630 owner_password = owner_password[:127]
631 u_value, ue_value = AlgV5.compute_U_value(R, user_password, key)
632 o_value, oe_value = AlgV5.compute_O_value(R, owner_password, key, u_value)
633 perms = AlgV5.compute_Perms_value(key, p, metadata_encrypted)
634 return {
635 "/U": u_value,
636 "/UE": ue_value,
637 "/O": o_value,
638 "/OE": oe_value,
639 "/Perms": perms,
640 }
641
642 @staticmethod
643 def compute_U_value(R: int, password: bytes, key: bytes) -> Tuple[bytes, bytes]:
644 """
645 Algorithm 3.8 Computing the encryption dictionary’s U (user password)
646 and UE (user encryption key) values.
647
648 1. Generate 16 random bytes of data using a strong random number generator.
649 The first 8 bytes are the User Validation Salt. The second 8 bytes
650 are the User Key Salt. Compute the 32-byte SHA-256 hash of the
651 password concatenated with the User Validation Salt. The 48-byte
652 string consisting of the 32-byte hash followed by the User
653 Validation Salt followed by the User Key Salt is stored as the U key.
654 2. Compute the 32-byte SHA-256 hash of the password concatenated with
655 the User Key Salt. Using this hash as the key, encrypt the file
656 encryption key using AES-256 in CBC mode with no padding and an
657 initialization vector of zero. The resulting 32-byte string is stored
658 as the UE key.
659
660 Args:
661 R:
662 password:
663 key:
664
665 Returns:
666 A tuple (u-value, ue value)
667
668 """
669 random_bytes = secrets.token_bytes(16)
670 val_salt = random_bytes[:8]
671 key_salt = random_bytes[8:]
672 u_value = AlgV5.calculate_hash(R, password, val_salt, b"") + val_salt + key_salt
673
674 tmp_key = AlgV5.calculate_hash(R, password, key_salt, b"")
675 iv = bytes(0 for _ in range(16))
676 ue_value = aes_cbc_encrypt(tmp_key, iv, key)
677 return u_value, ue_value
678
679 @staticmethod
680 def compute_O_value(
681 R: int, password: bytes, key: bytes, u_value: bytes
682 ) -> Tuple[bytes, bytes]:
683 """
684 Algorithm 3.9 Computing the encryption dictionary’s O (owner password)
685 and OE (owner encryption key) values.
686
687 1. Generate 16 random bytes of data using a strong random number
688 generator. The first 8 bytes are the Owner Validation Salt. The
689 second 8 bytes are the Owner Key Salt. Compute the 32-byte SHA-256
690 hash of the password concatenated with the Owner Validation Salt and
691 then concatenated with the 48-byte U string as generated in
692 Algorithm 3.8. The 48-byte string consisting of the 32-byte hash
693 followed by the Owner Validation Salt followed by the Owner Key Salt
694 is stored as the O key.
695 2. Compute the 32-byte SHA-256 hash of the password concatenated with
696 the Owner Key Salt and then concatenated with the 48-byte U string as
697 generated in Algorithm 3.8. Using this hash as the key,
698 encrypt the file encryption key using AES-256 in CBC mode with
699 no padding and an initialization vector of zero.
700 The resulting 32-byte string is stored as the OE key.
701
702 Args:
703 R:
704 password:
705 key:
706 u_value: A 32-byte string, based on the user password, that shall be
707 used in determining whether to prompt the user for a password
708 and, if so, whether a valid user or owner password was entered.
709
710 Returns:
711 A tuple (O value, OE value)
712
713 """
714 random_bytes = secrets.token_bytes(16)
715 val_salt = random_bytes[:8]
716 key_salt = random_bytes[8:]
717 o_value = (
718 AlgV5.calculate_hash(R, password, val_salt, u_value) + val_salt + key_salt
719 )
720 tmp_key = AlgV5.calculate_hash(R, password, key_salt, u_value[:48])
721 iv = bytes(0 for _ in range(16))
722 oe_value = aes_cbc_encrypt(tmp_key, iv, key)
723 return o_value, oe_value
724
725 @staticmethod
726 def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
727 """
728 Algorithm 3.10 Computing the encryption dictionary’s Perms
729 (permissions) value.
730
731 1. Extend the permissions (contents of the P integer) to 64 bits by
732 setting the upper 32 bits to all 1’s.
733 (This allows for future extension without changing the format.)
734 2. Record the 8 bytes of permission in the bytes 0-7 of the block,
735 low order byte first.
736 3. Set byte 8 to the ASCII value ' T ' or ' F ' according to the
737 EncryptMetadata Boolean.
738 4. Set bytes 9-11 to the ASCII characters ' a ', ' d ', ' b '.
739 5. Set bytes 12-15 to 4 bytes of random data, which will be ignored.
740 6. Encrypt the 16-byte block using AES-256 in ECB mode with an
741 initialization vector of zero, using the file encryption key as the
742 key. The result (16 bytes) is stored as the Perms string, and checked
743 for validity when the file is opened.
744
745 Args:
746 key:
747 p: A set of flags specifying which operations shall be permitted
748 when the document is opened with user access. If bit 2 is set to 1,
749 all other bits are ignored and all operations are permitted.
750 If bit 2 is set to 0, permission for operations are based on the
751 values of the remaining flags defined in Table 24.
752 metadata_encrypted: A boolean indicating if the metadata is encrypted.
753
754 Returns:
755 The perms value
756
757 """
758 b8 = b"T" if metadata_encrypted else b"F"
759 rr = secrets.token_bytes(4)
760 data = struct.pack("<I", p) + b"\xff\xff\xff\xff" + b8 + b"adb" + rr
761 return aes_ecb_encrypt(key, data)
762
763
764class PasswordType(IntEnum):
765 NOT_DECRYPTED = 0
766 USER_PASSWORD = 1
767 OWNER_PASSWORD = 2
768
769
770class EncryptAlgorithm(tuple, Enum): # type: ignore # noqa: SLOT001
771 # V, R, Length
772 RC4_40 = (1, 2, 40)
773 RC4_128 = (2, 3, 128)
774 AES_128 = (4, 4, 128)
775 AES_256_R5 = (5, 5, 256)
776 AES_256 = (5, 6, 256)
777
778
779class EncryptionValues:
780 O: bytes # noqa: E741
781 U: bytes
782 OE: bytes
783 UE: bytes
784 Perms: bytes
785
786
787class Encryption:
788 """
789 Collects and manages parameters for PDF document encryption and decryption.
790
791 Args:
792 V: A code specifying the algorithm to be used in encrypting and
793 decrypting the document.
794 R: The revision of the standard security handler.
795 Length: The length of the encryption key in bits.
796 P: A set of flags specifying which operations shall be permitted
797 when the document is opened with user access
798 entry: The encryption dictionary object.
799 EncryptMetadata: Whether to encrypt metadata in the document.
800 first_id_entry: The first 16 bytes of the file's original ID.
801 StmF: The name of the crypt filter that shall be used by default
802 when decrypting streams.
803 StrF: The name of the crypt filter that shall be used when decrypting
804 all strings in the document.
805 EFF: The name of the crypt filter that shall be used when
806 encrypting embedded file streams that do not have their own
807 crypt filter specifier.
808 values: Additional encryption parameters.
809
810 """
811
812 def __init__(
813 self,
814 V: int,
815 R: int,
816 Length: int,
817 P: int,
818 entry: DictionaryObject,
819 EncryptMetadata: bool,
820 first_id_entry: bytes,
821 StmF: str,
822 StrF: str,
823 EFF: str,
824 values: Optional[EncryptionValues],
825 ) -> None:
826 # §7.6.2, entries common to all encryption dictionaries
827 # use same name as keys of encryption dictionaries entries
828 self.V = V
829 self.R = R
830 self.Length = Length # key_size
831 self.P = (P + 0x100000000) % 0x100000000 # maybe P < 0
832 self.EncryptMetadata = EncryptMetadata
833 self.id1_entry = first_id_entry
834 self.StmF = StmF
835 self.StrF = StrF
836 self.EFF = EFF
837 self.values: EncryptionValues = values if values else EncryptionValues()
838
839 self._password_type = PasswordType.NOT_DECRYPTED
840 self._key: Optional[bytes] = None
841
842 def is_decrypted(self) -> bool:
843 return self._password_type != PasswordType.NOT_DECRYPTED
844
845 def encrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
846 # skip calculate key
847 if not self._is_encryption_object(obj):
848 return obj
849
850 cf = self._make_crypt_filter(idnum, generation)
851 return cf.encrypt_object(obj)
852
853 def decrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
854 # skip calculate key
855 if not self._is_encryption_object(obj):
856 return obj
857
858 cf = self._make_crypt_filter(idnum, generation)
859 return cf.decrypt_object(obj)
860
861 @staticmethod
862 def _is_encryption_object(obj: PdfObject) -> bool:
863 return isinstance(
864 obj,
865 (
866 ByteStringObject,
867 TextStringObject,
868 StreamObject,
869 ArrayObject,
870 DictionaryObject,
871 ),
872 )
873
874 def _make_crypt_filter(self, idnum: int, generation: int) -> CryptFilter:
875 """
876 Algorithm 1: Encryption of data using the RC4 or AES algorithms.
877
878 a) Obtain the object number and generation number from the object
879 identifier of the string or stream to be encrypted
880 (see 7.3.10, "Indirect Objects"). If the string is a direct object,
881 use the identifier of the indirect object containing it.
882 b) For all strings and streams without crypt filter specifier; treating
883 the object number and generation number as binary integers, extend
884 the original n-byte encryption key to n + 5 bytes by appending the
885 low-order 3 bytes of the object number and the low-order 2 bytes of
886 the generation number in that order, low-order byte first.
887 (n is 5 unless the value of V in the encryption dictionary is greater
888 than 1, in which case n is the value of Length divided by 8.)
889 If using the AES algorithm, extend the encryption key an additional
890 4 bytes by adding the value “sAlT”, which corresponds to the
891 hexadecimal values 0x73, 0x41, 0x6C, 0x54. (This addition is done for
892 backward compatibility and is not intended to provide additional
893 security.)
894 c) Initialize the MD5 hash function and pass the result of step (b) as
895 input to this function.
896 d) Use the first (n + 5) bytes, up to a maximum of 16, of the output
897 from the MD5 hash as the key for the RC4 or AES symmetric key
898 algorithms, along with the string or stream data to be encrypted.
899 If using the AES algorithm, the Cipher Block Chaining (CBC) mode,
900 which requires an initialization vector, is used. The block size
901 parameter is set to 16 bytes, and the initialization vector is a
902 16-byte random number that is stored as the first 16 bytes of the
903 encrypted stream or string.
904
905 Algorithm 3.1a Encryption of data using the AES algorithm
906 1. Use the 32-byte file encryption key for the AES-256 symmetric key
907 algorithm, along with the string or stream data to be encrypted.
908 Use the AES algorithm in Cipher Block Chaining (CBC) mode, which
909 requires an initialization vector. The block size parameter is set to
910 16 bytes, and the initialization vector is a 16-byte random number
911 that is stored as the first 16 bytes of the encrypted stream or string.
912 The output is the encrypted data to be stored in the PDF file.
913 """
914 pack1 = struct.pack("<i", idnum)[:3]
915 pack2 = struct.pack("<i", generation)[:2]
916
917 assert self._key
918 key = self._key
919 n = 5 if self.V == 1 else self.Length // 8
920 key_data = key[:n] + pack1 + pack2
921 key_hash = hashlib.md5(key_data)
922 rc4_key = key_hash.digest()[: min(n + 5, 16)]
923
924 # for AES-128
925 key_hash.update(b"sAlT")
926 aes128_key = key_hash.digest()[: min(n + 5, 16)]
927
928 # for AES-256
929 aes256_key = key
930
931 stm_crypt = self._get_crypt(self.StmF, rc4_key, aes128_key, aes256_key)
932 str_crypt = self._get_crypt(self.StrF, rc4_key, aes128_key, aes256_key)
933 ef_crypt = self._get_crypt(self.EFF, rc4_key, aes128_key, aes256_key)
934
935 return CryptFilter(stm_crypt, str_crypt, ef_crypt)
936
937 @staticmethod
938 def _get_crypt(
939 method: str, rc4_key: bytes, aes128_key: bytes, aes256_key: bytes
940 ) -> CryptBase:
941 if method == "/AESV2":
942 return CryptAES(aes128_key)
943 if method == "/AESV3":
944 return CryptAES(aes256_key)
945 if method == "/Identity":
946 return CryptIdentity()
947
948 return CryptRC4(rc4_key)
949
950 @staticmethod
951 def _encode_password(password: Union[bytes, str]) -> bytes:
952 if isinstance(password, str):
953 try:
954 pwd = password.encode("latin-1")
955 except Exception:
956 pwd = password.encode("utf-8")
957 else:
958 pwd = password
959 return pwd
960
961 def verify(self, password: Union[bytes, str]) -> PasswordType:
962 pwd = self._encode_password(password)
963 key, rc = self.verify_v4(pwd) if self.V <= 4 else self.verify_v5(pwd)
964 if rc != PasswordType.NOT_DECRYPTED:
965 self._password_type = rc
966 self._key = key
967 return rc
968
969 def verify_v4(self, password: bytes) -> Tuple[bytes, PasswordType]:
970 # verify owner password first
971 key = AlgV4.verify_owner_password(
972 password,
973 self.R,
974 self.Length,
975 self.values.O,
976 self.values.U,
977 self.P,
978 self.id1_entry,
979 self.EncryptMetadata,
980 )
981 if key:
982 return key, PasswordType.OWNER_PASSWORD
983 key = AlgV4.verify_user_password(
984 password,
985 self.R,
986 self.Length,
987 self.values.O,
988 self.values.U,
989 self.P,
990 self.id1_entry,
991 self.EncryptMetadata,
992 )
993 if key:
994 return key, PasswordType.USER_PASSWORD
995 return b"", PasswordType.NOT_DECRYPTED
996
997 def verify_v5(self, password: bytes) -> Tuple[bytes, PasswordType]:
998 # TODO: use SASLprep process
999 # verify owner password first
1000 key = AlgV5.verify_owner_password(
1001 self.R, password, self.values.O, self.values.OE, self.values.U
1002 )
1003 rc = PasswordType.OWNER_PASSWORD
1004 if not key:
1005 key = AlgV5.verify_user_password(
1006 self.R, password, self.values.U, self.values.UE
1007 )
1008 rc = PasswordType.USER_PASSWORD
1009 if not key:
1010 return b"", PasswordType.NOT_DECRYPTED
1011
1012 # verify Perms
1013 if not AlgV5.verify_perms(key, self.values.Perms, self.P, self.EncryptMetadata):
1014 logger_warning("ignore '/Perms' verify failed", __name__)
1015 return key, rc
1016
1017 def write_entry(
1018 self, user_password: str, owner_password: Optional[str]
1019 ) -> DictionaryObject:
1020 user_pwd = self._encode_password(user_password)
1021 owner_pwd = self._encode_password(owner_password) if owner_password else None
1022 if owner_pwd is None:
1023 owner_pwd = user_pwd
1024
1025 if self.V <= 4:
1026 self.compute_values_v4(user_pwd, owner_pwd)
1027 else:
1028 self._key = secrets.token_bytes(self.Length // 8)
1029 values = AlgV5.generate_values(
1030 self.R, user_pwd, owner_pwd, self._key, self.P, self.EncryptMetadata
1031 )
1032 self.values.O = values["/O"]
1033 self.values.U = values["/U"]
1034 self.values.OE = values["/OE"]
1035 self.values.UE = values["/UE"]
1036 self.values.Perms = values["/Perms"]
1037
1038 dict_obj = DictionaryObject()
1039 dict_obj[NameObject("/V")] = NumberObject(self.V)
1040 dict_obj[NameObject("/R")] = NumberObject(self.R)
1041 dict_obj[NameObject("/Length")] = NumberObject(self.Length)
1042 dict_obj[NameObject("/P")] = NumberObject(self.P)
1043 dict_obj[NameObject("/Filter")] = NameObject("/Standard")
1044 # ignore /EncryptMetadata
1045
1046 dict_obj[NameObject("/O")] = ByteStringObject(self.values.O)
1047 dict_obj[NameObject("/U")] = ByteStringObject(self.values.U)
1048
1049 if self.V >= 4:
1050 # TODO: allow different method
1051 std_cf = DictionaryObject()
1052 std_cf[NameObject("/AuthEvent")] = NameObject("/DocOpen")
1053 std_cf[NameObject("/CFM")] = NameObject(self.StmF)
1054 std_cf[NameObject("/Length")] = NumberObject(self.Length // 8)
1055 cf = DictionaryObject()
1056 cf[NameObject("/StdCF")] = std_cf
1057 dict_obj[NameObject("/CF")] = cf
1058 dict_obj[NameObject("/StmF")] = NameObject("/StdCF")
1059 dict_obj[NameObject("/StrF")] = NameObject("/StdCF")
1060 # ignore EFF
1061 # dict_obj[NameObject("/EFF")] = NameObject("/StdCF")
1062
1063 if self.V >= 5:
1064 dict_obj[NameObject("/OE")] = ByteStringObject(self.values.OE)
1065 dict_obj[NameObject("/UE")] = ByteStringObject(self.values.UE)
1066 dict_obj[NameObject("/Perms")] = ByteStringObject(self.values.Perms)
1067 return dict_obj
1068
1069 def compute_values_v4(self, user_password: bytes, owner_password: bytes) -> None:
1070 rc4_key = AlgV4.compute_O_value_key(owner_password, self.R, self.Length)
1071 o_value = AlgV4.compute_O_value(rc4_key, user_password, self.R)
1072
1073 key = AlgV4.compute_key(
1074 user_password,
1075 self.R,
1076 self.Length,
1077 o_value,
1078 self.P,
1079 self.id1_entry,
1080 self.EncryptMetadata,
1081 )
1082 u_value = AlgV4.compute_U_value(key, self.R, self.id1_entry)
1083
1084 self._key = key
1085 self.values.O = o_value
1086 self.values.U = u_value
1087
1088 @staticmethod
1089 def read(encryption_entry: DictionaryObject, first_id_entry: bytes) -> "Encryption":
1090 if encryption_entry.get("/Filter") != "/Standard":
1091 raise NotImplementedError(
1092 "only Standard PDF encryption handler is available"
1093 )
1094 if "/SubFilter" in encryption_entry:
1095 raise NotImplementedError("/SubFilter NOT supported")
1096
1097 stm_filter = "/V2"
1098 str_filter = "/V2"
1099 ef_filter = "/V2"
1100
1101 alg_ver = encryption_entry.get("/V", 0)
1102 if alg_ver not in (1, 2, 3, 4, 5):
1103 raise NotImplementedError(f"Encryption V={alg_ver} NOT supported")
1104 if alg_ver >= 4:
1105 filters = encryption_entry["/CF"]
1106
1107 stm_filter = encryption_entry.get("/StmF", "/Identity")
1108 str_filter = encryption_entry.get("/StrF", "/Identity")
1109 ef_filter = encryption_entry.get("/EFF", stm_filter)
1110
1111 if stm_filter != "/Identity":
1112 stm_filter = filters[stm_filter]["/CFM"] # type: ignore
1113 if str_filter != "/Identity":
1114 str_filter = filters[str_filter]["/CFM"] # type: ignore
1115 if ef_filter != "/Identity":
1116 ef_filter = filters[ef_filter]["/CFM"] # type: ignore
1117
1118 allowed_methods = ("/Identity", "/V2", "/AESV2", "/AESV3")
1119 if stm_filter not in allowed_methods:
1120 raise NotImplementedError(f"StmF Method {stm_filter} NOT supported!")
1121 if str_filter not in allowed_methods:
1122 raise NotImplementedError(f"StrF Method {str_filter} NOT supported!")
1123 if ef_filter not in allowed_methods:
1124 raise NotImplementedError(f"EFF Method {ef_filter} NOT supported!")
1125
1126 alg_rev = cast(int, encryption_entry["/R"])
1127 perm_flags = cast(int, encryption_entry["/P"])
1128 key_bits = encryption_entry.get("/Length", 40)
1129 encrypt_metadata = encryption_entry.get("/EncryptMetadata")
1130 encrypt_metadata = (
1131 encrypt_metadata.value if encrypt_metadata is not None else True
1132 )
1133 values = EncryptionValues()
1134 values.O = cast(ByteStringObject, encryption_entry["/O"]).original_bytes
1135 values.U = cast(ByteStringObject, encryption_entry["/U"]).original_bytes
1136 values.OE = encryption_entry.get("/OE", ByteStringObject()).original_bytes
1137 values.UE = encryption_entry.get("/UE", ByteStringObject()).original_bytes
1138 values.Perms = encryption_entry.get("/Perms", ByteStringObject()).original_bytes
1139 return Encryption(
1140 V=alg_ver,
1141 R=alg_rev,
1142 Length=key_bits,
1143 P=perm_flags,
1144 EncryptMetadata=encrypt_metadata,
1145 first_id_entry=first_id_entry,
1146 values=values,
1147 StrF=str_filter,
1148 StmF=stm_filter,
1149 EFF=ef_filter,
1150 entry=encryption_entry, # Dummy entry for the moment; will get removed
1151 )
1152
1153 @staticmethod
1154 def make(
1155 alg: EncryptAlgorithm, permissions: int, first_id_entry: bytes
1156 ) -> "Encryption":
1157 alg_ver, alg_rev, key_bits = alg
1158
1159 stm_filter, str_filter, ef_filter = "/V2", "/V2", "/V2"
1160
1161 if alg == EncryptAlgorithm.AES_128:
1162 stm_filter, str_filter, ef_filter = "/AESV2", "/AESV2", "/AESV2"
1163 elif alg in (EncryptAlgorithm.AES_256_R5, EncryptAlgorithm.AES_256):
1164 stm_filter, str_filter, ef_filter = "/AESV3", "/AESV3", "/AESV3"
1165
1166 return Encryption(
1167 V=alg_ver,
1168 R=alg_rev,
1169 Length=key_bits,
1170 P=permissions,
1171 EncryptMetadata=True,
1172 first_id_entry=first_id_entry,
1173 values=None,
1174 StrF=str_filter,
1175 StmF=stm_filter,
1176 EFF=ef_filter,
1177 entry=DictionaryObject(), # Dummy entry for the moment; will get removed
1178 )