Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/_encryption.py: 70%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright (c) 2022, exiledkingcc
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright notice,
9# this list of conditions and the following disclaimer.
10# * Redistributions in binary form must reproduce the above copyright notice,
11# this list of conditions and the following disclaimer in the documentation
12# and/or other materials provided with the distribution.
13# * The name of the author may not be used to endorse or promote products
14# derived from this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26# POSSIBILITY OF SUCH DAMAGE.
27import hashlib
28import secrets
29import struct
30from enum import Enum, IntEnum
31from typing import Any, Optional, Union, cast
33from pypdf._crypt_providers import (
34 CryptAES,
35 CryptBase,
36 CryptIdentity,
37 CryptRC4,
38 aes_cbc_decrypt,
39 aes_cbc_encrypt,
40 aes_ecb_decrypt,
41 aes_ecb_encrypt,
42 rc4_decrypt,
43 rc4_encrypt,
44)
46from ._utils import logger_warning
47from .generic import (
48 ArrayObject,
49 ByteStringObject,
50 DictionaryObject,
51 NameObject,
52 NumberObject,
53 PdfObject,
54 StreamObject,
55 TextStringObject,
56 create_string_object,
57)
class CryptFilter:
    """
    Bundle the ciphers that apply to the contents of one PDF object.

    Args:
        stm_crypt: Cipher applied to the data of stream objects.
        str_crypt: Cipher applied to string objects.
        ef_crypt: Cipher for embedded file streams; it is stored on the
            instance but not used by the methods of this class.
    """

    def __init__(
        self,
        stm_crypt: CryptBase,
        str_crypt: CryptBase,
        ef_crypt: CryptBase,
    ) -> None:
        self.stm_crypt, self.str_crypt, self.ef_crypt = (
            stm_crypt,
            str_crypt,
            ef_crypt,
        )

    def encrypt_object(self, obj: PdfObject) -> PdfObject:
        """
        Return an encrypted counterpart of ``obj``.

        Strings are replaced by byte strings holding the cipher text.
        Streams, dictionaries and arrays are rebuilt as new containers whose
        members are encrypted recursively. Any other object is returned
        unchanged.
        """
        if isinstance(obj, ByteStringObject):
            return ByteStringObject(self.str_crypt.encrypt(obj.original_bytes))

        if isinstance(obj, TextStringObject):
            return ByteStringObject(self.str_crypt.encrypt(obj.get_encoded_bytes()))

        if isinstance(obj, StreamObject):
            encrypted_stream = StreamObject()
            encrypted_stream.update(obj)
            encrypted_stream.set_data(self.stm_crypt.encrypt(obj._data))
            # The entries of the stream dictionary need encrypting as well.
            for name, entry in obj.items():
                encrypted_stream[name] = self.encrypt_object(entry)
            return encrypted_stream

        if isinstance(obj, DictionaryObject):
            encrypted_dict = DictionaryObject()
            for name, entry in obj.items():
                encrypted_dict[name] = self.encrypt_object(entry)
            return encrypted_dict

        if isinstance(obj, ArrayObject):
            return ArrayObject(self.encrypt_object(item) for item in obj)

        return obj

    def decrypt_object(self, obj: PdfObject, *, strict: bool = True) -> PdfObject:
        """
        Decrypt ``obj`` and return the result.

        Strings are replaced by a new string object built from the plain
        text. Streams, dictionaries and arrays are decrypted in place (and
        recursively) and the same container is returned. Any other object is
        returned unchanged.

        Args:
            obj: The object to decrypt.
            strict: Forwarded unchanged to the ciphers' ``decrypt`` calls.
        """
        if isinstance(obj, (ByteStringObject, TextStringObject)):
            plain = self.str_crypt.decrypt(obj.original_bytes, strict=strict)
            return create_string_object(plain)

        if isinstance(obj, StreamObject):
            obj._data = self.stm_crypt.decrypt(obj._data, strict=strict)
            # The entries of the stream dictionary need decrypting as well.
            for name, entry in obj.items():
                obj[name] = self.decrypt_object(entry, strict=strict)
        elif isinstance(obj, DictionaryObject):
            for name, entry in obj.items():
                obj[name] = self.decrypt_object(entry, strict=strict)
        elif isinstance(obj, ArrayObject):
            for position in range(len(obj)):
                obj[position] = self.decrypt_object(obj[position], strict=strict)
        return obj
111_PADDING = (
112 b"\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
113 b"\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c\xa9\xfe\x64\x53\x69\x7a"
114)
117def _padding(data: bytes) -> bytes:
118 return (data + _PADDING)[:32]
class AlgV4:
    """
    Password algorithms of the standard security handler, revisions 2 to 4.

    All methods are static; the class only serves as a namespace.
    """

    @staticmethod
    def compute_key(
        password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 2: Computing an encryption key.

        a) Pad or truncate the password string to exactly 32 bytes. If the
           password string is more than 32 bytes long, use only its first
           32 bytes; if it is less than 32 bytes long, pad it by appending
           the required number of additional bytes from the beginning of
           the following padding string:
               < 28 BF 4E 5E 4E 75 8A 41 64 00 4E 56 FF FA 01 08
                 2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A >
           That is, if the password string is n bytes long, append the
           first 32 - n bytes of the padding string to the end of the
           password string. If the password string is empty (zero-length),
           meaning there is no user password, substitute the entire padding
           string in its place.
        b) Initialize the MD5 hash function and pass the result of step (a)
           as input to this function.
        c) Pass the value of the encryption dictionary's O entry to the MD5
           hash function.
        d) Convert the integer value of the P entry to a 32-bit unsigned
           binary number and pass these bytes to the MD5 hash function,
           low-order byte first.
        e) Pass the first element of the file's file identifier array (the
           value of the ID entry in the document's trailer dictionary) to
           the MD5 hash function.
        f) (Security handlers of revision 4 or greater) If document metadata
           is not being encrypted, pass 4 bytes with the value 0xFFFFFFFF to
           the MD5 hash function.
        g) Finish the hash.
        h) (Security handlers of revision 3 or greater) Do the following
           50 times: Take the output from the previous MD5 hash and pass the
           first n bytes of the output as input into a new MD5 hash, where n
           is the number of bytes of the encryption key as defined by the
           value of the encryption dictionary's Length entry.
        i) Set the encryption key to the first n bytes of the output from
           the final MD5 hash, where n shall always be 5 for security
           handlers of revision 2 but, for security handlers of revision 3
           or greater, shall depend on the value of the encryption
           dictionary's Length entry.

        Args:
            password: The encryption secret as a bytes-string
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits (the key has
                ``key_size // 8`` bytes)
            o_entry: The owner entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. Negative
                values are interpreted as two's complement 32-bit numbers.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is
                encrypted.

        Returns:
            The first ``key_size // 8`` bytes of the final MD5 digest

        """
        u_hash = hashlib.md5(_padding(password))
        u_hash.update(o_entry)
        # Step (d) asks for an unsigned 32-bit number, but P is commonly
        # stored as a negative signed integer; packing that with "<I" would
        # raise struct.error, so reduce it to its unsigned representation.
        u_hash.update(struct.pack("<I", P & 0xFFFFFFFF))
        u_hash.update(id1_entry)
        if rev >= 4 and not metadata_encrypted:
            u_hash.update(b"\xff\xff\xff\xff")
        u_hash_digest = u_hash.digest()
        length = key_size // 8
        if rev >= 3:
            for _ in range(50):
                u_hash_digest = hashlib.md5(u_hash_digest[:length]).digest()
        return u_hash_digest[:length]

    @staticmethod
    def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes:
        """
        Algorithm 3: Computing the encryption dictionary's O (owner password) value.

        a) Pad or truncate the owner password string as described in step (a)
           of "Algorithm 2: Computing an encryption key". If there is no
           owner password, use the user password instead.
        b) Initialize the MD5 hash function and pass the result of step (a)
           as input to this function.
        c) (Security handlers of revision 3 or greater) Do the following
           50 times: Take the output from the previous MD5 hash and pass it
           as input into a new MD5 hash.
        d) Create an RC4 encryption key using the first n bytes of the output
           from the final MD5 hash, where n shall always be 5 for security
           handlers of revision 2 but, for security handlers of revision 3
           or greater, shall depend on the value of the encryption
           dictionary's Length entry.
        e) Pad or truncate the user password string as described in step (a)
           of "Algorithm 2: Computing an encryption key".
        f) Encrypt the result of step (e), using an RC4 encryption function
           with the encryption key obtained in step (d).
        g) (Security handlers of revision 3 or greater) Do the following
           19 times: Take the output from the previous invocation of the RC4
           function and pass it as input to a new invocation of the function;
           use an encryption key generated by taking each byte of the
           encryption key obtained in step (d) and performing an XOR
           operation between that byte and the single-byte value of the
           iteration counter (from 1 to 19).
        h) Store the output from the final invocation of the RC4 function as
           the value of the O entry in the encryption dictionary.

        This method covers steps (a) to (d); :func:`compute_O_value` covers
        the remaining steps.

        Args:
            owner_password: The owner password as a bytes-string
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits (the key has
                ``key_size // 8`` bytes)

        Returns:
            The RC4 key

        """
        o_hash_digest = hashlib.md5(_padding(owner_password)).digest()
        if rev >= 3:
            for _ in range(50):
                o_hash_digest = hashlib.md5(o_hash_digest).digest()
        return o_hash_digest[: key_size // 8]

    @staticmethod
    def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes:
        """
        See :func:`compute_O_value_key`.

        Performs steps (e) to (g) of Algorithm 3.

        Args:
            rc4_key: The key returned by :func:`compute_O_value_key`
            user_password: The user password as a bytes-string
            rev: The encryption revision (see PDF standard)

        Returns:
            The RC4 encrypted, padded user password (the O value)

        """
        rc4_enc = rc4_encrypt(rc4_key, _padding(user_password))
        if rev >= 3:
            for i in range(1, 20):
                round_key = bytes(x ^ i for x in rc4_key)
                rc4_enc = rc4_encrypt(round_key, rc4_enc)
        return rc4_enc

    @staticmethod
    def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes:
        """
        Algorithms 4 and 5: Computing the encryption dictionary's U (user password) value.

        Algorithm 4 (security handlers of revision 2):

        a) Create an encryption key based on the user password string, as
           described in "Algorithm 2: Computing an encryption key".
        b) Encrypt the 32-byte padding string shown in step (a) of
           "Algorithm 2: Computing an encryption key", using an RC4
           encryption function with the encryption key from the preceding
           step.
        c) Store the result of step (b) as the value of the U entry in the
           encryption dictionary.

        Algorithm 5 (security handlers of revision 3 or greater):

        a) Create an encryption key based on the user password string, as
           described in "Algorithm 2: Computing an encryption key".
        b) Initialize the MD5 hash function and pass the 32-byte padding
           string shown in step (a) of "Algorithm 2: Computing an encryption
           key" as input to this function.
        c) Pass the first element of the file's file identifier array to the
           hash function and finish the hash.
        d) Encrypt the 16-byte result of the hash, using an RC4 encryption
           function with the encryption key from step (a).
        e) Do the following 19 times: Take the output from the previous
           invocation of the RC4 function and pass it as input to a new
           invocation of the function; use an encryption key generated by
           taking each byte of the original encryption key obtained in
           step (a) and performing an XOR operation between that byte and
           the single-byte value of the iteration counter (from 1 to 19).
        f) Append 16 bytes of arbitrary padding to the output from the final
           invocation of the RC4 function and store the 32-byte result as
           the value of the U entry in the encryption dictionary.

        Args:
            key: The encryption key from Algorithm 2
            rev: The encryption revision (see PDF standard)
            id1_entry: The first element of the file identifier array

        Returns:
            The value

        """
        if rev <= 2:
            return rc4_encrypt(key, _PADDING)

        u_hash = hashlib.md5(_PADDING)
        u_hash.update(id1_entry)
        rc4_enc = rc4_encrypt(key, u_hash.digest())
        for i in range(1, 20):
            round_key = bytes(x ^ i for x in key)
            rc4_enc = rc4_encrypt(round_key, rc4_enc)
        # Step (f): the "arbitrary padding" is taken from the padding string.
        return _padding(rc4_enc)

    @staticmethod
    def verify_user_password(
        user_password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        u_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 6: Authenticating the user password.

        a) Perform all but the last step of Algorithm 4 (revision 2) or
           Algorithm 5 (revision 3 or greater) using the supplied password
           string.
        b) If the result of step (a) is equal to the value of the encryption
           dictionary's U entry (comparing on the first 16 bytes in the case
           of security handlers of revision 3 or greater), the password
           supplied is the correct user password. The key obtained in the
           first step of Algorithm 4 or 5 shall be used to decrypt the
           document.

        Args:
            user_password: The user password as a bytes stream
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits
            o_entry: The owner entry
            u_entry: The user entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is
                encrypted.

        Returns:
            The key, or ``b""`` if the password does not match

        """
        key = AlgV4.compute_key(
            user_password, rev, key_size, o_entry, P, id1_entry, metadata_encrypted
        )
        u_value = AlgV4.compute_U_value(key, rev, id1_entry)
        if rev >= 3:
            # Only the first 16 bytes are significant; the rest is padding.
            u_value = u_value[:16]
            u_entry = u_entry[:16]
        return key if u_value == u_entry else b""

    @staticmethod
    def verify_owner_password(
        owner_password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        u_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 7: Authenticating the owner password.

        a) Compute an encryption key from the supplied password string, as
           described in steps (a) to (d) of "Algorithm 3: Computing the
           encryption dictionary's O (owner password) value".
        b) (Security handlers of revision 2 only) Decrypt the value of the
           encryption dictionary's O entry, using an RC4 encryption function
           with the encryption key computed in step (a).
           (Security handlers of revision 3 or greater) Do the following
           20 times: Decrypt the value of the encryption dictionary's O entry
           (first iteration) or the output from the previous iteration (all
           subsequent iterations), using an RC4 encryption function with a
           different encryption key at each iteration. The key shall be
           generated by taking the original key (obtained in step (a)) and
           performing an XOR operation between each byte of the key and the
           single-byte value of the iteration counter (from 19 to 0).
        c) The result of step (b) purports to be the user password.
           Authenticate this user password using "Algorithm 6:
           Authenticating the user password". If it is correct, the password
           supplied is the correct owner password.

        Args:
            owner_password: The owner password as a bytes-string
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits
            o_entry: The owner entry
            u_entry: The user entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is
                encrypted.

        Returns:
            The key, or ``b""`` if the password does not match

        """
        rc4_key = AlgV4.compute_O_value_key(owner_password, rev, key_size)

        if rev <= 2:
            user_password = rc4_decrypt(rc4_key, o_entry)
        else:
            user_password = o_entry
            for i in range(19, -1, -1):
                round_key = bytes(x ^ i for x in rc4_key)
                user_password = rc4_decrypt(round_key, user_password)
        return AlgV4.verify_user_password(
            user_password,
            rev,
            key_size,
            o_entry,
            u_entry,
            P,
            id1_entry,
            metadata_encrypted,
        )
class AlgV5:
    """
    Password algorithms of the standard security handler, revisions 5 and 6.

    All methods are static; the class only serves as a namespace.
    """

    @staticmethod
    def verify_owner_password(
        R: int, password: bytes, o_value: bytes, oe_value: bytes, u_value: bytes
    ) -> bytes:
        """
        Algorithm 3.2a Computing an encryption key.

        To understand the algorithm below, it is necessary to treat the O and U
        strings in the Encrypt dictionary as made up of three sections.
        The first 32 bytes are a hash value (explained below). The next 8 bytes
        are called the Validation Salt. The final 8 bytes are called the Key Salt.

        1. The password string is generated from Unicode input by processing the
           input string with the SASLprep (IETF RFC 4013) profile of
           stringprep (IETF RFC 3454), and then converting to a UTF-8
           representation.
        2. Truncate the UTF-8 representation to 127 bytes if it is longer than
           127 bytes.
        3. Test the password against the owner key by computing the SHA-256 hash
           of the UTF-8 password concatenated with the 8 bytes of owner
           Validation Salt, concatenated with the 48-byte U string. If the
           32-byte result matches the first 32 bytes of the O string, this is
           the owner password.
           Compute an intermediate owner key by computing the SHA-256 hash of
           the UTF-8 password concatenated with the 8 bytes of owner Key Salt,
           concatenated with the 48-byte U string. The 32-byte result is the
           key used to decrypt the 32-byte OE string using AES-256 in CBC mode
           with no padding and an initialization vector of zero.
           The 32-byte result is the file encryption key.
        4. Test the password against the user key by computing the SHA-256 hash
           of the UTF-8 password concatenated with the 8 bytes of user
           Validation Salt. If the 32 byte result matches the first 32 bytes of
           the U string, this is the user password.
           Compute an intermediate user key by computing the SHA-256 hash of the
           UTF-8 password concatenated with the 8 bytes of user Key Salt.
           The 32-byte result is the key used to decrypt the 32-byte
           UE string using AES-256 in CBC mode with no padding and an
           initialization vector of zero. The 32-byte result is the file
           encryption key.
        5. Decrypt the 16-byte Perms string using AES-256 in ECB mode with an
           initialization vector of zero and the file encryption key as the key.
           Verify that bytes 9-11 of the result are the characters 'a', 'd', 'b'.
           Bytes 0-3 of the decrypted Perms entry, treated as a little-endian
           integer, are the user permissions.
           They should match the value in the P key.

        Args:
            R: A number specifying which revision of the standard security
                handler shall be used to interpret this dictionary
            password: The owner password
            o_value: The O string: a 32-byte hash followed by the 8-byte
                Validation Salt and the 8-byte Key Salt
            oe_value: The OE string (the encrypted file encryption key)
            u_value: The U string; only its first 48 bytes are used

        Returns:
            The key, or ``b""`` if the password does not match

        """
        password = password[:127]
        u_string = u_value[:48]
        validation_hash = AlgV5.calculate_hash(R, password, o_value[32:40], u_string)
        if validation_hash != o_value[:32]:
            return b""
        tmp_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_string)
        iv = bytes(16)  # all-zero initialization vector
        return aes_cbc_decrypt(tmp_key, iv, oe_value)

    @staticmethod
    def verify_user_password(
        R: int, password: bytes, u_value: bytes, ue_value: bytes
    ) -> bytes:
        """
        See :func:`verify_owner_password`.

        Args:
            R: A number specifying which revision of the standard security
                handler shall be used to interpret this dictionary
            password: The user password
            u_value: The U string: a 32-byte hash followed by the 8-byte
                Validation Salt and the 8-byte Key Salt
            ue_value: The UE string (the encrypted file encryption key)

        Returns:
            The key, or ``b""`` if the password does not match

        """
        password = password[:127]
        if AlgV5.calculate_hash(R, password, u_value[32:40], b"") != u_value[:32]:
            return b""
        tmp_key = AlgV5.calculate_hash(R, password, u_value[40:48], b"")
        iv = bytes(16)  # all-zero initialization vector
        return aes_cbc_decrypt(tmp_key, iv, ue_value)

    @staticmethod
    def calculate_hash(R: int, password: bytes, salt: bytes, udata: bytes) -> bytes:
        """
        Compute the 32-byte password hash used by revisions 5 and 6.

        For ``R < 6`` this is the plain SHA-256 digest of
        ``password + salt + udata``. For later revisions that digest seeds an
        iterated AES/SHA-2 construction; the implementation follows
        https://github.com/qpdf/qpdf/blob/main/libqpdf/QPDF_encryption.cc

        Args:
            R: The revision of the standard security handler
            password: The (already truncated) password
            salt: The validation salt or key salt
            udata: The 48-byte U string for owner hashes, ``b""`` otherwise

        Returns:
            The 32-byte hash

        """
        k = hashlib.sha256(password + salt + udata).digest()
        if R < 6:
            return k
        hash_functions = (hashlib.sha256, hashlib.sha384, hashlib.sha512)
        count = 0
        while True:
            count += 1
            k1 = password + k + udata
            e = aes_cbc_encrypt(k[:16], k[16:32], k1 * 64)
            # The first 16 bytes of e select the next hash function.
            k = hash_functions[sum(e[:16]) % 3](e).digest()
            # At least 64 rounds, then stop as soon as the last byte of e is
            # small enough relative to the round counter.
            if count >= 64 and e[-1] <= count - 32:
                break
        return k[:32]

    @staticmethod
    def verify_perms(
        key: bytes, perms: bytes, p: int, metadata_encrypted: bool
    ) -> bool:
        """
        See :func:`verify_owner_password` and :func:`compute_Perms_value`.

        Args:
            key: The file encryption key
            perms: The Perms string from the encryption dictionary
            p: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. Negative
                values are interpreted as two's complement 32-bit numbers.
            metadata_encrypted: A boolean indicating if the metadata is
                encrypted.

        Returns:
            True if the first 12 decrypted bytes match the expected values

        """
        b8 = b"T" if metadata_encrypted else b"F"
        # Mask so that a signed (negative) P does not make struct.pack raise.
        p1 = struct.pack("<I", p & 0xFFFFFFFF) + b"\xff\xff\xff\xff" + b8 + b"adb"
        p2 = aes_ecb_decrypt(key, perms)
        return p1 == p2[:12]

    @staticmethod
    def generate_values(
        R: int,
        user_password: bytes,
        owner_password: bytes,
        key: bytes,
        p: int,
        metadata_encrypted: bool,
    ) -> dict[Any, Any]:
        """
        Compute the password dependent entries of the encryption dictionary.

        Args:
            R: The revision of the standard security handler
            user_password: The user password (truncated to 127 bytes)
            owner_password: The owner password (truncated to 127 bytes)
            key: The file encryption key
            p: The permission flags
            metadata_encrypted: A boolean indicating if the metadata is
                encrypted.

        Returns:
            A dictionary with the keys ``/U``, ``/UE``, ``/O``, ``/OE`` and
            ``/Perms``

        """
        user_password = user_password[:127]
        owner_password = owner_password[:127]
        u_value, ue_value = AlgV5.compute_U_value(R, user_password, key)
        o_value, oe_value = AlgV5.compute_O_value(R, owner_password, key, u_value)
        perms = AlgV5.compute_Perms_value(key, p, metadata_encrypted)
        return {
            "/U": u_value,
            "/UE": ue_value,
            "/O": o_value,
            "/OE": oe_value,
            "/Perms": perms,
        }

    @staticmethod
    def compute_U_value(R: int, password: bytes, key: bytes) -> tuple[bytes, bytes]:
        """
        Algorithm 3.8 Computing the encryption dictionary's U (user password)
        and UE (user encryption key) values.

        1. Generate 16 random bytes of data using a strong random number generator.
           The first 8 bytes are the User Validation Salt. The second 8 bytes
           are the User Key Salt. Compute the 32-byte SHA-256 hash of the
           password concatenated with the User Validation Salt. The 48-byte
           string consisting of the 32-byte hash followed by the User
           Validation Salt followed by the User Key Salt is stored as the U key.
        2. Compute the 32-byte SHA-256 hash of the password concatenated with
           the User Key Salt. Using this hash as the key, encrypt the file
           encryption key using AES-256 in CBC mode with no padding and an
           initialization vector of zero. The resulting 32-byte string is stored
           as the UE key.

        Args:
            R: The revision of the standard security handler
            password: The user password
            key: The file encryption key

        Returns:
            A tuple (u-value, ue value)

        """
        random_bytes = secrets.token_bytes(16)
        val_salt, key_salt = random_bytes[:8], random_bytes[8:]
        u_value = AlgV5.calculate_hash(R, password, val_salt, b"") + val_salt + key_salt

        tmp_key = AlgV5.calculate_hash(R, password, key_salt, b"")
        iv = bytes(16)  # all-zero initialization vector
        ue_value = aes_cbc_encrypt(tmp_key, iv, key)
        return u_value, ue_value

    @staticmethod
    def compute_O_value(
        R: int, password: bytes, key: bytes, u_value: bytes
    ) -> tuple[bytes, bytes]:
        """
        Algorithm 3.9 Computing the encryption dictionary's O (owner password)
        and OE (owner encryption key) values.

        1. Generate 16 random bytes of data using a strong random number
           generator. The first 8 bytes are the Owner Validation Salt. The
           second 8 bytes are the Owner Key Salt. Compute the 32-byte SHA-256
           hash of the password concatenated with the Owner Validation Salt and
           then concatenated with the 48-byte U string as generated in
           Algorithm 3.8. The 48-byte string consisting of the 32-byte hash
           followed by the Owner Validation Salt followed by the Owner Key Salt
           is stored as the O key.
        2. Compute the 32-byte SHA-256 hash of the password concatenated with
           the Owner Key Salt and then concatenated with the 48-byte U string as
           generated in Algorithm 3.8. Using this hash as the key,
           encrypt the file encryption key using AES-256 in CBC mode with
           no padding and an initialization vector of zero.
           The resulting 32-byte string is stored as the OE key.

        Args:
            R: The revision of the standard security handler
            password: The owner password
            key: The file encryption key
            u_value: The U string as generated in Algorithm 3.8; only its
                first 48 bytes are used

        Returns:
            A tuple (O value, OE value)

        """
        random_bytes = secrets.token_bytes(16)
        val_salt, key_salt = random_bytes[:8], random_bytes[8:]
        # Both hashes must see exactly the 48-byte U string, because
        # verify_owner_password checks against u_value[:48]; hashing a longer
        # u_value here would produce an O value that can never be verified.
        u_string = u_value[:48]
        o_value = (
            AlgV5.calculate_hash(R, password, val_salt, u_string) + val_salt + key_salt
        )
        tmp_key = AlgV5.calculate_hash(R, password, key_salt, u_string)
        iv = bytes(16)  # all-zero initialization vector
        oe_value = aes_cbc_encrypt(tmp_key, iv, key)
        return o_value, oe_value

    @staticmethod
    def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
        """
        Algorithm 3.10 Computing the encryption dictionary's Perms
        (permissions) value.

        1. Extend the permissions (contents of the P integer) to 64 bits by
           setting the upper 32 bits to all 1's.
           (This allows for future extension without changing the format.)
        2. Record the 8 bytes of permission in the bytes 0-7 of the block,
           low order byte first.
        3. Set byte 8 to the ASCII value 'T' or 'F' according to the
           EncryptMetadata Boolean.
        4. Set bytes 9-11 to the ASCII characters 'a', 'd', 'b'.
        5. Set bytes 12-15 to 4 bytes of random data, which will be ignored.
        6. Encrypt the 16-byte block using AES-256 in ECB mode with an
           initialization vector of zero, using the file encryption key as the
           key. The result (16 bytes) is stored as the Perms string, and checked
           for validity when the file is opened.

        Args:
            key: The file encryption key
            p: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. Negative
                values are interpreted as two's complement 32-bit numbers.
            metadata_encrypted: A boolean indicating if the metadata is
                encrypted.

        Returns:
            The perms value

        """
        b8 = b"T" if metadata_encrypted else b"F"
        rr = secrets.token_bytes(4)
        # Mask so that a signed (negative) P does not make struct.pack raise.
        data = struct.pack("<I", p & 0xFFFFFFFF) + b"\xff\xff\xff\xff" + b8 + b"adb" + rr
        return aes_ecb_encrypt(key, data)
class PasswordType(IntEnum):
    """Kind of password a document has been unlocked with, if any."""

    # No password has been accepted yet.
    NOT_DECRYPTED = 0
    # Unlocked with the user password.
    USER_PASSWORD = 1
    # Unlocked with the owner password.
    OWNER_PASSWORD = 2
class EncryptAlgorithm(tuple, Enum):  # type: ignore  # noqa: SLOT001
    """
    Supported encryption algorithms.

    Every member is a ``(V, R, Length)`` tuple, i.e. the values of the
    corresponding entries of the encryption dictionary.
    """

    # name = (V, R, Length)
    RC4_40 = (1, 2, 40)
    RC4_128 = (2, 3, 128)
    AES_128 = (4, 4, 128)
    AES_256_R5 = (5, 5, 256)
    AES_256 = (5, 6, 256)
class EncryptionValues:
    """
    Plain container for the password dependent encryption dictionary entries.

    The attributes are only declared here; they carry no default values and
    have to be assigned by the code that creates an instance.
    """

    # Named after the encryption dictionary keys /O, /U, /OE, /UE and /Perms.
    O: bytes  # noqa: E741
    U: bytes
    OE: bytes
    UE: bytes
    Perms: bytes
787class Encryption:
788 """
789 Collects and manages parameters for PDF document encryption and decryption.
791 Args:
792 V: A code specifying the algorithm to be used in encrypting and
793 decrypting the document.
794 R: The revision of the standard security handler.
795 Length: The length of the encryption key in bits.
796 P: A set of flags specifying which operations shall be permitted
797 when the document is opened with user access
798 entry: The encryption dictionary object.
799 EncryptMetadata: Whether to encrypt metadata in the document.
800 first_id_entry: The first 16 bytes of the file's original ID.
801 StmF: The name of the crypt filter that shall be used by default
802 when decrypting streams.
803 StrF: The name of the crypt filter that shall be used when decrypting
804 all strings in the document.
805 EFF: The name of the crypt filter that shall be used when
806 encrypting embedded file streams that do not have their own
807 crypt filter specifier.
808 values: Additional encryption parameters.
810 """
812 def __init__(
813 self,
814 V: int,
815 R: int,
816 Length: int,
817 P: int,
818 entry: DictionaryObject,
819 EncryptMetadata: bool,
820 first_id_entry: bytes,
821 StmF: str,
822 StrF: str,
823 EFF: str,
824 values: Optional[EncryptionValues],
825 ) -> None:
826 # §7.6.2, entries common to all encryption dictionaries
827 # use same name as keys of encryption dictionaries entries
828 self.V = V
829 self.R = R
830 self.Length = Length # key_size
831 self.P = (P + 0x100000000) % 0x100000000 # maybe P < 0
832 self.EncryptMetadata = EncryptMetadata
833 self.id1_entry = first_id_entry
834 self.StmF = StmF
835 self.StrF = StrF
836 self.EFF = EFF
837 self.values: EncryptionValues = values or EncryptionValues()
839 self._password_type = PasswordType.NOT_DECRYPTED
840 self._key: Optional[bytes] = None
841 self._are_permissions_valid: bool = True
843 def is_decrypted(self) -> bool:
844 return self._password_type != PasswordType.NOT_DECRYPTED
846 def encrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
847 # skip calculate key
848 if not self._is_encryption_object(obj):
849 return obj
851 cf = self._make_crypt_filter(idnum, generation)
852 return cf.encrypt_object(obj)
def decrypt_object(self, obj: PdfObject, idnum: int, generation: int, *, strict: bool = True) -> PdfObject:
    """
    Decrypt ``obj`` with the crypt filter of object ``idnum``/``generation``.

    Objects of a type that is never encrypted are returned unchanged,
    without deriving any key. ``strict`` is forwarded to the crypt filter.
    """
    if self._is_encryption_object(obj):
        crypt_filter = self._make_crypt_filter(idnum, generation)
        return crypt_filter.decrypt_object(obj, strict=strict)
    return obj
@staticmethod
def _is_encryption_object(obj: PdfObject) -> bool:
    """Tell whether ``obj`` has a type that encryption/decryption applies to."""
    encryptable_types = (
        ByteStringObject,
        TextStringObject,
        StreamObject,
        ArrayObject,
        DictionaryObject,
    )
    return isinstance(obj, encryptable_types)
def _make_crypt_filter(self, idnum: int, generation: int) -> CryptFilter:
    """
    Build the crypt filter used for the object ``idnum``/``generation``.

    V <= 4 -- Algorithm 1 (encryption of data using RC4 or AES):

    * Take the first n bytes of the file encryption key, where n is 5 when
      V is 1 and Length // 8 otherwise.
    * Append the low-order 3 bytes of the object number and the low-order
      2 bytes of the generation number, low-order byte first.
    * The RC4 key is the MD5 digest of that data, truncated to
      min(n + 5, 16) bytes.
    * For AES-128 the bytes "sAlT" (0x73, 0x41, 0x6C, 0x54) are hashed in
      addition before truncating in the same way.

    V >= 5 -- Algorithm 3.1a (AES-256):

    * The file encryption key is used directly. No MD5 derivation takes
      place, so AES-256 encrypted files can be handled on FIPS-enabled
      systems where MD5 is blocked.

    The resulting keys are turned into one crypt object each for streams
    (StmF), strings (StrF) and embedded files (EFF).
    """
    # Little-endian object identifier suffix used by Algorithm 1.
    object_suffix = struct.pack("<i", idnum)[:3] + struct.pack("<i", generation)[:2]

    assert self._key
    file_key = self._key

    rc4_key = b""
    aes128_key = b""
    if self.V <= 4:
        n = self.Length // 8 if self.V != 1 else 5
        derived_size = min(n + 5, 16)
        seed = file_key[:n] + object_suffix
        rc4_key = hashlib.md5(seed).digest()[:derived_size]
        aes128_key = hashlib.md5(seed + b"sAlT").digest()[:derived_size]

    # AES-256 always uses the file key itself.
    stm_crypt, str_crypt, ef_crypt = (
        self._get_crypt(method, rc4_key, aes128_key, file_key)
        for method in (self.StmF, self.StrF, self.EFF)
    )
    return CryptFilter(stm_crypt, str_crypt, ef_crypt)
@staticmethod
def _get_crypt(
    method: str, rc4_key: bytes, aes128_key: bytes, aes256_key: bytes
) -> CryptBase:
    """
    Create the crypt object for a crypt filter method name.

    "/Identity" performs no encryption, "/AESV2" uses the AES-128 key and
    "/AESV3" the AES-256 key. Every other method name falls back to RC4.
    """
    if method == "/Identity":
        crypt: CryptBase = CryptIdentity()
    elif method == "/AESV3":
        crypt = CryptAES(aes256_key)
    elif method == "/AESV2":
        crypt = CryptAES(aes128_key)
    else:
        crypt = CryptRC4(rc4_key)
    return crypt
961 @staticmethod
962 def _encode_password(password: Union[bytes, str]) -> bytes:
963 if isinstance(password, str):
964 try:
965 pwd = password.encode("latin-1")
966 except Exception:
967 pwd = password.encode("utf-8")
968 else:
969 pwd = password
970 return pwd
def verify(self, password: Union[bytes, str]) -> PasswordType:
    """
    Check ``password`` against the document and remember the result.

    The V4 verification is used for V <= 4 and the V5 verification
    otherwise. When the password is accepted, the password type and the
    file encryption key are stored on the instance; on failure the
    instance state is left untouched.

    Returns:
        The kind of password that matched, or
        ``PasswordType.NOT_DECRYPTED`` when none did.
    """
    encoded = self._encode_password(password)
    check = self.verify_v4 if self.V <= 4 else self.verify_v5
    key, outcome = check(encoded)
    if outcome != PasswordType.NOT_DECRYPTED:
        self._password_type = outcome
        self._key = key
    return outcome
def verify_v4(self, password: bytes) -> tuple[bytes, PasswordType]:
    """
    Verify ``password`` using the V <= 4 algorithms.

    The owner password check runs first, then the user password check.

    Returns:
        ``(key, password_type)`` for the first check that yields a key,
        or ``(b"", PasswordType.NOT_DECRYPTED)`` when neither does.
    """
    # Both checks take exactly the same arguments.
    arguments = (
        password,
        self.R,
        self.Length,
        self.values.O,
        self.values.U,
        self.P,
        self.id1_entry,
        self.EncryptMetadata,
    )
    attempts = (
        (AlgV4.verify_owner_password, PasswordType.OWNER_PASSWORD),
        (AlgV4.verify_user_password, PasswordType.USER_PASSWORD),
    )
    for check, password_type in attempts:
        key = check(*arguments)
        if key:
            return key, password_type
    return b"", PasswordType.NOT_DECRYPTED
def verify_v5(self, password: bytes) -> tuple[bytes, PasswordType]:
    """
    Verify ``password`` using the V5 algorithms.

    The owner password check runs first, then the user password check.
    After a successful check the ``/Perms`` value is verified as well; a
    failing ``/Perms`` verification is recorded in
    ``_are_permissions_valid`` and logged as a warning, but it does not
    reject the password.

    Returns:
        ``(key, password_type)`` on success, or
        ``(b"", PasswordType.NOT_DECRYPTED)`` when no check yields a key.
    """
    # TODO: use SASLprep process
    key = AlgV5.verify_owner_password(
        self.R, password, self.values.O, self.values.OE, self.values.U
    )
    if key:
        password_type = PasswordType.OWNER_PASSWORD
    else:
        key = AlgV5.verify_user_password(
            self.R, password, self.values.U, self.values.UE
        )
        if not key:
            return b"", PasswordType.NOT_DECRYPTED
        password_type = PasswordType.USER_PASSWORD

    perms_ok = AlgV5.verify_perms(key, self.values.Perms, self.P, self.EncryptMetadata)
    self._are_permissions_valid = perms_ok
    if not perms_ok:
        logger_warning("ignore '/Perms' verify failed", __name__)
    return key, password_type
def write_entry(
    self, user_password: str, owner_password: Optional[str]
) -> DictionaryObject:
    """
    Compute the password-dependent values and build the encryption dictionary.

    Args:
        user_password: The user password.
        owner_password: The owner password; when empty or None the user
            password is used for it as well.

    Returns:
        A new encryption dictionary with the /V, /R, /Length, /P, /Filter,
        /O and /U entries, plus the crypt filter entries for V >= 4 and
        /OE, /UE and /Perms for V >= 5.
    """
    user_pwd = self._encode_password(user_password)
    owner_pwd = self._encode_password(owner_password) if owner_password else user_pwd

    if self.V > 4:
        # V5 uses a freshly generated random file encryption key.
        self._key = secrets.token_bytes(self.Length // 8)
        generated = AlgV5.generate_values(
            self.R, user_pwd, owner_pwd, self._key, self.P, self.EncryptMetadata
        )
        self.values.O = generated["/O"]
        self.values.U = generated["/U"]
        self.values.OE = generated["/OE"]
        self.values.UE = generated["/UE"]
        self.values.Perms = generated["/Perms"]
    else:
        self.compute_values_v4(user_pwd, owner_pwd)

    # /EncryptMetadata is intentionally not written.
    encryption_dict = DictionaryObject()
    for name, value in (
        ("/V", NumberObject(self.V)),
        ("/R", NumberObject(self.R)),
        ("/Length", NumberObject(self.Length)),
        ("/P", NumberObject(self.P)),
        ("/Filter", NameObject("/Standard")),
        ("/O", ByteStringObject(self.values.O)),
        ("/U", ByteStringObject(self.values.U)),
    ):
        encryption_dict[NameObject(name)] = value

    if self.V >= 4:
        # TODO: allow different method
        standard_filter = DictionaryObject()
        standard_filter[NameObject("/AuthEvent")] = NameObject("/DocOpen")
        standard_filter[NameObject("/CFM")] = NameObject(self.StmF)
        standard_filter[NameObject("/Length")] = NumberObject(self.Length // 8)

        crypt_filters = DictionaryObject()
        crypt_filters[NameObject("/StdCF")] = standard_filter

        encryption_dict[NameObject("/CF")] = crypt_filters
        # Streams and strings share the standard crypt filter; /EFF is
        # not written.
        for name in ("/StmF", "/StrF"):
            encryption_dict[NameObject(name)] = NameObject("/StdCF")

    if self.V >= 5:
        for name, raw in (
            ("/OE", self.values.OE),
            ("/UE", self.values.UE),
            ("/Perms", self.values.Perms),
        ):
            encryption_dict[NameObject(name)] = ByteStringObject(raw)
    return encryption_dict
def compute_values_v4(self, user_password: bytes, owner_password: bytes) -> None:
    """
    Compute the file key and the O/U values for V <= 4 encryption.

    The O value is derived from both passwords, the file key from the
    user password and the O value, and the U value from the file key.
    The results are stored on the instance only after every computation
    has completed.
    """
    owner_key = AlgV4.compute_O_value_key(owner_password, self.R, self.Length)
    owner_entry = AlgV4.compute_O_value(owner_key, user_password, self.R)

    file_key = AlgV4.compute_key(
        user_password,
        self.R,
        self.Length,
        owner_entry,
        self.P,
        self.id1_entry,
        self.EncryptMetadata,
    )
    user_entry = AlgV4.compute_U_value(file_key, self.R, self.id1_entry)

    self._key = file_key
    self.values.O, self.values.U = owner_entry, user_entry
@staticmethod
def read(encryption_entry: DictionaryObject, first_id_entry: bytes) -> "Encryption":
    """
    Create an ``Encryption`` from the encryption dictionary of a document.

    Args:
        encryption_entry: The encryption dictionary object.
        first_id_entry: The first entry of the file's ID.

    Returns:
        The ``Encryption`` described by ``encryption_entry``.

    Raises:
        NotImplementedError: If the security handler is not ``/Standard``,
            a ``/SubFilter`` is present, ``/V`` is not one of 1 to 5, or
            a crypt filter method is not supported.
    """
    if encryption_entry.get("/Filter") != "/Standard":
        raise NotImplementedError(
            "only Standard PDF encryption handler is available"
        )
    if "/SubFilter" in encryption_entry:
        raise NotImplementedError("/SubFilter NOT supported")

    alg_ver = encryption_entry.get("/V", 0)
    if alg_ver not in (1, 2, 3, 4, 5):
        raise NotImplementedError(f"Encryption V={alg_ver} NOT supported")

    # Without crypt filters (V < 4) everything uses the "/V2" method.
    stm_filter = str_filter = ef_filter = "/V2"

    if alg_ver >= 4:
        filters = encryption_entry["/CF"]

        def resolve_method(filter_name: Any) -> Any:
            # "/Identity" is used as-is; any other name refers to a crypt
            # filter whose /CFM entry holds the method.
            if filter_name != "/Identity":
                return filters[filter_name]["/CFM"]  # type: ignore
            return filter_name

        stm_name = encryption_entry.get("/StmF", "/Identity")
        str_name = encryption_entry.get("/StrF", "/Identity")
        # Embedded files default to the stream crypt filter.
        ef_name = encryption_entry.get("/EFF", stm_name)

        stm_filter = resolve_method(stm_name)
        str_filter = resolve_method(str_name)
        ef_filter = resolve_method(ef_name)

        allowed_methods = ("/Identity", "/V2", "/AESV2", "/AESV3")
        for label, method in (
            ("StmF", stm_filter),
            ("StrF", str_filter),
            ("EFF", ef_filter),
        ):
            if method not in allowed_methods:
                raise NotImplementedError(f"{label} Method {method} NOT supported!")

    alg_rev = cast(int, encryption_entry["/R"])
    perm_flags = cast(int, encryption_entry["/P"])
    key_bits = encryption_entry.get("/Length", 40)
    if alg_ver == 4 and stm_filter == "/AESV2":
        cf_dict = cast(DictionaryObject, filters[encryption_entry["/StmF"]])  # type: ignore[index]
        # CF /Length is in bytes (default 16 for AES-128), convert to bits
        key_bits = cast(int, cf_dict.get("/Length", 16)) * 8

    # Metadata is encrypted unless the dictionary says otherwise.
    metadata_flag = encryption_entry.get("/EncryptMetadata")
    encrypt_metadata = True if metadata_flag is None else metadata_flag.value

    values = EncryptionValues()
    values.O = cast(ByteStringObject, encryption_entry["/O"]).original_bytes
    values.U = cast(ByteStringObject, encryption_entry["/U"]).original_bytes
    # /OE, /UE and /Perms are optional and default to empty byte strings.
    values.OE = encryption_entry.get("/OE", ByteStringObject()).original_bytes
    values.UE = encryption_entry.get("/UE", ByteStringObject()).original_bytes
    values.Perms = encryption_entry.get("/Perms", ByteStringObject()).original_bytes

    return Encryption(
        V=alg_ver,
        R=alg_rev,
        Length=key_bits,
        P=perm_flags,
        EncryptMetadata=encrypt_metadata,
        first_id_entry=first_id_entry,
        values=values,
        StrF=str_filter,
        StmF=stm_filter,
        EFF=ef_filter,
        entry=encryption_entry,  # Dummy entry for the moment; will get removed
    )
@staticmethod
def make(
    alg: EncryptAlgorithm, permissions: int, first_id_entry: bytes
) -> "Encryption":
    """
    Create an ``Encryption`` for writing a document with algorithm ``alg``.

    ``alg`` unpacks into the values used for V, R and the key length.
    AES-128 uses the "/AESV2" method, both AES-256 variants use "/AESV3"
    and every other algorithm uses "/V2". The same method is applied to
    streams, strings and embedded files, and metadata encryption is
    always enabled.
    """
    alg_ver, alg_rev, key_bits = alg

    if alg == EncryptAlgorithm.AES_128:
        method = "/AESV2"
    elif alg in (EncryptAlgorithm.AES_256_R5, EncryptAlgorithm.AES_256):
        method = "/AESV3"
    else:
        method = "/V2"

    return Encryption(
        V=alg_ver,
        R=alg_rev,
        Length=key_bits,
        P=permissions,
        EncryptMetadata=True,
        first_id_entry=first_id_entry,
        values=None,
        StrF=method,
        StmF=method,
        EFF=method,
        entry=DictionaryObject(),  # Dummy entry for the moment; will get removed
    )