Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/ascii85.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

19 statements  

1"""Python implementation of ASCII85/ASCIIHex decoder (Adobe version).""" 

2 

3import re 

4from base64 import a85decode 

5from binascii import unhexlify 

6 

start_re = re.compile(rb"^\s*<?\s*~\s*")
end_re = re.compile(rb"\s*~\s*>?\s*$")

# Bytes skipped inside the encoded payload. This is the default ignore set
# of `base64.a85decode` (space, TAB, LF, CR, VT) extended with form feed and
# NUL, which PDF also classifies as white-space and which the filter must
# therefore ignore rather than reject.
_A85_IGNORED = b" \t\n\r\v\f\x00"


def ascii85decode(data: bytes) -> bytes:
    """Decode an ASCII85 (Adobe flavour) encoded byte string.

    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used for round up.

    Adobe's ASCII85 implementation expects the input to be terminated
    by `b"~>"`, and (though this is absent from the PDF spec) it can
    also begin with `b"<~"`. We can't reliably expect this to be the
    case, and there can be off-by-one errors in stream lengths which
    mean we only see `~` at the end. Worse yet, `<` and `>` are
    ASCII85 digits, so we can't strip them. We settle on a compromise
    where we strip leading `<~` or `~` and trailing `~` or `~>`.

    White-space between digits is ignored, including form feed and NUL.

    Args:
        data: The encoded bytes, with or without the Adobe delimiters.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If the payload is not valid ASCII85 (propagated from
            `base64.a85decode`).
    """
    # Strip the optional delimiters ourselves instead of using
    # `a85decode(adobe=True)`, which would insist on a complete `<~ ... ~>`.
    data = start_re.sub(b"", data)
    data = end_re.sub(b"", data)
    return a85decode(data, ignorechars=_A85_IGNORED)

28 

29 

bws_re = re.compile(rb"\s")


def asciihexdecode(data: bytes) -> bytes:
    """ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1

    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored. A right angle bracket character (>) indicates
    EOD. Any other characters will cause an error. If the filter encounters
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.

    The end of the input is treated as an implicit EOD, so an odd number of
    digits is padded with a 0 even when the `>` marker is missing (for
    example because the stream length was off by one).

    Args:
        data: The hex-encoded bytes, optionally terminated by `>`.

    Returns:
        The decoded bytes.

    Raises:
        binascii.Error: If a character other than a hexadecimal digit
            remains before the EOD marker once white-space is removed.
    """
    digits = bws_re.sub(b"", data)
    # Keep only what precedes the first EOD marker; when there is no marker
    # `partition` hands back the whole input unchanged.
    digits, _, _ = digits.partition(b">")
    if len(digits) % 2:
        # A dangling final digit is the high nibble of the last byte.
        digits += b"0"
    return unhexlify(digits)