Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/ascii85.py: 17%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

36 statements  

1"""Python implementation of ASCII85/ASCIIHex decoder (Adobe version). 

2 

3This code is in the public domain. 

4 

5""" 

6 

7import re 

8import struct 

9 

10 

11# ascii85decode(data) 

def ascii85decode(data: bytes) -> bytes:
    """Decode Adobe-flavoured ASCII85 data.

    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used for round up.

    Adobe's ASCII85 implementation is slightly different from the original
    in how it handles the last characters: a final group of ``n`` digits
    (``n`` < 5), terminated by ``~``, is padded with the highest digit
    (``u``) and yields ``n - 1`` bytes.

    Decoding rules applied here:

    * ``!`` .. ``u`` are base-85 digits; five of them form one big-endian
      32-bit word.
    * ``z`` is shorthand for four zero bytes and is only valid between
      groups.
    * ``~`` marks end of data; everything after it is ignored.
    * Any other byte (whitespace included) is skipped.
    * If the input ends without ``~``, an unfinished trailing group is
      dropped.

    Args:
        data: The ASCII85-encoded bytes.

    Returns:
        The decoded bytes.

    Raises:
        AssertionError: If ``z`` appears in the middle of a group. The
            message is the number of digits already read in that group.
        struct.error: If a group encodes a value above ``2**32 - 1``.
    """
    first_digit = ord("!")
    last_digit = ord("u")
    zero_group = ord("z")
    end_of_data = ord("~")

    # A bytearray is extended in place; repeated ``bytes +=`` would copy the
    # whole output on every group.
    decoded = bytearray()
    count = 0  # digits collected for the current group
    value = 0  # base-85 value accumulated for the current group
    for byte in data:
        if first_digit <= byte <= last_digit:
            count += 1
            value = value * 85 + (byte - first_digit)
            if count == 5:
                decoded += struct.pack(">L", value)
                count = value = 0
        elif byte == zero_group:
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs under ``python -O``.
            if count != 0:
                raise AssertionError(str(count))
            decoded += b"\0\0\0\0"
        elif byte == end_of_data:
            if count:
                # Pad the short group with the highest digit, then keep
                # only the bytes that were actually encoded.
                for _ in range(5 - count):
                    value = value * 85 + 84
                decoded += struct.pack(">L", value)[: count - 1]
            break
    return bytes(decoded)

42 

43 

44# asciihexdecode(data) 

# One pair of hexadecimal digits (case-insensitive).
hex_re = re.compile(rb"([a-f\d]{2})", re.IGNORECASE)
# Input consisting only of hex pairs and whitespace, followed by a single
# unpaired hex digit (group 1) and optional trailing whitespace / ">".
trail_re = re.compile(rb"^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$", re.IGNORECASE)


def asciihexdecode(data: bytes) -> bytes:
    """ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1

    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored, including white space between the two digits
    of a pair. A right angle bracket character (>) indicates EOD; anything
    after it is discarded. If the data ends after an odd number of
    hexadecimal digits, the filter behaves as if a 0 followed the last
    digit.

    This implementation is lenient: characters that are neither hex digits
    nor white space do not raise an error; the pair scan simply skips over
    them. The odd-digit padding is also applied when the input ends without
    an EOD marker.

    Args:
        data: The ASCII-hex-encoded bytes.

    Returns:
        The decoded bytes.
    """
    # ``split()`` without a separator splits on runs of ASCII whitespace, so
    # re-joining the pieces removes all of it -- also inside a digit pair.
    digits = b"".join(data.split())

    # Honour the EOD marker: nothing after the first ">" belongs to the data.
    eod = digits.find(b">")
    if eod != -1:
        digits = digits[:eod]

    # Build the result in one pass instead of repeated ``bytes +=``.
    out = bytes(int(pair, 16) for pair in hex_re.findall(digits))

    # A single unpaired trailing digit is treated as the high nibble.
    m = trail_re.search(digits)
    if m:
        out += bytes((int(m.group(1) + b"0", 16),))
    return out