Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/models/ctm.py: 38%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

53 statements  

1# SPDX-FileCopyrightText: 2025 @rakurtz 

2# SPDX-FileCopyrightText: 2025 James R. Barlow 

3# SPDX-License-Identifier: MPL-2.0 

4 

5"""Parsing the matrixes in a PDF file.""" 

6 

7from __future__ import annotations 

8 

9from logging import getLogger 

10 

11from pikepdf._core import Matrix, Page 

12from pikepdf.models._content_stream import parse_content_stream 

13from pikepdf.objects import Operator 

14 

# Name the logger after the module's dotted import name rather than its file
# path, so that it is part of the enclosing package's logger hierarchy.
logger = getLogger(__name__)

# Content stream operators that are relevant for tracking the CTM.
OPERATOR_CM = Operator(
    'cm'
)  # "Concatenate Matrix": Changes the CTM (Current Transformation Matrix)
OPERATOR_DO = Operator('Do')  # "Draw Object": Draws the object named by its operand
OPERATOR_STACK = Operator('q')  # Stores the CTM to a stack
OPERATOR_POP = Operator('Q')  # Restores the previous CTM

22 

23 

class MatrixStack:
    """Keep track of the CTM (current transformation matrix) of a content stream.

    The CTM starts out as the initial matrix, which is the identity matrix
    unless another one is supplied. The 'cm' (concatenate matrix) operator
    changes it --> CTM = CTM x CM (CTM and CM both being 3x3 matrices).

    The 'q' operator saves the CTM: a copy of it is put onto the stack and any
    subsequent 'cm' operator only changes that copy.
      --> 'q 1 0 0 1 0 0 cm'
      --> Copy the CTM onto the stack and change the copy via 'cm'

    The 'Q' operator discards the current CTM and makes the previous one from
    the stack current again.

    Error handling:
      1. Popping when only one entry is left resets the CTM to the initial matrix
      2. A 'cm' with invalid operands marks the CTM as invalid (`None`)
      3. Multiplying an invalid CTM with a valid CM leaves the CTM invalid
      4. Stacking an invalid CTM puts another invalid entry onto the stack
      --> Every operation on an invalid CTM results in an invalid CTM
      --> The CTM is valid again once all invalid entries are popped off the stack
    """

    def __init__(self, initial_matrix: Matrix = Matrix.identity()) -> None:
        """Create a stack whose only entry is `initial_matrix`."""
        self._initial_matrix = initial_matrix
        # `None` entries stand for an invalidated CTM.
        self._stack: list[Matrix | None] = [initial_matrix]

    def stack(self):
        """Push a duplicate of the current CTM (handles the 'q' operator)."""
        top = self._stack[-1]
        self._stack.append(top)

    def pop(self):
        """Drop the current CTM from the stack (handles the 'Q' operator).

        The stack never becomes empty. Some PDFs contain invalid content
        streams with more 'Q' than 'q' operators; when only a single entry is
        left, the stack is reset to the initial matrix as a safe fallback
        instead of underflowing.
        """
        depth = len(self._stack)
        assert depth >= 1, "can't be empty"
        if depth > 1:
            self._stack.pop()
            return
        # Last remaining entry: start over from the initial matrix.
        self._stack = [self._initial_matrix]

    def multiply(self, matrix: Matrix):
        """Replace the CTM with CTM @ `matrix`; nothing is returned.

        An invalid CTM is left untouched, i.e. it stays invalid.
        """
        current = self._stack[-1]
        if current is not None:
            self._stack[-1] = current @ matrix

    def invalidate_current_transformation_matrix(self):
        """Mark the current CTM as invalid after an invalid CM occurred.

        The entry on top of the stack is replaced by `None`.
        """
        self._stack[-1] = None

    @property
    def ctm(self) -> Matrix | None:
        """Return the current transformation matrix, or `None` if it is invalid."""
        return self._stack[-1]

89 

90 

def get_objects_with_ctm(
    page: Page, initial_matrix: Matrix = Matrix.identity()
) -> list[tuple[str, Matrix]]:
    """Determine the current transformation matrix (CTM) for each drawn object.

    Iterates over the instructions yielded by ``parse_content_stream(page)``,
    tracks the CTM with a ``MatrixStack`` and records the CTM that is in
    effect at every ``Do`` operator.

    Malformed instructions are tolerated instead of raised:

    * a ``cm`` whose operands cannot be turned into a ``Matrix`` invalidates
      the CTM at the current stack level,
    * a ``Do`` encountered while the CTM is invalid is skipped,
    * a ``Do`` without any operand is skipped.

    Args:
        page: The page whose content stream is parsed.
        initial_matrix: The CTM in effect before the first instruction.

    Returns:
        A list of ``(name, ctm)`` tuples in the order of the ``Do`` operators,
        where ``name`` is ``str()`` of the operator's first operand. Objects
        with an invalid CTM are filtered out.
    """
    # Stores the matrixes and the corresponding objects
    objects_with_ctm: list[tuple[str, Matrix]] = []
    matrix_stack = MatrixStack(initial_matrix)
    for inst in parse_content_stream(page):
        operator, operands = inst.operator, inst.operands
        if operator == OPERATOR_STACK:
            matrix_stack.stack()

        elif operator == OPERATOR_POP:
            matrix_stack.pop()

        elif operator == OPERATOR_CM:
            try:
                matrix = Matrix(*operands)
            except (TypeError, ValueError):
                # NOTE(review): TypeError covers a wrong operand count/type;
                # ValueError is presumably raised for a single operand that
                # cannot be converted to a matrix -- confirm against Matrix.
                logger.debug("malformed operands for `cm` operator: %s", operands)
                matrix_stack.invalidate_current_transformation_matrix()
            else:
                matrix_stack.multiply(matrix)

        elif operator == OPERATOR_DO:
            if not operands:
                # Without an operand there is no object name to report.
                logger.debug("skipping `Do` operator without operands")
                continue
            name = str(operands[0])  # Name of the image (or other object)
            ctm = matrix_stack.ctm
            if ctm is None:
                logger.debug(
                    "skipping `Do` operator due to invalid CTM for object: %s", name
                )
                continue
            objects_with_ctm.append((name, ctm))

    return objects_with_ctm