1"""A dataclass that captures the CTM and Text State for a tj operation"""
2
3import math
4from dataclasses import dataclass, field
5from typing import Any, Union
6
7from ..._font import Font
8from .. import mult, orient
9
10
@dataclass
class TextStateParams:
    """
    Text state parameters and operator values for a single text value in a
    TJ or Tj PDF operation.

    Attributes:
        value (bytes | str): the raw text to be rendered.
        font (Font): font object
        font_size (int | float): font size
        Tc (float): character spacing. Defaults to 0.0.
        Tw (float): word spacing. Defaults to 0.0.
        Tz (float): horizontal scaling. Defaults to 100.0.
        TL (float): leading, vertical displacement between text lines. Defaults to 0.0.
        Ts (float): text rise. Used for super/subscripts. Defaults to 0.0.
        transform (list[float]): effective transformation matrix. Defaults to
            the identity matrix; replaced in __post_init__ when the text is
            found to be rotated.
        tx (float): x coord of rendered text, i.e. self.transform[4]
        ty (float): y coord of rendered text. May differ from self.transform[5] per self.Ts.
        displaced_tx (float): x coord immediately following rendered text
        space_tx (float): tx for a space character
        font_height (float): effective font height accounting for CTM
        flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.)
        rotated (bool): True if the text orientation is rotated with respect to the page.
        text (str): text derived from ``value``; always overwritten in
            __post_init__ (decoded and passed through font.character_map for
            bytes values, ``value`` itself for str values).
        _decoded_value (str): ``value`` decoded with the font encoding but
            before character-map translation. Only populated when ``value``
            is bytes.

    """

    value: Union[bytes, str]
    font: Font
    font_size: Union[int, float]
    Tc: float = 0.0
    Tw: float = 0.0
    Tz: float = 100.0
    TL: float = 0.0
    Ts: float = 0.0
    transform: list[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    tx: float = field(default=0.0, init=False)
    ty: float = field(default=0.0, init=False)
    displaced_tx: float = field(default=0.0, init=False)
    space_tx: float = field(default=0.0, init=False)
    font_height: float = field(default=0.0, init=False)
    flip_vertical: bool = field(default=False, init=False)
    rotated: bool = field(default=False, init=False)
    text: str = ""
    _decoded_value: str = ""

    def __post_init__(self) -> None:
        """Decode ``value`` and compute every derived (``init=False``) field."""
        if isinstance(self.value, bytes):
            self._decoded_value = self._decode_bytes(self.value)
            self.text = "".join(
                self.font.character_map.get(x, x) for x in self._decoded_value
            )
        else:
            self.text = self.value

        if orient(self.transform) in (90, 270):
            self.transform = mult(
                [1.0, -self.transform[1], -self.transform[2], 1.0, 0.0, 0.0],
                self.transform,
            )
            self.rotated = True
        # self.transform[0] AND self.transform[3] < 0 indicates true rotation.
        # If only self.transform[3] < 0, the y coords are simply inverted.
        if orient(self.transform) == 180 and self.transform[0] < -1e-6:
            self.transform = mult([-1.0, 0.0, 0.0, -1.0, 0.0, 0.0], self.transform)
            self.rotated = True
        self.displaced_tx = self.displaced_transform()[4]
        self.tx = self.transform[4]
        self.ty = self.render_transform()[5]
        self.space_tx = round(self.word_tx(self.font.space_char), 3)
        if self.space_tx < 1e-6:
            # if the " " char is assigned 0 width (e.g. for fine tuned spacing
            # with TJ int operators a la crazyones.pdf), calculate space_tx as
            # a td_offset of -1 * font.space_width where font.space_width is
            # the space_width calculated in _font.py.
            self.space_tx = round(self.word_tx("", -self.font.space_width), 3)
        self.font_height = self.font_size * math.sqrt(
            self.transform[1] ** 2 + self.transform[3] ** 2
        )
        # flip_vertical handles PDFs generated by Microsoft Word's "publish" command.
        self.flip_vertical = self.transform[3] < -1e-6  # inverts y axis

    def _decode_bytes(self, raw: bytes) -> str:
        """
        Decode raw bytes with this text state's font encoding.

        Args:
            raw (bytes): byte string to decode.

        Returns:
            str: ``raw`` decoded with ``self.font.encoding``. A str encoding is
            treated as a codec name; any other encoding is indexed per byte
            value. If that attempt raises UnicodeDecodeError, the whole of
            ``raw`` is decoded as UTF-8 with replacement characters instead.

        """
        encoding = self.font.encoding
        try:
            if isinstance(encoding, str):
                return raw.decode(encoding, "surrogatepass")
            # Byte values missing from the mapping are decoded individually;
            # values above 0x7F are not valid standalone UTF-8 and raise,
            # which triggers the whole-string fallback below.
            return "".join(
                encoding[code] if code in encoding else bytes((code,)).decode()
                for code in raw
            )
        except UnicodeDecodeError:
            return raw.decode("utf-8", "replace")

    def font_size_matrix(self) -> list[float]:
        """Font size matrix: scales by font size (and Tz horizontally), rises by Ts."""
        return [
            self.font_size * (self.Tz / 100.0),
            0.0,
            0.0,
            self.font_size,
            0.0,
            self.Ts,
        ]

    def displaced_transform(self) -> list[float]:
        """Effective transform matrix after text has been rendered."""
        return mult(self.displacement_matrix(), self.transform)

    def render_transform(self) -> list[float]:
        """Effective transform matrix accounting for font size, Tz, and Ts."""
        return mult(self.font_size_matrix(), self.transform)

    def displacement_matrix(
        self, word: Union[bytes, str, None] = None, td_offset: float = 0.0
    ) -> list[float]:
        """
        Text displacement matrix

        Args:
            word (bytes | str, optional): Defaults to None in which case the
                displacement of self.value is returned.
            td_offset (float, optional): translation applied by TD operator. Defaults to 0.0.

        Returns:
            list[float]: a pure x translation by ``self.word_tx(word, td_offset)``.

        """
        word = word if word is not None else self.value
        return [1.0, 0.0, 0.0, 1.0, self.word_tx(word, td_offset), 0.0]

    def word_tx(self, word: Union[bytes, str], td_offset: float = 0.0) -> float:
        """
        Horizontal text displacement for any word according to this text state

        Args:
            word (bytes | str): the word to measure. Bytes are decoded with the
                font encoding before measuring.
            td_offset (float, optional): offset subtracted from the summed
                glyph widths before scaling by font size. Defaults to 0.0.

        Returns:
            float: the horizontal displacement of ``word``.

        """
        if isinstance(word, bytes):
            if isinstance(self.value, bytes) and word == self.value:
                # Common case: reuse the decode already done in __post_init__.
                word = self._decoded_value
            else:
                # Measure the bytes actually passed in rather than silently
                # substituting the cached decode of self.value.
                word = self._decode_bytes(word)

        width: float = 0.0
        for char in word:
            if char == self.font.space_char:
                width += self.font.space_width
            else:
                width += self.font.get_text_width(char)

        # NOTE(review): Tc is added once per word here, whereas the PDF text
        # model applies character spacing per glyph - confirm this
        # approximation is intended before changing it.
        return (
            (self.font_size * ((width - td_offset) / 1000.0))
            + self.Tc
            + word.count(self.font.space_char) * self.Tw
        ) * (self.Tz / 100.0)

    @staticmethod
    def to_dict(inst: "TextStateParams") -> dict[str, Any]:
        """
        Dataclass to dict for json.dumps serialization

        Args:
            inst (TextStateParams): the instance to convert.

        Returns:
            dict[str, Any]: every dataclass field of ``inst`` except ``font``,
            keyed by field name. Values are returned as stored, without copying.

        """
        return {k: getattr(inst, k) for k in inst.__dataclass_fields__ if k != "font"}