1"""A dataclass that captures the CTM and Text State for a tj operation"""
2
3import math
4from dataclasses import dataclass, field
5from typing import Any, Dict, List, Union
6
7from .. import mult, orient
8from ._font import Font
9
10
@dataclass
class TextStateParams:
    """
    Text state parameters and operator values for a single text value in a
    TJ or Tj PDF operation.

    The derived attributes (tx, ty, displaced_tx, space_tx, font_height,
    flip_vertical, rotated) are not constructor arguments; they are computed
    in __post_init__ from the supplied values. Note that __post_init__ may
    also replace self.transform when the text is detected as rotated.

    Attributes:
        txt (str): the text to be rendered.
        font (Font): font object
        font_size (int | float): font size
        Tc (float): character spacing. Defaults to 0.0.
        Tw (float): word spacing. Defaults to 0.0.
        Tz (float): horizontal scaling. Defaults to 100.0.
        TL (float): leading, vertical displacement between text lines. Defaults to 0.0.
        Ts (float): text rise. Used for super/subscripts. Defaults to 0.0.
        transform (List[float]): effective transformation matrix.
        tx (float): x coord of rendered text, i.e. self.transform[4]
        ty (float): y coord of rendered text. May differ from self.transform[5] per self.Ts.
        displaced_tx (float): x coord immediately following rendered text
        space_tx (float): tx for a space character
        font_height (float): effective font height accounting for CTM
        flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.)
        rotated (bool): True if the text orientation is rotated with respect to the page.

    """

    txt: str
    font: Font
    font_size: Union[int, float]
    Tc: float = 0.0
    Tw: float = 0.0
    Tz: float = 100.0
    TL: float = 0.0
    Ts: float = 0.0
    transform: List[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    tx: float = field(default=0.0, init=False)
    ty: float = field(default=0.0, init=False)
    displaced_tx: float = field(default=0.0, init=False)
    space_tx: float = field(default=0.0, init=False)
    font_height: float = field(default=0.0, init=False)
    flip_vertical: bool = field(default=False, init=False)
    rotated: bool = field(default=False, init=False)

    def __post_init__(self) -> None:
        """Adjust self.transform for rotated text and compute the derived fields."""
        # NOTE(review): orient() and mult() are defined outside this file;
        # orient() presumably reports the text orientation in degrees and mult()
        # presumably multiplies two 6-element matrices - confirm against their
        # definitions.
        if orient(self.transform) in (90, 270):
            self.transform = mult(
                [1.0, -self.transform[1], -self.transform[2], 1.0, 0.0, 0.0],
                self.transform,
            )
            self.rotated = True
        # self.transform[0] AND self.transform[3] < 0 indicates true rotation.
        # If only self.transform[3] < 0, the y coords are simply inverted.
        if orient(self.transform) == 180 and self.transform[0] < -1e-6:
            self.transform = mult([-1.0, 0.0, 0.0, -1.0, 0.0, 0.0], self.transform)
            self.rotated = True
        self.displaced_tx = self.displaced_transform()[4]
        self.tx = self.transform[4]
        self.ty = self.render_transform()[5]
        self.space_tx = round(self.word_tx(" "), 3)
        if self.space_tx < 1e-6:
            # if the " " char is assigned 0 width (e.g. for fine tuned spacing
            # with TJ int operators a la crazyones.pdf), calculate space_tx as
            # a TD_offset of -2 * font.space_width where font.space_width is
            # the space_width calculated in _cmap.py.
            emulated_tx = self.word_tx("", self.font.space_width * -2)
            # word_tx() charges Tc once per character, so the empty word above
            # carries none. The emulated space stands in for a single glyph, so
            # add one glyph's worth of (horizontally scaled) character spacing.
            self.space_tx = round(emulated_tx + self.Tc * (self.Tz / 100.0), 3)
        self.font_height = self.font_size * math.sqrt(
            self.transform[1] ** 2 + self.transform[3] ** 2
        )
        # flip_vertical handles PDFs generated by Microsoft Word's "publish" command.
        self.flip_vertical = self.transform[3] < -1e-6  # inverts y axis

    def font_size_matrix(self) -> List[float]:
        """
        Font size matrix

        Returns:
            List[float]: 6-element matrix scaling x by font_size * Tz / 100,
            scaling y by font_size and translating y by the text rise (Ts).

        """
        return [
            self.font_size * (self.Tz / 100.0),
            0.0,
            0.0,
            self.font_size,
            0.0,
            self.Ts,
        ]

    def displaced_transform(self) -> List[float]:
        """Effective transform matrix after text has been rendered."""
        return mult(self.displacement_matrix(), self.transform)

    def render_transform(self) -> List[float]:
        """Effective transform matrix accounting for font size, Tz, and Ts."""
        return mult(self.font_size_matrix(), self.transform)

    def displacement_matrix(
        self, word: Union[str, None] = None, TD_offset: float = 0.0
    ) -> List[float]:
        """
        Text displacement matrix

        Args:
            word (str, optional): Defaults to None in which case self.txt displacement is
                returned.
            TD_offset (float, optional): translation applied by TD operator. Defaults to 0.0.

        Returns:
            List[float]: identity matrix whose x translation (index 4) is
            self.word_tx(word, TD_offset).

        """
        word = word if word is not None else self.txt
        return [1.0, 0.0, 0.0, 1.0, self.word_tx(word, TD_offset), 0.0]

    def word_tx(self, word: str, TD_offset: float = 0.0) -> float:
        """
        Horizontal text displacement for any word according to this text state.

        Sums the PDF per-glyph displacement
        ``tx = ((w0 - Tj / 1000) * Tfs + Tc + Tw) * Th`` over the word:
        character spacing (Tc) is charged once per character of ``word`` and
        word spacing (Tw) once per space character. The glyph count is
        approximated by ``len(word)``.

        Args:
            word (str): the text to measure.
            TD_offset (float, optional): offset subtracted from the font's word
                width before the result is divided by 1000 and scaled by the
                font size. Defaults to 0.0.

        Returns:
            float: horizontal displacement, scaled by Tz / 100.

        """
        glyph_advance = self.font_size * (
            (self.font.word_width(word) - TD_offset) / 1000.0
        )
        char_spacing = len(word) * self.Tc
        word_spacing = word.count(" ") * self.Tw
        return (glyph_advance + char_spacing + word_spacing) * (self.Tz / 100.0)

    @staticmethod
    def to_dict(inst: "TextStateParams") -> Dict[str, Any]:
        """
        Dataclass to dict for json.dumps serialization

        Args:
            inst (TextStateParams): instance to convert.

        Returns:
            Dict[str, Any]: every dataclass field of ``inst`` keyed by field
            name, except "font", which is omitted.

        """
        return {k: getattr(inst, k) for k in inst.__dataclass_fields__ if k != "font"}