1import re
2from typing import Any, Optional, Union, cast
3
4from .._cmap import _default_fonts_space_width, build_char_map_from_dict
5from .._utils import logger_warning
6from ..constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes
7from ..generic import (
8 DecodedStreamObject,
9 DictionaryObject,
10 NameObject,
11 NumberObject,
12 RectangleObject,
13)
14from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none
15
# Font size used for multiline text fields when the requested font size is 0
# (i.e. "choose automatically"); single-line fields derive it from the field height instead.
DEFAULT_FONT_SIZE_IN_MULTILINE = 12
17
18
class TextStreamAppearance(DecodedStreamObject):
    """
    A class representing the appearance stream for a text-based form field.

    This class generates the content stream (the `ap_stream_data`) that dictates
    how text is rendered within a form field's bounding box. It handles properties
    like font, font size, color, multiline text, and text selection highlighting.
    """

    @staticmethod
    def _escape_literal_string(value: str) -> str:
        """
        Escape a string for use inside a PDF literal string.

        Backslashes are doubled and parentheses are prefixed with a backslash
        (PDF 1.7 reference, table 3.2, Literal Strings).

        Args:
            value: The raw string.

        Returns:
            The escaped string.

        """
        return value.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

    def _generate_appearance_stream_data(
        self,
        text: str = "",
        selection: Optional[list[str]] = None,
        rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
        font_glyph_byte_map: Optional[dict[str, bytes]] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False
    ) -> bytes:
        """
        Generates the raw bytes of the PDF appearance stream for a text field.

        This private method assembles the PDF content stream operators to draw
        the provided text within the specified rectangle. It handles text positioning,
        font application, color, and special formatting like selected text.

        Args:
            text: The text to be rendered in the form field. Lines are separated
                by "\\n" or "\\r"; each line is emitted with its own Tj operator.
            selection: An optional list of strings that should be highlighted as selected.
                A line is highlighted when it is equal to one of these strings.
            rectangle: The bounding box of the form field. Can be a `RectangleObject`
                or a tuple of four floats (x1, y1, x2, y2).
            font_glyph_byte_map: An optional dictionary mapping characters to their
                byte representation for glyph encoding. Characters without an entry
                are encoded as UTF-16-BE.
            font_name: The name of the font resource to use (e.g., "/Helv").
            font_size: The font size. If 0, it is automatically calculated
                based on whether the field is multiline or not.
            font_color: The color to apply to the font, represented as a PDF
                graphics state string (e.g., "0 g" for black).
            is_multiline: A boolean indicating if the text field is multiline.

        Returns:
            A byte string containing the PDF content stream data.

        """
        font_glyph_byte_map = font_glyph_byte_map or {}
        if isinstance(rectangle, tuple):
            rectangle = RectangleObject(rectangle)

        # If font_size is 0, apply the logic for multiline or large-as-possible font
        if font_size == 0:
            if is_multiline:
                font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
            else:
                font_size = rectangle.height - 2

        # Set the vertical offset
        y_offset = rectangle.height - 1 - font_size
        default_appearance = f"{font_name} {font_size} Tf {font_color}"

        # Collect the stream pieces and join once at the end instead of
        # repeatedly concatenating bytes inside the loop.
        chunks: list[bytes] = [
            (
                f"q\n/Tx BMC \nq\n1 1 {rectangle.width - 1} {rectangle.height - 1} "
                f"re\nW\nBT\n{default_appearance}\n"
            ).encode()
        ]

        for line_number, line in enumerate(text.replace("\n", "\r").split("\r")):
            if selection and line in selection:
                # Might be improved, but cannot find how to get fill working => replaced with lined box
                chunks.append(
                    (
                        f"1 {y_offset - (line_number * font_size * 1.4) - 1} {rectangle.width - 2} {font_size + 2} re\n"
                        f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
                    ).encode()
                )
            if line_number == 0:
                chunks.append(f"2 {y_offset} Td\n".encode())
            else:
                # Td is a relative translation
                chunks.append(f"0 {-font_size * 1.4} Td\n".encode())
            encoded_line: list[bytes] = [
                font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
            ]
            # Any multi-byte glyph forces the hexadecimal string form; otherwise
            # the line is written as a literal string.
            if any(len(c) >= 2 for c in encoded_line):
                chunks.append(b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n")
            else:
                chunks.append(b"(" + b"".join(encoded_line) + b") Tj\n")
        chunks.append(b"ET\nQ\nEMC\nQ\n")
        return b"".join(chunks)

    def __init__(
        self,
        text: str = "",
        selection: Optional[list[str]] = None,
        rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
        font_resource: Optional[DictionaryObject] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False
    ) -> None:
        """
        Initializes a TextStreamAppearance object.

        This constructor creates a new PDF stream object configured as an XObject
        of subtype Form. It uses the `_generate_appearance_stream_data` method to
        generate the content for the stream.

        Args:
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
            rectangle: The bounding box of the form field. Can be a `RectangleObject`
                or a tuple of four floats (x1, y1, x2, y2).
            font_resource: An optional variable that represents a PDF font dictionary.
                When it is missing (or empty), a warning is logged and characters
                are encoded without a font-specific glyph map.
            font_name: The name of the font resource, e.g., "/Helv".
            font_size: The font size. If 0, it's auto-calculated.
            font_color: The font color string.
            is_multiline: A boolean indicating if the text field is multiline.

        """
        super().__init__()

        font_glyph_byte_map: dict[str, bytes]
        # If a font resource was added, get the font character map
        if font_resource:
            font_resource = cast(DictionaryObject, font_resource.get_object())
            _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
                200, font_resource
            )
            # Remove width stored in -1 key (if present)
            font_map.pop(-1, None)
            if isinstance(font_encoding, str):
                font_glyph_byte_map = {
                    v: k.encode(font_encoding) for k, v in font_map.items()
                }
            else:
                # Reverse the encoding table (character -> single byte), then
                # overlay the entries of the font character map on a copy of it.
                font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
                font_glyph_byte_map = dict(font_encoding_rev)
                for key, value in font_map.items():
                    font_glyph_byte_map[value] = font_encoding_rev.get(key, key)
        else:
            logger_warning(f"Font dictionary for {font_name} not found.", __name__)
            font_glyph_byte_map = {}

        ap_stream_data = self._generate_appearance_stream_data(
            text,
            selection,
            rectangle,
            font_glyph_byte_map,
            font_name,
            font_size,
            font_color,
            is_multiline
        )

        self[NameObject("/Type")] = NameObject("/XObject")
        self[NameObject("/Subtype")] = NameObject("/Form")
        self[NameObject("/BBox")] = RectangleObject(rectangle)
        self.set_data(ByteStringObject(ap_stream_data))
        self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
        # Update Resources with font information if necessary
        if font_resource is not None:
            self[NameObject("/Resources")] = DictionaryObject({
                NameObject("/Font"): DictionaryObject({
                    NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
                })
            })

    @classmethod
    def from_text_annotation(
        cls,
        acro_form: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
        field: DictionaryObject,
        annotation: DictionaryObject,
        user_font_name: str = "",
        user_font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Creates a TextStreamAppearance object from a text field annotation.

        This class method is a factory for creating a `TextStreamAppearance`
        instance by extracting all necessary information (bounding box, font,
        text content, etc.) from the PDF field and annotation dictionaries.
        It respects inheritance for properties like default appearance (`/DA`).

        Args:
            acro_form: The root AcroForm dictionary from the PDF catalog.
            field: The field dictionary object.
            annotation: The widget annotation dictionary object associated with the field.
            user_font_name: An optional user-provided font name to override the
                default. Defaults to an empty string, which means no override.
            user_font_size: An optional user-provided font size to override the
                default. Only values greater than 0 override; the default of -1
                (or any other value <= 0) keeps the size from the default appearance.

        Returns:
            A new `TextStreamAppearance` instance configured for the given field.

        Raises:
            ValueError: If the default appearance string contains no ``Tf`` operator
                or its size operand is not a number.

        """
        # Calculate rectangle dimensions
        _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
        rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))

        # Get default appearance dictionary from annotation
        default_appearance = annotation.get_inherited(
            AnnotationDictionaryAttributes.DA,
            acro_form.get(AnnotationDictionaryAttributes.DA, None),
        )
        if not default_appearance:
            # Create a default appearance if none was found in the annotation
            default_appearance = TextStringObject("/Helv 0 Tf 0 g")
        else:
            default_appearance = default_appearance.get_object()

        # Derive font name, size and color from the default appearance. Also set
        # user-provided font name and font size in the default appearance, if given.
        # For a font name, this presumes that we can find an associated font resource
        # dictionary. Uses the variable font_properties as an intermediate.
        # As per the PDF spec:
        # "At a minimum, the string [that is, default_appearance] shall include a Tf (text
        # font) operator along with its two operands, font and size" (Section 12.7.4.3
        # "Variable text" of the PDF 2.0 specification).
        font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
        font_name = font_properties.pop(font_properties.index("Tf") - 2)
        font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
        font_properties.remove("Tf")
        # Whatever remains after removing the Tf operator and its operands is the color
        font_color = " ".join(font_properties)
        # Determine the font name to use, prioritizing the user's input
        if user_font_name:
            font_name = user_font_name
        # Determine the font size to use, prioritizing the user's input
        if user_font_size > 0:
            font_size = user_font_size

        # Try to find a resource dictionary for the font
        document_resources: Any = cast(
            DictionaryObject,
            cast(
                DictionaryObject,
                annotation.get_inherited(
                    "/DR",
                    acro_form.get("/DR", DictionaryObject()),
                ),
            ).get_object(),
        )
        document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
        # _default_fonts_space_width keys is the list of Standard fonts
        if font_name not in document_font_resources and font_name not in _default_fonts_space_width:
            # ...or AcroForm dictionary
            # The fallback must be a DictionaryObject: a plain dict has no
            # get_object() and would raise AttributeError when /DR is absent.
            document_resources = cast(
                DictionaryObject,
                acro_form.get("/DR", DictionaryObject()),
            )
            document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
        font_resource = document_font_resources.get(font_name, None)
        if not is_null_or_none(font_resource):
            font_resource = cast(DictionaryObject, font_resource.get_object())

        # Retrieve field text, selected values and formatting information
        field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
        is_multiline = bool(field_flags & FieldDictionaryAttributes.FfBits.Multiline)
        if (
            field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
            field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
        ):
            # Choice field without the Combo flag: one line per option
            text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
            selection = field.get("/V", [])
            if not isinstance(selection, list):
                selection = [selection]
        else:  # /Tx
            text = field.get("/V", "")
            selection = []

        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        text = cls._escape_literal_string(text)
        # The selected values are matched against the (escaped) text lines, so
        # they need the same escaping; otherwise options containing "\", "(" or
        # ")" would never be highlighted.
        selection = [
            cls._escape_literal_string(item) if isinstance(item, str) else item
            for item in selection
        ]

        # Create the TextStreamAppearance instance
        new_appearance_stream = cls(
            text,
            selection,
            rectangle,
            font_resource,
            font_name,
            font_size,
            font_color,
            is_multiline
        )
        # Carry over the entries of an existing normal appearance (/AP /N), except
        # for the keys that describe the freshly generated stream itself.
        if AnnotationDictionaryAttributes.AP in annotation:
            for key, value in (
                cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
            ):
                if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    new_appearance_stream[key] = value

        return new_appearance_stream