Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_appearance_stream.py

1import re

2from typing import Any, Optional, Union, cast

4from .._cmap import _default_fonts_space_width, build_char_map_from_dict

5from .._utils import logger_warning

6from ..constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes

7from ..generic import (

8 DecodedStreamObject,

9 DictionaryObject,

10 NameObject,

11 NumberObject,

12 RectangleObject,

13)

14from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none

16DEFAULT_FONT_SIZE_IN_MULTILINE = 12

class TextStreamAppearance(DecodedStreamObject):
    """
    A class representing the appearance stream for a text-based form field.

    This class generates the content stream (the `ap_stream_data`) that dictates
    how text is rendered within a form field's bounding box. It handles properties
    like font, font size, color, multiline text, and text selection highlighting.
    """

    def _generate_appearance_stream_data(
        self,
        text: str = "",
        selection: Optional[list[str]] = None,
        rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
        font_glyph_byte_map: Optional[dict[str, bytes]] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False
    ) -> bytes:
        """
        Generates the raw bytes of the PDF appearance stream for a text field.

        This private method assembles the PDF content stream operators to draw
        the provided text within the specified rectangle. It handles text positioning,
        font application, color, and special formatting like selected text.

        Args:
            text: The text to be rendered in the form field. Lines are separated
                by "\\n" or "\\r".
            selection: An optional list of strings that should be highlighted as selected.
                A line is highlighted when it is equal to one of these strings.
            rectangle: The bounding box of the form field. Can be a `RectangleObject`
                or a tuple of four floats (x1, y1, x2, y2).
            font_glyph_byte_map: An optional dictionary mapping characters to their
                byte representation for glyph encoding. Characters missing from
                the map are encoded as UTF-16-BE.
            font_name: The name of the font resource to use (e.g., "/Helv").
            font_size: The font size. If 0, it is automatically calculated
                based on whether the field is multiline or not.
            font_color: The color to apply to the font, represented as a PDF
                graphics state string (e.g., "0 g" for black).
            is_multiline: A boolean indicating if the text field is multiline.

        Returns:
            A byte string containing the PDF content stream data.

        """
        font_glyph_byte_map = font_glyph_byte_map or {}
        if isinstance(rectangle, tuple):
            rectangle = RectangleObject(rectangle)

        # If font_size is 0, apply the logic for multiline or large-as-possible font
        if font_size == 0:
            if is_multiline:
                font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
            else:
                font_size = rectangle.height - 2

        # Set the vertical offset
        y_offset = rectangle.height - 1 - font_size
        default_appearance = f"{font_name} {font_size} Tf {font_color}"

        # Collect the pieces and join once at the end instead of growing a
        # bytes object with "+=" for every line of text.
        chunks: list[bytes] = [
            (
                f"q\n/Tx BMC \nq\n1 1 {rectangle.width - 1} {rectangle.height - 1} "
                f"re\nW\nBT\n{default_appearance}\n"
            ).encode()
        ]

        for line_number, line in enumerate(text.replace("\n", "\r").split("\r")):
            if selection and line in selection:
                # Might be improved, but cannot find how to get fill working => replaced with lined box
                chunks.append(
                    (
                        f"1 {y_offset - (line_number * font_size * 1.4) - 1} {rectangle.width - 2} {font_size + 2} re\n"
                        f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
                    ).encode()
                )
            if line_number == 0:
                chunks.append(f"2 {y_offset} Td\n".encode())
            else:
                # Td is a relative translation
                chunks.append(f"0 {-font_size * 1.4} Td\n".encode())
            encoded_line: list[bytes] = [
                font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
            ]
            # As soon as one glyph needs two or more bytes, the whole line is
            # written as a hexadecimal string; otherwise as a literal string.
            # NOTE(review): from_text_annotation backslash-escapes the text before
            # calling; in the hexadecimal branch those backslashes are encoded as
            # glyphs as well - confirm this is intended.
            if any(len(c) >= 2 for c in encoded_line):
                chunks.append(b"<" + b"".join(encoded_line).hex().encode() + b"> Tj\n")
            else:
                chunks.append(b"(" + b"".join(encoded_line) + b") Tj\n")
        chunks.append(b"ET\nQ\nEMC\nQ\n")
        return b"".join(chunks)

    def __init__(
        self,
        text: str = "",
        selection: Optional[list[str]] = None,
        rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
        font_resource: Optional[DictionaryObject] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False
    ) -> None:
        """
        Initializes a TextStreamAppearance object.

        This constructor creates a new PDF stream object configured as an XObject
        of subtype Form. It uses the `_generate_appearance_stream_data` method to
        generate the content for the stream.

        Args:
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
            rectangle: The bounding box of the form field. Can be a `RectangleObject`
                or a tuple of four floats (x1, y1, x2, y2).
            font_resource: An optional variable that represents a PDF font dictionary.
                When it is missing (or empty), a warning is logged and no glyph
                mapping is used.
            font_name: The name of the font resource, e.g., "/Helv".
            font_size: The font size. If 0, it's auto-calculated.
            font_color: The font color string.
            is_multiline: A boolean indicating if the text field is multiline.

        """
        super().__init__()

        font_glyph_byte_map: dict[str, bytes]
        # If a font resource was added, get the font character map
        if font_resource:
            font_resource = cast(DictionaryObject, font_resource.get_object())
            _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
                200, font_resource
            )
            # remove width stored in -1 key
            font_map.pop(-1, None)
            if isinstance(font_encoding, str):
                font_glyph_byte_map = {
                    v: k.encode(font_encoding) for k, v in font_map.items()
                }
            else:
                # Reverse the code -> character table once; the glyph map starts
                # as a copy of it and is then extended from the character map.
                font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
                font_glyph_byte_map = dict(font_encoding_rev)
                for key, value in font_map.items():
                    # NOTE(review): when `key` is not in font_encoding_rev, the
                    # fallback stores `key` itself, which is presumably a str
                    # rather than bytes - confirm against build_char_map_from_dict.
                    font_glyph_byte_map[value] = font_encoding_rev.get(key, key)
        else:
            logger_warning(f"Font dictionary for {font_name} not found.", __name__)
            font_glyph_byte_map = {}

        ap_stream_data = self._generate_appearance_stream_data(
            text,
            selection,
            rectangle,
            font_glyph_byte_map,
            font_name,
            font_size,
            font_color,
            is_multiline
        )

        self[NameObject("/Type")] = NameObject("/XObject")
        self[NameObject("/Subtype")] = NameObject("/Form")
        self[NameObject("/BBox")] = RectangleObject(rectangle)
        self.set_data(ByteStringObject(ap_stream_data))
        self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
        # Update Resources with font information if necessary
        if font_resource is not None:
            self[NameObject("/Resources")] = DictionaryObject({
                NameObject("/Font"): DictionaryObject({
                    # Prefer the indirect reference when the font object has one
                    NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
                })
            })

    @classmethod
    def from_text_annotation(
        cls,
        acro_form: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
        field: DictionaryObject,
        annotation: DictionaryObject,
        user_font_name: str = "",
        user_font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Creates a TextStreamAppearance object from a text field annotation.

        This class method is a factory for creating a `TextStreamAppearance`
        instance by extracting all necessary information (bounding box, font,
        text content, etc.) from the PDF field and annotation dictionaries.
        It respects inheritance for properties like default appearance (`/DA`).

        Args:
            acro_form: The root AcroForm dictionary from the PDF catalog.
            field: The field dictionary object.
            annotation: The widget annotation dictionary object associated with the field.
            user_font_name: An optional user-provided font name to override the
                default. Defaults to an empty string.
            user_font_size: An optional user-provided font size to override the
                default. Only values greater than 0 override; the default of -1
                indicates no override.

        Returns:
            A new `TextStreamAppearance` instance configured for the given field.

        """
        # Calculate rectangle dimensions
        _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
        rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))

        # Get default appearance dictionary from annotation
        default_appearance = annotation.get_inherited(
            AnnotationDictionaryAttributes.DA,
            acro_form.get(AnnotationDictionaryAttributes.DA, None),
        )
        if not default_appearance:
            # Create a default appearance if none was found in the annotation
            default_appearance = TextStringObject("/Helv 0 Tf 0 g")
        else:
            default_appearance = default_appearance.get_object()

        # Derive font name, size and color from the default appearance. Also set
        # user-provided font name and font size in the default appearance, if given.
        # For a font name, this presumes that we can find an associated font resource
        # dictionary. Uses the variable font_properties as an intermediate.
        # As per the PDF spec:
        # "At a minimum, the string [that is, default_appearance] shall include a Tf (text
        # font) operator along with its two operands, font and size" (Section 12.7.4.3
        # "Variable text" of the PDF 2.0 specification).
        # NOTE(review): a default appearance without "Tf", or with fewer than two
        # tokens in front of it, is not handled here - confirm whether malformed
        # files need a fallback.
        font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
        font_name = font_properties.pop(font_properties.index("Tf") - 2)
        font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
        font_properties.remove("Tf")
        font_color = " ".join(font_properties)
        # Determine the font name to use, prioritizing the user's input
        if user_font_name:
            font_name = user_font_name
        # Determine the font size to use, prioritizing the user's input
        if user_font_size > 0:
            font_size = user_font_size

        # Try to find a resource dictionary for the font
        document_resources: Any = cast(
            DictionaryObject,
            cast(
                DictionaryObject,
                annotation.get_inherited(
                    "/DR",
                    acro_form.get("/DR", DictionaryObject()),
                ),
            ).get_object(),
        )
        document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
        # _default_fonts_space_width keys is the list of Standard fonts
        if font_name not in document_font_resources and font_name not in _default_fonts_space_width:
            # ...or AcroForm dictionary
            # The fallback must be a DictionaryObject: get_object() is called on
            # it below, and a plain dict does not provide that method.
            document_resources = cast(
                DictionaryObject,
                acro_form.get("/DR", DictionaryObject()),
            )
            document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
        font_resource = document_font_resources.get(font_name, None)
        if is_null_or_none(font_resource):
            # Treat a null entry like a missing one, so that the constructor
            # takes its "font not found" path.
            font_resource = None
        else:
            font_resource = cast(DictionaryObject, font_resource.get_object())

        # Retrieve field text, selected values and formatting information
        is_multiline = False
        field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
        if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
            is_multiline = True
        if (
            field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
            field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
        ):
            # Choice field that is not a combo box: one line per option.
            # NOTE(review): assumes every /Opt entry is a plain string - confirm
            # how entries given as arrays should be rendered.
            text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
            selection = field.get("/V", [])
            if not isinstance(selection, list):
                selection = [selection]
        else:  # /Tx
            text = field.get("/V", "")
            selection = []

        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

        # Create the TextStreamAppearance instance
        new_appearance_stream = cls(
            text,
            selection,
            rectangle,
            font_resource,
            font_name,
            font_size,
            font_color,
            is_multiline
        )
        if AnnotationDictionaryAttributes.AP in annotation:
            # Carry over the entries of the existing normal appearance, except
            # for the keys describing the stream itself, which were just set.
            for key, value in (
                cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
            ):
                if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    new_appearance_stream[key] = value

        return new_appearance_stream

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_appearance_stream.py: 13%

100 statements