Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_appearance_stream.py: 13%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

100 statements  

1import re 

2from typing import Any, Optional, Union, cast 

3 

4from .._cmap import _default_fonts_space_width, build_char_map_from_dict 

5from .._utils import logger_warning 

6from ..constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes 

7from ..generic import ( 

8 DecodedStreamObject, 

9 DictionaryObject, 

10 NameObject, 

11 NumberObject, 

12 RectangleObject, 

13) 

14from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none 

15 

16DEFAULT_FONT_SIZE_IN_MULTILINE = 12 

17 

18 

19class TextStreamAppearance(DecodedStreamObject): 

20 """ 

21 A class representing the appearance stream for a text-based form field. 

22 

23 This class generates the content stream (the `ap_stream_data`) that dictates 

24 how text is rendered within a form field's bounding box. It handles properties 

25 like font, font size, color, multiline text, and text selection highlighting. 

26 """ 

27 

28 def _generate_appearance_stream_data( 

29 self, 

30 text: str = "", 

31 selection: Optional[list[str]] = None, 

32 rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0), 

33 font_glyph_byte_map: Optional[dict[str, bytes]] = None, 

34 font_name: str = "/Helv", 

35 font_size: float = 0.0, 

36 font_color: str = "0 g", 

37 is_multiline: bool = False 

38 ) -> bytes: 

39 """ 

40 Generates the raw bytes of the PDF appearance stream for a text field. 

41 

42 This private method assembles the PDF content stream operators to draw 

43 the provided text within the specified rectangle. It handles text positioning, 

44 font application, color, and special formatting like selected text. 

45 

46 Args: 

47 text: The text to be rendered in the form field. 

48 selection: An optional list of strings that should be highlighted as selected. 

49 font_glyph_byte_map: An optional dictionary mapping characters to their 

50 byte representation for glyph encoding. 

51 rect: The bounding box of the form field. Can be a `RectangleObject` 

52 or a tuple of four floats (x1, y1, x2, y2). 

53 font_name: The name of the font resource to use (e.g., "/Helv"). 

54 font_size: The font size. If 0, it is automatically calculated 

55 based on whether the field is multiline or not. 

56 font_color: The color to apply to the font, represented as a PDF 

57 graphics state string (e.g., "0 g" for black). 

58 is_multiline: A boolean indicating if the text field is multiline. 

59 

60 Returns: 

61 A byte string containing the PDF content stream data. 

62 

63 """ 

64 font_glyph_byte_map = font_glyph_byte_map or {} 

65 if isinstance(rectangle, tuple): 

66 rectangle = RectangleObject(rectangle) 

67 

68 # If font_size is 0, apply the logic for multiline or large-as-possible font 

69 if font_size == 0: 

70 if is_multiline: 

71 font_size = DEFAULT_FONT_SIZE_IN_MULTILINE 

72 else: 

73 font_size = rectangle.height - 2 

74 

75 # Set the vertical offset 

76 y_offset = rectangle.height - 1 - font_size 

77 default_appearance = f"{font_name} {font_size} Tf {font_color}" 

78 

79 ap_stream = ( 

80 f"q\n/Tx BMC \nq\n1 1 {rectangle.width - 1} {rectangle.height - 1} " 

81 f"re\nW\nBT\n{default_appearance}\n" 

82 ).encode() 

83 

84 for line_number, line in enumerate(text.replace("\n", "\r").split("\r")): 

85 if selection and line in selection: 

86 # Might be improved, but cannot find how to get fill working => replaced with lined box 

87 ap_stream += ( 

88 f"1 {y_offset - (line_number * font_size * 1.4) - 1} {rectangle.width - 2} {font_size + 2} re\n" 

89 f"0.5 0.5 0.5 rg s\n{default_appearance}\n" 

90 ).encode() 

91 if line_number == 0: 

92 ap_stream += f"2 {y_offset} Td\n".encode() 

93 else: 

94 # Td is a relative translation 

95 ap_stream += f"0 {-font_size * 1.4} Td\n".encode() 

96 encoded_line: list[bytes] = [ 

97 font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line 

98 ] 

99 if any(len(c) >= 2 for c in encoded_line): 

100 ap_stream += b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n" 

101 else: 

102 ap_stream += b"(" + b"".join(encoded_line) + b") Tj\n" 

103 ap_stream += b"ET\nQ\nEMC\nQ\n" 

104 return ap_stream 

105 

106 def __init__( 

107 self, 

108 text: str = "", 

109 selection: Optional[list[str]] = None, 

110 rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0), 

111 font_resource: Optional[DictionaryObject] = None, 

112 font_name: str = "/Helv", 

113 font_size: float = 0.0, 

114 font_color: str = "0 g", 

115 is_multiline: bool = False 

116 ) -> None: 

117 """ 

118 Initializes a TextStreamAppearance object. 

119 

120 This constructor creates a new PDF stream object configured as an XObject 

121 of subtype Form. It uses the `_appearance_stream_data` method to generate 

122 the content for the stream. 

123 

124 Args: 

125 text: The text to be rendered in the form field. 

126 selection: An optional list of strings that should be highlighted as selected. 

127 rect: The bounding box of the form field. Can be a `RectangleObject` 

128 or a tuple of four floats (x1, y1, x2, y2). 

129 font_resource: An optional variable that represents a PDF font dictionary. 

130 font_name: The name of the font resource, e.g., "/Helv". 

131 font_size: The font size. If 0, it's auto-calculated. 

132 font_color: The font color string. 

133 is_multiline: A boolean indicating if the text field is multiline. 

134 

135 """ 

136 super().__init__() 

137 

138 # If a font resource was added, get the font character map 

139 if font_resource: 

140 font_resource = cast(DictionaryObject, font_resource.get_object()) 

141 _font_subtype, _, font_encoding, font_map = build_char_map_from_dict( 

142 200, font_resource 

143 ) 

144 try: # remove width stored in -1 key 

145 del font_map[-1] 

146 except KeyError: 

147 pass 

148 font_glyph_byte_map: dict[str, bytes] 

149 if isinstance(font_encoding, str): 

150 font_glyph_byte_map = { 

151 v: k.encode(font_encoding) for k, v in font_map.items() 

152 } 

153 else: 

154 font_glyph_byte_map = {v: bytes((k,)) for k, v in font_encoding.items()} 

155 font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()} 

156 for key, value in font_map.items(): 

157 font_glyph_byte_map[value] = font_encoding_rev.get(key, key) 

158 else: 

159 logger_warning(f"Font dictionary for {font_name} not found.", __name__) 

160 font_glyph_byte_map = {} 

161 

162 ap_stream_data = self._generate_appearance_stream_data( 

163 text, 

164 selection, 

165 rectangle, 

166 font_glyph_byte_map, 

167 font_name, 

168 font_size, 

169 font_color, 

170 is_multiline 

171 ) 

172 

173 self[NameObject("/Type")] = NameObject("/XObject") 

174 self[NameObject("/Subtype")] = NameObject("/Form") 

175 self[NameObject("/BBox")] = RectangleObject(rectangle) 

176 self.set_data(ByteStringObject(ap_stream_data)) 

177 self[NameObject("/Length")] = NumberObject(len(ap_stream_data)) 

178 # Update Resources with font information if necessary 

179 if font_resource is not None: 

180 self[NameObject("/Resources")] = DictionaryObject({ 

181 NameObject("/Font"): DictionaryObject({ 

182 NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource) 

183 }) 

184 }) 

185 

186 @classmethod 

187 def from_text_annotation( 

188 cls, 

189 acro_form: DictionaryObject, # _root_object[CatalogDictionary.ACRO_FORM]) 

190 field: DictionaryObject, 

191 annotation: DictionaryObject, 

192 user_font_name: str = "", 

193 user_font_size: float = -1, 

194 ) -> "TextStreamAppearance": 

195 """ 

196 Creates a TextStreamAppearance object from a text field annotation. 

197 

198 This class method is a factory for creating a `TextStreamAppearance` 

199 instance by extracting all necessary information (bounding box, font, 

200 text content, etc.) from the PDF field and annotation dictionaries. 

201 It respects inheritance for properties like default appearance (`/DA`). 

202 

203 Args: 

204 acro_form: The root AcroForm dictionary from the PDF catalog. 

205 field: The field dictionary object. 

206 annotation: The widget annotation dictionary object associated with the field. 

207 user_font_name: An optional user-provided font name to override the 

208 default. Defaults to an empty string. 

209 user_font_size: An optional user-provided font size to override the 

210 default. A value of -1 indicates no override. 

211 

212 Returns: 

213 A new `TextStreamAppearance` instance configured for the given field. 

214 

215 """ 

216 # Calculate rectangle dimensions 

217 _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect]) 

218 rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1]))) 

219 

220 # Get default appearance dictionary from annotation 

221 default_appearance = annotation.get_inherited( 

222 AnnotationDictionaryAttributes.DA, 

223 acro_form.get(AnnotationDictionaryAttributes.DA, None), 

224 ) 

225 if not default_appearance: 

226 # Create a default appearance if none was found in the annotation 

227 default_appearance = TextStringObject("/Helv 0 Tf 0 g") 

228 else: 

229 default_appearance = default_appearance.get_object() 

230 

231 # Derive font name, size and color from the default appearance. Also set 

232 # user-provided font name and font size in the default appearance, if given. 

233 # For a font name, this presumes that we can find an associated font resource 

234 # dictionary. Uses the variable font_properties as an intermediate. 

235 # As per the PDF spec: 

236 # "At a minimum, the string [that is, default_appearance] shall include a Tf (text 

237 # font) operator along with its two operands, font and size" (Section 12.7.4.3 

238 # "Variable text" of the PDF 2.0 specification). 

239 font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop] 

240 font_name = font_properties.pop(font_properties.index("Tf") - 2) 

241 font_size = float(font_properties.pop(font_properties.index("Tf") - 1)) 

242 font_properties.remove("Tf") 

243 font_color = " ".join(font_properties) 

244 # Determine the font name to use, prioritizing the user's input 

245 if user_font_name: 

246 font_name = user_font_name 

247 # Determine the font size to use, prioritizing the user's input 

248 if user_font_size > 0: 

249 font_size = user_font_size 

250 

251 # Try to find a resource dictionary for the font 

252 document_resources: Any = cast( 

253 DictionaryObject, 

254 cast( 

255 DictionaryObject, 

256 annotation.get_inherited( 

257 "/DR", 

258 acro_form.get("/DR", DictionaryObject()), 

259 ), 

260 ).get_object(), 

261 ) 

262 document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object() 

263 # _default_fonts_space_width keys is the list of Standard fonts 

264 if font_name not in document_font_resources and font_name not in _default_fonts_space_width: 

265 # ...or AcroForm dictionary 

266 document_resources = cast( 

267 dict[Any, Any], 

268 acro_form.get("/DR", {}), 

269 ) 

270 document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object() 

271 font_resource = document_font_resources.get(font_name, None) 

272 if not is_null_or_none(font_resource): 

273 font_resource = cast(DictionaryObject, font_resource.get_object()) 

274 

275 # Retrieve field text, selected values and formatting information 

276 is_multiline = False 

277 field_flags = field.get(FieldDictionaryAttributes.Ff, 0) 

278 if field_flags & FieldDictionaryAttributes.FfBits.Multiline: 

279 is_multiline = True 

280 if ( 

281 field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and 

282 field_flags & FieldDictionaryAttributes.FfBits.Combo == 0 

283 ): 

284 text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, [])) 

285 selection = field.get("/V", []) 

286 if not isinstance(selection, list): 

287 selection = [selection] 

288 else: # /Tx 

289 text = field.get("/V", "") 

290 selection = [] 

291 

292 # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings) 

293 text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)") 

294 

295 # Create the TextStreamAppearance instance 

296 new_appearance_stream = cls( 

297 text, 

298 selection, 

299 rectangle, 

300 font_resource, 

301 font_name, 

302 font_size, 

303 font_color, 

304 is_multiline 

305 ) 

306 if AnnotationDictionaryAttributes.AP in annotation: 

307 for key, value in ( 

308 cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items() 

309 ): 

310 if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}: 

311 new_appearance_stream[key] = value 

312 

313 return new_appearance_stream