Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pypdf/generic/_appearance_stream.py: 12%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

170 statements  

1import re 

2from enum import IntEnum 

3from typing import Any, Optional, Union, cast 

4 

5from .._cmap import build_char_map_from_dict 

6from .._codecs.core_fontmetrics import CORE_FONT_METRICS 

7from .._font import FontDescriptor 

8from .._utils import logger_warning 

9from ..constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes 

10from ..generic import ( 

11 DecodedStreamObject, 

12 DictionaryObject, 

13 NameObject, 

14 NumberObject, 

15 RectangleObject, 

16) 

17from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none 

18 

# Starting font size, in points, for multiline fields whose font size is 0
# (i.e. left to be chosen automatically); it may still be scaled down to fit.
DEFAULT_FONT_SIZE_IN_MULTILINE = 12

20 

21 

class TextAlignment(IntEnum):
    """
    Defines the alignment options for text within a form field's appearance stream.

    The integer values match the ones stored in a field's ``/Q`` entry, so a raw
    ``/Q`` value can be compared directly against these members.
    """

    LEFT = 0  # Used as the default when a field has no /Q entry
    CENTER = 1
    RIGHT = 2

28 

29 

30class TextStreamAppearance(DecodedStreamObject): 

31 """ 

32 A class representing the appearance stream for a text-based form field. 

33 

34 This class generates the content stream (the `ap_stream_data`) that dictates 

35 how text is rendered within a form field's bounding box. It handles properties 

36 like font, font size, color, multiline text, and text selection highlighting. 

37 """ 

38 

def _scale_text(
    self,
    font_descriptor: "FontDescriptor",
    font_size: float,
    field_width: float,
    field_height: float,
    text: str,
    is_multiline: bool,
    min_font_size: float = 4.0,  # Minimum font size to attempt
    font_size_step: float = 0.2  # How much to decrease font size by each step
) -> tuple[list[tuple[float, str]], float]:
    """
    Fit a piece of text into a field, shrinking the font size where necessary.

    A single-line text is scaled until it fits both field_width and field_height.
    A multiline text is word-wrapped to field_width and scaled until the estimated
    total height of all lines fits field_height.

    The font size is reduced iteratively (not recursively), so very tall fields
    that need many reduction steps cannot exhaust the interpreter's recursion limit.

    Args:
        font_descriptor: A FontDescriptor for the font to be used. Only its
            ``text_width`` method is used, which is expected to return widths
            in 1/1000 of the font size.
        font_size: The font size in points at which to start.
        field_width: The width of the field in which to fit the text.
        field_height: The height of the field in which to fit the text.
        text: The text to fit with the field.
        is_multiline: Whether to scale and wrap the text, or only to scale.
        min_font_size: The minimum font size at which to scale the text.
        font_size_step: The amount by which to decrement font size per step while scaling.

    Returns:
        The text in the form of list of tuples, each tuple containing the length of a line
        and its contents, and the font_size for these lines and lengths. If the text
        still overflows at the smallest permitted size, the result for the last size
        tried is returned.

    """
    def measure(piece: str, size: float) -> float:
        # Font metrics are expressed per 1000 units of font size.
        return font_descriptor.text_width(piece) * size / 1000

    def wrap(size: float) -> list[tuple[float, str]]:
        # Greedy word wrap of all paragraphs at the given font size.
        wrapped_lines: list[tuple[float, str]] = []
        current_line_words: list[str] = []
        current_line_width: float = 0
        space_width = measure(" ", size)
        for paragraph in paragraphs:
            if not paragraph.strip():
                # Preserve empty (or whitespace-only) paragraphs as blank lines.
                wrapped_lines.append((0.0, ""))
                continue
            for i, word in enumerate(paragraph.split(" ")):
                word_width = measure(word, size)
                test_width = current_line_width + word_width + (space_width if i else 0)
                if test_width > field_width and current_line_words:
                    # Word does not fit on the current line: start a new one with it.
                    wrapped_lines.append((current_line_width, " ".join(current_line_words)))
                    current_line_words = [word]
                    current_line_width = word_width
                elif not current_line_words and word_width > field_width:
                    # A single word wider than the field gets a line of its own.
                    wrapped_lines.append((word_width, word))
                    current_line_words = []
                    current_line_width = 0
                else:
                    if current_line_words:
                        current_line_width += space_width
                    current_line_words.append(word)
                    current_line_width += word_width
            if current_line_words:
                # Flush the remainder of the paragraph.
                wrapped_lines.append((current_line_width, " ".join(current_line_words)))
                current_line_words = []
                current_line_width = 0
        return wrapped_lines

    paragraphs = text.replace("\n", "\r").split("\r") if is_multiline else []

    while True:
        if is_multiline:
            lines = wrap(font_size)
            # Estimate total height.
            # Assumes line spacing of 1.4
            estimated_total_height = font_size + (len(lines) - 1) * 1.4 * font_size
            overflows = estimated_total_height > field_height
        else:
            line_width = measure(text, font_size)
            lines = [(line_width, text)]
            overflows = line_width > field_width or font_size > field_height

        if not overflows:
            return lines, font_size

        # Text overflows the field; retry with a smaller font size if allowed.
        new_font_size = font_size - font_size_step
        if not new_font_size >= min_font_size:
            return lines, font_size
        new_font_size = round(new_font_size, 1)
        if new_font_size >= font_size:
            # The step is zero, negative, or lost in rounding to one decimal:
            # no progress is possible, so stop instead of looping forever.
            return lines, font_size
        font_size = new_font_size

136 

def _generate_appearance_stream_data(
    self,
    text: str = "",
    selection: Optional[list[str]] = None,
    rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
    font_descriptor: Optional[FontDescriptor] = None,
    font_glyph_byte_map: Optional[dict[str, bytes]] = None,
    font_name: str = "/Helv",
    font_size: float = 0.0,
    font_color: str = "0 g",
    is_multiline: bool = False,
    alignment: TextAlignment = TextAlignment.LEFT
) -> bytes:
    """
    Generates the raw bytes of the PDF appearance stream for a text field.

    This private method assembles the PDF content stream operators to draw
    the provided text within the specified rectangle. It handles text positioning,
    font application, color, and special formatting like selected text.

    Args:
        text: The text to be rendered in the form field.
        selection: An optional list of strings that should be highlighted as selected.
        rectangle: The bounding box of the form field. Can be a `RectangleObject`
            or a tuple of four floats (x1, y1, x2, y2).
        font_descriptor: The FontDescriptor used to measure text widths. If omitted,
            the Helvetica core font metrics are used.
        font_glyph_byte_map: An optional dictionary mapping characters to their
            byte representation for glyph encoding. Characters missing from the
            map are encoded as UTF-16BE.
        font_name: The name of the font resource to use (e.g., "/Helv").
        font_size: The font size. If 0, it is automatically calculated
            based on whether the field is multiline or not.
        font_color: The color to apply to the font, represented as a PDF
            graphics state string (e.g., "0 g" for black).
        is_multiline: A boolean indicating if the text field is multiline.
        alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.

    Returns:
        A byte string containing the PDF content stream data.

    """
    font_glyph_byte_map = font_glyph_byte_map or {}
    if isinstance(rectangle, tuple):
        rectangle = RectangleObject(rectangle)
    if font_descriptor is None:
        # Without metrics no width can be computed; fall back to the same
        # Helvetica core metrics that __init__ uses when no font resource is given.
        font_descriptor = CORE_FONT_METRICS["Helvetica"]

    # If font_size is 0, apply the logic for multiline or large-as-possible font
    if font_size == 0:
        if selection:  # Don't wrap text when dealing with a /Ch field, in order to prevent problems
            is_multiline = False  # with matching "selection" with "line" later on.
        if is_multiline:
            font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
        else:
            font_size = rectangle.height - 2
        lines, font_size = self._scale_text(
            font_descriptor,
            font_size,
            rectangle.width - 3,  # One point margin left and right, and an additional point because the first
                                  # offset takes one extra point (see below, "desired_abs_x_start")
            rectangle.height - 3,  # One point margin for top and bottom, one point extra for the first line
                                   # (see y_offset)
            text,
            is_multiline,
        )
    else:
        # Explicit font size: no scaling or wrapping, only split on line breaks.
        lines = [(
            font_descriptor.text_width(line) * font_size / 1000,
            line
        ) for line in text.replace("\n", "\r").split("\r")]

    # Set the vertical offset
    y_offset = rectangle.height - 1 - font_size
    default_appearance = f"{font_name} {font_size} Tf {font_color}"

    # Collect the stream piecewise and join once at the end.
    stream_parts: list[bytes] = [
        (
            f"q\n/Tx BMC \nq\n1 1 {rectangle.width - 1} {rectangle.height - 1} "
            f"re\nW\nBT\n{default_appearance}\n"
        ).encode()
    ]
    current_x_pos: float = 0  # Initial virtual position within the text object.

    for line_number, (line_width, line) in enumerate(lines):
        if selection and line in selection:
            # Might be improved, but cannot find how to get fill working => replaced with lined box
            stream_parts.append((
                f"1 {y_offset - (line_number * font_size * 1.4) - 1} {rectangle.width - 2} {font_size + 2} re\n"
                f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
            ).encode())

        # Calculate the desired absolute starting X for the current line
        desired_abs_x_start: float = 0
        if alignment == TextAlignment.RIGHT:
            desired_abs_x_start = rectangle.width - 2 - line_width
        elif alignment == TextAlignment.CENTER:
            desired_abs_x_start = (rectangle.width - line_width) / 2
        else:  # Left aligned; default
            desired_abs_x_start = 2
        # Calculate x_rel_offset: how much to move from the current_x_pos
        # to reach the desired_abs_x_start.
        x_rel_offset = desired_abs_x_start - current_x_pos

        # Y-offset: absolute start position for the first line, one line
        # height (1.4 * font size) further down for every following line.
        y_rel_offset: float = 0
        if line_number == 0:
            y_rel_offset = y_offset  # Initial vertical position
        else:
            y_rel_offset = - font_size * 1.4  # Move down by line height

        # Td is a relative translation (Tx and Ty).
        # It updates the current text position.
        stream_parts.append(f"{x_rel_offset} {y_rel_offset} Td\n".encode())
        # Update current_x_pos based on the Td operation for the next iteration.
        # This is the X position where the *current line* will start.
        current_x_pos = desired_abs_x_start

        encoded_line: list[bytes] = [
            font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
        ]
        if any(len(c) >= 2 for c in encoded_line):
            # At least one multi-byte glyph: emit the whole line as a hex string.
            stream_parts.append(b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n")
        else:
            stream_parts.append(b"(" + b"".join(encoded_line) + b") Tj\n")
    stream_parts.append(b"ET\nQ\nEMC\nQ\n")
    return b"".join(stream_parts)

258 

def __init__(
    self,
    text: str = "",
    selection: Optional[list[str]] = None,
    rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0),
    font_resource: Optional[DictionaryObject] = None,
    font_name: str = "/Helv",
    font_size: float = 0.0,
    font_color: str = "0 g",
    is_multiline: bool = False,
    alignment: TextAlignment = TextAlignment.LEFT
) -> None:
    """
    Initializes a TextStreamAppearance object.

    This constructor creates a new PDF stream object configured as an XObject
    of subtype Form. It uses the `_generate_appearance_stream_data` method to
    generate the content for the stream.

    Args:
        text: The text to be rendered in the form field.
        selection: An optional list of strings that should be highlighted as selected.
        rectangle: The bounding box of the form field. Can be a `RectangleObject`
            or a tuple of four floats (x1, y1, x2, y2).
        font_resource: An optional variable that represents a PDF font dictionary.
            If missing or empty, a warning is logged and a Helvetica (/Helv)
            font dictionary with WinAnsiEncoding is used instead.
        font_name: The name of the font resource, e.g., "/Helv".
        font_size: The font size. If 0, it's auto-calculated.
        font_color: The font color string.
        is_multiline: A boolean indicating if the text field is multiline.
        alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.

    """
    super().__init__()

    # If a font resource was added, get the font character map
    if font_resource:
        font_resource = cast(DictionaryObject, font_resource.get_object())
        font_descriptor = FontDescriptor.from_font_resource(font_resource)
    else:
        logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
        font_name = "/Helv"
        font_resource = DictionaryObject({
            NameObject("/Subtype"): NameObject("/Type1"),
            NameObject("/Name"): NameObject("/Helv"),
            NameObject("/Type"): NameObject("/Font"),
            NameObject("/BaseFont"): NameObject("/Helvetica"),
            NameObject("/Encoding"): NameObject("/WinAnsiEncoding")
        })
        font_descriptor = CORE_FONT_METRICS["Helvetica"]

    # Get the font glyph data
    # NOTE(review): 200 is the first positional argument of build_char_map_from_dict;
    # presumably a default space width - confirm against that function.
    _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
        200, font_resource
    )
    # Remove the width stored under the -1 key, if present
    font_map.pop(-1, None)

    font_glyph_byte_map: dict[str, bytes]
    if isinstance(font_encoding, str):
        # Named codec: encode every mapped key with it.
        font_glyph_byte_map = {
            v: k.encode(font_encoding) for k, v in font_map.items()
        }
    else:
        # Encoding given as a code -> character table: invert it into
        # character -> single byte, then layer the character map on top.
        font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
        font_glyph_byte_map = dict(font_encoding_rev)
        for key, value in font_map.items():
            # NOTE(review): when `key` is not in the encoding table the fallback
            # stores `key` itself, which looks like a str rather than bytes and
            # would not survive the bytes join during stream generation - confirm.
            font_glyph_byte_map[value] = font_encoding_rev.get(key, key)

    ap_stream_data = self._generate_appearance_stream_data(
        text,
        selection,
        rectangle,
        font_descriptor,
        font_glyph_byte_map,
        font_name,
        font_size,
        font_color,
        is_multiline,
        alignment
    )

    self[NameObject("/Type")] = NameObject("/XObject")
    self[NameObject("/Subtype")] = NameObject("/Form")
    self[NameObject("/BBox")] = RectangleObject(rectangle)
    self.set_data(ByteStringObject(ap_stream_data))
    self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
    # Update Resources with font information; reference the font indirectly
    # when the resource carries an indirect reference, else embed it directly.
    self[NameObject("/Resources")] = DictionaryObject({
        NameObject("/Font"): DictionaryObject({
            NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
        })
    })

352 

@classmethod
def from_text_annotation(
    cls,
    acro_form: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
    field: DictionaryObject,
    annotation: DictionaryObject,
    user_font_name: str = "",
    user_font_size: float = -1,
) -> "TextStreamAppearance":
    """
    Creates a TextStreamAppearance object from a text field annotation.

    This class method is a factory for creating a `TextStreamAppearance`
    instance by extracting all necessary information (bounding box, font,
    text content, etc.) from the PDF field and annotation dictionaries.
    It respects inheritance for properties like default appearance (`/DA`).

    Args:
        acro_form: The root AcroForm dictionary from the PDF catalog.
        field: The field dictionary object.
        annotation: The widget annotation dictionary object associated with the field.
        user_font_name: An optional user-provided font name to override the
            default. Defaults to an empty string.
        user_font_size: An optional user-provided font size to override the
            default. A value of -1 indicates no override.

    Returns:
        A new `TextStreamAppearance` instance configured for the given field.

    Raises:
        ValueError: If the default appearance string contains no ``Tf``
            operator, or if its font size operand is not a number.

    """
    # Calculate rectangle dimensions
    _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
    rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))

    # Get default appearance dictionary from annotation
    default_appearance = annotation.get_inherited(
        AnnotationDictionaryAttributes.DA,
        acro_form.get(AnnotationDictionaryAttributes.DA, None),
    )
    if not default_appearance:
        # Create a default appearance if none was found in the annotation
        default_appearance = TextStringObject("/Helv 0 Tf 0 g")
    else:
        default_appearance = default_appearance.get_object()

    # Derive font name, size and color from the default appearance. Also set
    # user-provided font name and font size in the default appearance, if given.
    # For a font name, this presumes that we can find an associated font resource
    # dictionary. Uses the variable font_properties as an intermediate.
    # As per the PDF spec:
    # "At a minimum, the string [that is, default_appearance] shall include a Tf (text
    # font) operator along with its two operands, font and size" (Section 12.7.4.3
    # "Variable text" of the PDF 2.0 specification).
    font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
    # The two operands precede "Tf"; after popping the name, the size is the
    # element directly in front of "Tf".
    font_name = font_properties.pop(font_properties.index("Tf") - 2)
    font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
    font_properties.remove("Tf")
    # Whatever remains of the default appearance is treated as the color.
    font_color = " ".join(font_properties)
    # Determine the font name to use, prioritizing the user's input
    if user_font_name:
        font_name = user_font_name
    # Determine the font size to use, prioritizing the user's input
    if user_font_size > 0:
        font_size = user_font_size

    # Try to find a resource dictionary for the font
    document_resources: Any = cast(
        DictionaryObject,
        cast(
            DictionaryObject,
            annotation.get_inherited(
                "/DR",
                acro_form.get("/DR", DictionaryObject()),
            ),
        ).get_object(),
    )
    document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
    # CORE_FONT_METRICS is the dict with Standard font metrics
    if font_name not in document_font_resources and font_name.removeprefix("/") not in CORE_FONT_METRICS:
        # ...or AcroForm dictionary
        # The default must be a DictionaryObject: a plain dict has no
        # get_object() and would fail when the AcroForm has no /DR entry.
        document_resources = cast(
            DictionaryObject,
            acro_form.get("/DR", DictionaryObject()),
        )
        document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
    font_resource = document_font_resources.get(font_name, None)
    if is_null_or_none(font_resource):
        # Hand the constructor an explicit "no font resource" so that it
        # takes its Helvetica fallback path.
        font_resource = None
    else:
        font_resource = cast(DictionaryObject, font_resource.get_object())

    # Retrieve field text, selected values and formatting information
    is_multiline = False
    field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
    alignment = field.get("/Q", TextAlignment.LEFT)
    if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
        is_multiline = True
    if (
        field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
        field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
    ):
        # Choice field without the Combo flag: show every option on its own
        # line and mark the current value(s) as selected.
        text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
        selection = field.get("/V", [])
        if not isinstance(selection, list):
            selection = [selection]
    else:  # /Tx
        text = field.get("/V", "")
        selection = []

    # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
    text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

    # Create the TextStreamAppearance instance
    new_appearance_stream = cls(
        text,
        selection,
        rectangle,
        font_resource,
        font_name,
        font_size,
        font_color,
        is_multiline,
        alignment
    )
    if AnnotationDictionaryAttributes.AP in annotation:
        # Carry over entries of the existing normal appearance, except the
        # ones that describe the stream that was just generated.
        for key, value in (
            cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
        ):
            if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                new_appearance_stream[key] = value

    return new_appearance_stream