Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/utils.py: 69%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

336 statements  

1"""Miscellaneous Routines.""" 

2 

3import io 

4import pathlib 

5import string 

6from collections.abc import Callable, Iterable, Iterator 

7from html import escape 

8from typing import ( 

9 TYPE_CHECKING, 

10 Any, 

11 BinaryIO, 

12 Generic, 

13 TextIO, 

14 TypeVar, 

15 Union, 

16 cast, 

17) 

18 

19from pdfminer.pdfexceptions import PDFTypeError, PDFValueError 

20 

21if TYPE_CHECKING: 

22 from pdfminer.layout import LTComponent 

23 

24import contextlib 

25 

26import charset_normalizer # For str encoding detection 

27 

# Python 3 no longer offers ``sys.maxint``, and PDF still uses 32-bit
# integers, so the largest signed 32-bit value serves as "infinity".
INF = 2**31 - 1

# A path object, a path string, or an already-open file-like object.
FileOrName = Union[pathlib.PurePath, str, io.IOBase]
# A text stream or a binary stream.
AnyIO = Union[TextIO, BinaryIO]

34 

35 

class open_filename:
    """Context manager that yields a file object for a path or a stream.

    A ``str`` or ``pathlib.PurePath`` is opened with the built-in ``open``
    (any extra positional/keyword arguments are forwarded to it) and closed
    again when the ``with`` block exits.  An ``io.IOBase`` instance is handed
    back as-is and is left open on exit.  Any other input type raises
    ``PDFTypeError``.
    """

    def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any) -> None:
        # A path object is treated exactly like its string form.
        target = filename
        if isinstance(target, pathlib.PurePath):
            target = str(target)

        if isinstance(target, str):
            # We opened it, so we are responsible for closing it.
            self.file_handler: AnyIO = open(target, *args, **kwargs)  # noqa: SIM115
            self.closing = True
            return

        if isinstance(target, io.IOBase):
            # Caller-supplied stream: never closed by this context manager.
            self.file_handler = cast(AnyIO, target)
            self.closing = False
            return

        raise PDFTypeError(f"Unsupported input type: {type(filename)}")

    def __enter__(self) -> AnyIO:
        return self.file_handler

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        if not self.closing:
            return
        self.file_handler.close()

60 

61 

def make_compat_bytes(in_str: str) -> bytes:
    """Encode a text string to bytes with ``str.encode``'s default codec.

    The argument must be a ``str``; anything else trips the assertion, whose
    message names the offending type.
    """
    assert isinstance(in_str, str), str(type(in_str))
    encoded = in_str.encode()
    return encoded

66 

67 

def make_compat_str(o: object) -> str:
    """Return a text rendering of ``o``.

    ``bytes`` are decoded with the encoding guessed by ``charset_normalizer``.
    If no encoding is detected, or decoding with the guess fails, the
    ``str()`` form of the bytes object is returned instead.  Every other
    object is simply passed through ``str()``.
    """
    if not isinstance(o, bytes):
        return str(o)

    guess = charset_normalizer.detect(o)
    if guess["encoding"] is None:
        return str(o)
    try:
        return o.decode(guess["encoding"])
    except UnicodeDecodeError:
        # The guess was wrong; fall back to the repr-like ``str(bytes)``.
        return str(o)

80 

81 

def shorten_str(s: str, size: int) -> str:
    """Abbreviate ``s`` for display.

    For ``size`` below 7 the string is cut to ``s[:size]``.  Otherwise a
    string longer than ``size`` keeps ``(size - 5) // 2`` characters from each
    end, joined by ``" ... "``, and a string that already fits is returned
    unchanged.
    """
    if size < 7:
        return s[:size]
    if len(s) <= size:
        return s
    keep = (size - 5) // 2
    head, tail = s[:keep], s[-keep:]
    return f"{head} ... {tail}"

90 

91 

def compatible_encode_method(
    bytesorstring: bytes | str,
    encoding: str = "utf-8",
    erraction: str = "ignore",
) -> str:
    """Return ``bytesorstring`` as text, whichever of the two types it is.

    A ``str`` is returned unchanged.  ``bytes`` are decoded with ``encoding``
    using ``erraction`` as the error handler.  (Code written for Py2's
    ``str.encode`` often really means "decode these bytes" under Py3; this
    helper covers both.)
    """
    if not isinstance(bytesorstring, str):
        assert isinstance(bytesorstring, bytes), str(type(bytesorstring))
        return bytesorstring.decode(encoding, erraction)
    return bytesorstring

105 

106 

def paeth_predictor(left: int, above: int, upper_left: int) -> int:
    """Return whichever neighbour is closest to ``left + above - upper_left``.

    Ties are broken in the order ``left``, ``above``, ``upper_left``.

    See http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
    """
    estimate = left + above - upper_left

    # Distance from the estimate to each of the three neighbours.
    dist_left = abs(estimate - left)
    dist_above = abs(estimate - above)
    dist_upper_left = abs(estimate - upper_left)

    if dist_left <= dist_above and dist_left <= dist_upper_left:
        return left
    if dist_above <= dist_upper_left:
        return above
    return upper_left

123 

124 

def apply_tiff_predictor(
    colors: int, columns: int, bitspercomponent: int, data: bytes
) -> bytes:
    """Reverse the effect of the TIFF predictor 2.

    Within each scanline of ``columns * colors`` bytes, every byte from the
    second pixel onwards is stored as the difference to the byte one pixel to
    its left; this adds the left neighbour back (mod 256).

    A final scanline that is shorter than a full row is decoded as far as it
    goes instead of raising ``IndexError``.

    Documentation:
    https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf
    (Section 14, page 64)

    :param colors: number of colour components per pixel.
    :param columns: number of pixels per scanline.
    :param bitspercomponent: bits per colour component; only 8 is supported.
    :param data: the predictor-encoded bytes.
    :returns: the decoded bytes, the same length as ``data``.
    :raises PDFValueError: if ``bitspercomponent`` is not 8.
    """
    if bitspercomponent != 8:
        error_msg = f"Unsupported `bitspercomponent': {bitspercomponent}"
        raise PDFValueError(error_msg)
    bpp = colors * (bitspercomponent // 8)  # bytes per pixel
    nbytes = columns * bpp  # bytes per scanline
    buf = bytearray()
    for scanline_i in range(0, len(data), nbytes):
        # Slicing (unlike indexing) tolerates a truncated last scanline.
        row = bytearray(data[scanline_i : scanline_i + nbytes])
        # The first pixel is stored verbatim; later bytes are deltas.
        for i in range(bpp, len(row)):
            row[i] = (row[i] + row[i - bpp]) & 0xFF
        buf += row

    return bytes(buf)

151 

152 

153def apply_png_predictor( 

154 pred: int, 

155 colors: int, 

156 columns: int, 

157 bitspercomponent: int, 

158 data: bytes, 

159) -> bytes: 

160 """Reverse the effect of the PNG predictor 

161 

162 Documentation: http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html 

163 """ 

164 if bitspercomponent not in [8, 1]: 

165 msg = f"Unsupported `bitspercomponent': {bitspercomponent}" 

166 raise PDFValueError(msg) 

167 

168 nbytes = colors * columns * bitspercomponent // 8 

169 bpp = colors * bitspercomponent // 8 # number of bytes per complete pixel 

170 buf = [] 

171 line_above = list(b"\x00" * columns) 

172 for scanline_i in range(0, len(data), nbytes + 1): 

173 filter_type = data[scanline_i] 

174 line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes] 

175 raw = [] 

176 

177 if filter_type == 0: 

178 # Filter type 0: None 

179 raw = list(line_encoded) 

180 

181 elif filter_type == 1: 

182 # Filter type 1: Sub 

183 # To reverse the effect of the Sub() filter after decompression, 

184 # output the following value: 

185 # Raw(x) = Sub(x) + Raw(x - bpp) 

186 # (computed mod 256), where Raw() refers to the bytes already 

187 # decoded. 

188 for j, sub_x in enumerate(line_encoded): 

189 raw_x_bpp = 0 if j - bpp < 0 else int(raw[j - bpp]) 

190 raw_x = (sub_x + raw_x_bpp) & 255 

191 raw.append(raw_x) 

192 

193 elif filter_type == 2: 

194 # Filter type 2: Up 

195 # To reverse the effect of the Up() filter after decompression, 

196 # output the following value: 

197 # Raw(x) = Up(x) + Prior(x) 

198 # (computed mod 256), where Prior() refers to the decoded bytes of 

199 # the prior scanline. 

200 for up_x, prior_x in zip(line_encoded, line_above, strict=False): 

201 raw_x = (up_x + prior_x) & 255 

202 raw.append(raw_x) 

203 

204 elif filter_type == 3: 

205 # Filter type 3: Average 

206 # To reverse the effect of the Average() filter after 

207 # decompression, output the following value: 

208 # Raw(x) = Average(x) + floor((Raw(x-bpp)+Prior(x))/2) 

209 # where the result is computed mod 256, but the prediction is 

210 # calculated in the same way as for encoding. Raw() refers to the 

211 # bytes already decoded, and Prior() refers to the decoded bytes of 

212 # the prior scanline. 

213 for j, average_x in enumerate(line_encoded): 

214 raw_x_bpp = 0 if j - bpp < 0 else int(raw[j - bpp]) 

215 prior_x = int(line_above[j]) 

216 raw_x = (average_x + (raw_x_bpp + prior_x) // 2) & 255 

217 raw.append(raw_x) 

218 

219 elif filter_type == 4: 

220 # Filter type 4: Paeth 

221 # To reverse the effect of the Paeth() filter after decompression, 

222 # output the following value: 

223 # Raw(x) = Paeth(x) 

224 # + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp)) 

225 # (computed mod 256), where Raw() and Prior() refer to bytes 

226 # already decoded. Exactly the same PaethPredictor() function is 

227 # used by both encoder and decoder. 

228 for j, paeth_x in enumerate(line_encoded): 

229 if j - bpp < 0: 

230 raw_x_bpp = 0 

231 prior_x_bpp = 0 

232 else: 

233 raw_x_bpp = int(raw[j - bpp]) 

234 prior_x_bpp = int(line_above[j - bpp]) 

235 prior_x = int(line_above[j]) 

236 paeth = paeth_predictor(raw_x_bpp, prior_x, prior_x_bpp) 

237 raw_x = (paeth_x + paeth) & 255 

238 raw.append(raw_x) 

239 

240 else: 

241 raise PDFValueError(f"Unsupported predictor value: {filter_type}") 

242 

243 buf.extend(raw) 

244 line_above = raw 

245 return bytes(buf) 

246 

247 

# Geometric primitives, all expressed as plain tuples of floats.
Point = tuple[float, float]  # (x, y)
Rect = tuple[float, float, float, float]  # (x0, y0, x1, y1)
Matrix = tuple[float, float, float, float, float, float]  # (a, b, c, d, e, f)
# One path segment: a one-letter operator followed by its float operands.
PathSegment = Union[
    tuple[str],  # Literal['h']
    tuple[str, float, float],  # Literal['m', 'l']
    tuple[str, float, float, float, float],  # Literal['v', 'y']
    tuple[str, float, float, float, float, float, float],  # Literal['c']
]

# Matrix operations
MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)

260 

261 

def parse_rect(o: Any) -> Rect:
    """Convert a four-element iterable into a ``Rect`` of floats.

    :raises PDFValueError: if unpacking ``o`` into four values, or converting
        one of them with ``float``, raises ``ValueError``.
    """
    try:
        xa, ya, xb, yb = o
        rect = (float(xa), float(ya), float(xb), float(yb))
    except ValueError as err:
        raise PDFValueError("Could not parse rectangle") from err
    return rect

268 

269 

def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
    """Returns the multiplication of two matrices.

    Both arguments and the result are ``(a, b, c, d, e, f)`` tuples.  The
    result combines them so that transforming a point with it (in the sense
    of ``apply_matrix_pt``) equals transforming the point with ``m1`` first
    and then with ``m0``.
    """
    (a1, b1, c1, d1, e1, f1) = m1
    (a0, b0, c0, d0, e0, f0) = m0
    return (
        a0 * a1 + c0 * b1,
        b0 * a1 + d0 * b1,
        a0 * c1 + c0 * d1,
        b0 * c1 + d0 * d1,
        a0 * e1 + c0 * f1 + e0,
        b0 * e1 + d0 * f1 + f0,
    )

282 

283 

def translate_matrix(m: Matrix, v: Point) -> Matrix:
    """Translates a matrix by (x, y) inside the projection.

    The linear part ``(a, b, c, d)`` is kept; the offset ``(e, f)`` is moved
    so that the matrix's origin lies at the given point of its own coordinate
    system.  This is not the same as translating it within the original
    coordinate system.
    """
    a, b, c, d, e, f = m
    x, y = v
    new_e = x * a + y * c + e
    new_f = x * b + y * d + f
    return a, b, c, d, new_e, new_f

293 

294 

def apply_matrix_pt(m: Matrix, v: Point) -> Point:
    """Applies a matrix to a point.

    For ``m = (a, b, c, d, e, f)`` and ``v = (x, y)`` the result is
    ``(a*x + c*y + e, b*x + d*y + f)``.
    """
    a, b, c, d, e, f = m
    x, y = v
    new_x = a * x + c * y + e
    new_y = b * x + d * y + f
    return new_x, new_y

300 

301 

def apply_matrix_rect(m: Matrix, rect: Rect) -> Rect:
    """Applies a matrix to a rectangle.

    All four corners are transformed and the axis-aligned bounding box of the
    transformed corners is returned, so the result is not a rotated rectangle
    but one with the same orientation that tightly encloses the transformed
    content.

    :param m: The rotation matrix.
    :param rect: The rectangle coordinates (x0, y0, x1, y1), where x0 < x1 and y0 < y1.
    :returns a rectangle with the same orientation, but that would fit the rotated
        content.
    """
    x0, y0, x1, y1 = rect

    # Transform the corners in the order: bottom-left, bottom-right,
    # top-right, top-left.
    lb = apply_matrix_pt(m, (x0, y0))
    rb = apply_matrix_pt(m, (x1, y0))
    rt = apply_matrix_pt(m, (x1, y1))
    lt = apply_matrix_pt(m, (x0, y1))

    xs = (lb[0], lt[0], rb[0], rt[0])
    ys = (lb[1], rb[1], rt[1], lt[1])
    return min(xs), min(ys), max(xs), max(ys)

330 

331 

def apply_matrix_norm(m: Matrix, v: Point) -> Point:
    """Apply only the linear part of ``m`` to ``v``, ignoring the offset.

    Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0)).
    """
    a, b, c, d = m[0], m[1], m[2], m[3]
    (_a, _b, _c, _d, _e, _f) = m  # still insist on exactly six components
    p, q = v
    return a * p + c * q, b * p + d * q

337 

338 

339# Utility functions 

340 

341 

def isnumber(x: object) -> bool:
    """Return True if ``x`` is an ``int`` or ``float`` instance."""
    numeric_types = (int, float)
    return isinstance(x, numeric_types)

344 

345 

_T = TypeVar("_T")


def uniq(objs: Iterable[_T]) -> Iterator[_T]:
    """Yield each element of ``objs`` the first time it occurs.

    Later duplicates are skipped; first-occurrence order is preserved.
    Elements are tracked in a set, so they must be hashable.
    """
    seen = set()
    for obj in objs:
        if obj not in seen:
            seen.add(obj)
            yield obj

357 

358 

def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]) -> tuple[list[_T], list[_T]]:
    """Partition ``objs`` by ``pred``.

    :returns: ``(accepted, rejected)`` - the elements for which ``pred`` is
        truthy and those for which it is falsy, each in input order.
    """
    accepted: list[_T] = []
    rejected: list[_T] = []
    for obj in objs:
        bucket = accepted if pred(obj) else rejected
        bucket.append(obj)
    return accepted, rejected

369 

370 

def drange(v0: float, v1: float, d: int) -> range:
    """Returns a discrete range.

    The result runs from ``int(v0) // d`` up to, but not including,
    ``int(v1 + d) // d``.
    """
    first = int(v0) // d
    stop = int(v1 + d) // d
    return range(first, stop)

374 

375 

def get_bound(pts: Iterable[Point]) -> Rect:
    """Compute a minimal rectangle that covers all the points.

    The search starts from ``(INF, INF, -INF, -INF)``, which is also what is
    returned when ``pts`` is empty.
    """
    x0 = y0 = INF
    x1 = y1 = -INF
    for px, py in pts:
        x0, y0 = min(x0, px), min(y0, py)
        x1, y1 = max(x1, px), max(y1, py)
    return x0, y0, x1, y1

386 

387 

def pick(
    seq: Iterable[_T],
    func: Callable[[_T], float],
    maxobj: _T | None = None,
) -> _T | None:
    """Picks the object obj where func(obj) has the highest value.

    On equal scores the earliest object wins.  ``maxobj`` is returned when
    ``seq`` is empty.
    """
    best_score = None
    best = maxobj
    for candidate in seq:
        score = func(candidate)
        if best_score is None or best_score < score:
            best_score = score
            best = candidate
    return best

400 

401 

def choplist(n: int, seq: Iterable[_T]) -> Iterator[tuple[_T, ...]]:
    """Groups every n elements of the list.

    Yields consecutive ``n``-tuples; trailing elements that do not fill a
    complete group are dropped.
    """
    pending: list[_T] = []
    for item in seq:
        pending.append(item)
        if len(pending) != n:
            continue
        yield tuple(pending)
        pending = []

410 

411 

def nunpack(s: bytes, default: int = 0) -> int:
    """Unpacks variable-length unsigned integers (big endian).

    An empty ``s`` yields ``default``.
    """
    if len(s) == 0:
        return default
    return int.from_bytes(s, byteorder="big", signed=False)

419 

420 

# PDFDocEncoding -> Unicode lookup table: the character at index ``b`` is the
# decoding of byte value ``b`` (256 entries).  Slots 0x7F, 0x9F and 0xAD hold
# U+0000 - presumably codes the encoding leaves undefined.
# Slot 0x16 is U+0016; it used to repeat 0x0017 from the following slot.
# fmt: off
PDFDocEncoding = "".join(
    chr(x)
    for x in (
        # 0x00 - 0x17: control codes map to themselves
        0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
        0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
        0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
        # 0x18 - 0x1F
        0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC,
        # 0x20 - 0x7F
        0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
        0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
        0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
        0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
        0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
        0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
        0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
        0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
        0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
        0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
        0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
        0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x0000,
        # 0x80 - 0x9F
        0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
        0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018,
        0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160,
        0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E, 0x0000,
        # 0xA0 - 0xFF
        0x20AC, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
        0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x0000, 0x00AE, 0x00AF,
        0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
        0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
        0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
        0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
        0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
        0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
        0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
        0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
        0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
        0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
    )
)
# fmt: on

682 

683 

def decode_text(s: bytes) -> str:
    """Decodes a PDFDocEncoding string to Unicode.

    Input that starts with the byte-order mark ``FE FF`` is decoded as
    UTF-16BE instead (the mark is stripped and undecodable data ignored);
    everything else is mapped byte by byte through ``PDFDocEncoding``.
    """
    if not s.startswith(b"\xfe\xff"):
        return "".join(PDFDocEncoding[byte] for byte in s)
    payload = s[2:]
    return str(payload, "utf-16be", "ignore")

690 

691 

def enc(x: str) -> str:
    """Encodes a string for SGML/XML/HTML.

    ``bytes`` input yields an empty string; text is passed through
    ``html.escape``.
    """
    return "" if isinstance(x, bytes) else escape(x)

697 

698 

def bbox2str(bbox: Rect) -> str:
    """Format a rectangle as ``x0,y0,x1,y1`` with three decimals each."""
    x0, y0, x1, y1 = bbox
    return ",".join(f"{coord:.3f}" for coord in (x0, y0, x1, y1))

702 

703 

def matrix2str(m: Matrix) -> str:
    """Format a matrix as ``[a,b,c,d, (e,f)]`` with two decimals each."""
    a, b, c, d, e, f = m
    linear = ",".join(f"{value:.2f}" for value in (a, b, c, d))
    offset = f"({e:.2f},{f:.2f})"
    return f"[{linear}, {offset}]"

707 

708 

def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
    """A distance function between two TextBoxes.

    Takes the bounding rectangle enclosing obj1 and obj2 and subtracts both
    objects' widths and heights from its extent.  If the remainder is negative
    on both axes, the vector between the two centres is returned; otherwise
    the per-axis gap between the boxes (clamped at zero) is returned.

             +------+..........+ (x1, y1)
             | obj1 |          :
             +------+www+------+
             :          | obj2 |
    (x0, y0) +..........+------+
    """
    # Corners of the rectangle enclosing both objects.
    x0 = min(obj1.x0, obj2.x0)
    y0 = min(obj1.y0, obj2.y0)
    x1 = max(obj1.x1, obj2.x1)
    y1 = max(obj1.y1, obj2.y1)

    outer_w = x1 - x0
    outer_h = y1 - y0
    # Space left over once both objects' extents are removed.
    gap_w = outer_w - obj1.width - obj2.width
    gap_h = outer_h - obj1.height - obj2.height

    if not (gap_w < 0 and gap_h < 0):
        return max(0, gap_w), max(0, gap_h)

    # Negative on both axes: fall back to the vector between the centres.
    xc1, yc1 = (obj1.x0 + obj1.x1) / 2, (obj1.y0 + obj1.y1) / 2
    xc2, yc2 = (obj2.x0 + obj2.x1) / 2, (obj2.y0 + obj2.y1) / 2
    return xc1 - xc2, yc1 - yc2

733 

734 

735LTComponentT = TypeVar("LTComponentT", bound="LTComponent") 

736 

737 

738class Plane(Generic[LTComponentT]): 

739 """A set-like data structure for objects placed on a plane. 

740 

741 Can efficiently find objects in a certain rectangular area. 

742 It maintains two parallel lists of objects, each of 

743 which is sorted by its x or y coordinate. 

744 """ 

745 

746 def __init__(self, bbox: Rect, gridsize: int = 50) -> None: 

747 self._seq: list[LTComponentT] = [] # preserve the object order. 

748 self._objs: set[LTComponentT] = set() 

749 self._grid: dict[Point, list[LTComponentT]] = {} 

750 self.gridsize = gridsize 

751 (self.x0, self.y0, self.x1, self.y1) = bbox 

752 

753 def __repr__(self) -> str: 

754 return f"<Plane objs={list(self)!r}>" 

755 

756 def __iter__(self) -> Iterator[LTComponentT]: 

757 return (obj for obj in self._seq if obj in self._objs) 

758 

759 def __len__(self) -> int: 

760 return len(self._objs) 

761 

762 def __contains__(self, obj: object) -> bool: 

763 return obj in self._objs 

764 

765 def _getrange(self, bbox: Rect) -> Iterator[Point]: 

766 (x0, y0, x1, y1) = bbox 

767 if x1 <= self.x0 or self.x1 <= x0 or y1 <= self.y0 or self.y1 <= y0: 

768 return 

769 x0 = max(self.x0, x0) 

770 y0 = max(self.y0, y0) 

771 x1 = min(self.x1, x1) 

772 y1 = min(self.y1, y1) 

773 for grid_y in drange(y0, y1, self.gridsize): 

774 for grid_x in drange(x0, x1, self.gridsize): 

775 yield (grid_x, grid_y) 

776 

777 def extend(self, objs: Iterable[LTComponentT]) -> None: 

778 for obj in objs: 

779 self.add(obj) 

780 

781 def add(self, obj: LTComponentT) -> None: 

782 """Place an object.""" 

783 for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)): 

784 if k not in self._grid: 

785 r: list[LTComponentT] = [] 

786 self._grid[k] = r 

787 else: 

788 r = self._grid[k] 

789 r.append(obj) 

790 self._seq.append(obj) 

791 self._objs.add(obj) 

792 

793 def remove(self, obj: LTComponentT) -> None: 

794 """Displace an object.""" 

795 for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)): 

796 with contextlib.suppress(KeyError, ValueError): 

797 self._grid[k].remove(obj) 

798 self._objs.remove(obj) 

799 

800 def find(self, bbox: Rect) -> Iterator[LTComponentT]: 

801 """Finds objects that are in a certain area.""" 

802 (x0, y0, x1, y1) = bbox 

803 done = set() 

804 for k in self._getrange(bbox): 

805 if k not in self._grid: 

806 continue 

807 for obj in self._grid[k]: 

808 if obj in done: 

809 continue 

810 done.add(obj) 

811 if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 or y1 <= obj.y0: 

812 continue 

813 yield obj 

814 

815 

# Numerals for 1, 10, 100, 1000 and for 5, 50, 500, indexed by decimal place.
ROMAN_ONES = ["i", "x", "c", "m"]
ROMAN_FIVES = ["v", "l", "d"]


def format_int_roman(value: int) -> str:
    """Format a number as lowercase Roman numerals.

    ``value`` must satisfy ``0 < value < 4000``.
    """
    assert 0 < value < 4000
    # Numerals for each decimal digit, least significant digit first.
    groups: list[str] = []
    index = 0

    while value != 0:
        value, digit = divmod(value, 10)
        one = ROMAN_ONES[index]
        if digit == 9:
            # One unit before the next power of ten, e.g. "ix".
            group = one + ROMAN_ONES[index + 1]
        elif digit == 4:
            # One unit before the five of this place, e.g. "iv".
            group = one + ROMAN_FIVES[index]
        elif digit >= 5:
            group = ROMAN_FIVES[index] + one * (digit - 5)
        else:
            group = one * digit
        groups.append(group)
        index += 1

    return "".join(reversed(groups))

843 

844 

def format_int_alpha(value: int) -> str:
    """Format a number as lowercase letters a-z, aa-zz, etc.

    ``value`` must be positive; 1 maps to ``"a"`` and 27 to ``"aa"``.
    """
    assert value > 0
    alphabet = string.ascii_lowercase
    # Letters are produced least significant first.
    letters: list[str] = []

    while value != 0:
        value, offset = divmod(value - 1, len(alphabet))
        letters.append(alphabet[offset])

    return "".join(reversed(letters))

856 

857 

def unpad_aes(padded: bytes) -> bytes:
    """Remove block padding as described in PDF 1.7 section 7.6.2:

    > For an original message length of M, the pad shall consist of 16 -
    (M mod 16) bytes whose value shall also be 16 - (M mod 16).
    > Note that the pad is present when M is evenly divisible by 16;
    it contains 16 bytes of 0x10.

    Input that does not end in a well-formed pad is returned unchanged.

    :param padded: the decrypted data, possibly ending in padding.
    :returns: ``padded`` without its trailing pad, or ``padded`` itself when
        no valid pad is found.
    """
    if len(padded) == 0:
        return padded
    # Check for a potential padding byte (bytes are unsigned)
    padding = padded[-1]
    # A pad is 1..16 bytes long.  Zero must be rejected explicitly: since
    # ``padded[-0:]`` is the whole buffer, a message consisting only of zero
    # bytes would otherwise be "unpadded" to an empty string.
    if padding < 1 or padding > 16:
        return padded
    # A valid padding byte is the length of the padding
    if padding > len(padded):  # Obviously invalid
        return padded
    # Every byte of padding is equal to the length of padding
    if padded.endswith(bytes([padding]) * padding):
        return padded[:-padding]
    return padded

878 return padded