Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/utils.py: 69%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Miscellaneous Routines."""
3import io
4import pathlib
5import string
6from collections.abc import Callable, Iterable, Iterator
7from html import escape
8from typing import (
9 TYPE_CHECKING,
10 Any,
11 BinaryIO,
12 Generic,
13 TextIO,
14 TypeVar,
15 Union,
16 cast,
17)
19from pdfminer.pdfexceptions import PDFTypeError, PDFValueError
21if TYPE_CHECKING:
22 from pdfminer.layout import LTComponent
24import contextlib
26import charset_normalizer # For str encoding detection
# ``sys.maxint`` no longer exists in Python 3, but PDF still uses 32-bit
# integers, so the largest signed 32-bit value stands in for "infinity".
INF = 2**31 - 1

# A file given either as a path (``str`` / ``pathlib.PurePath``) or as a stream.
FileOrName = Union[pathlib.PurePath, str, io.IOBase]
# Any text or binary stream.
AnyIO = Union[TextIO, BinaryIO]
class open_filename:
    """Context manager yielding an open file for either a path or a stream.

    A ``str`` or ``pathlib.PurePath`` is opened with the built-in ``open``
    (extra positional and keyword arguments are forwarded to it) and closed
    again on exit. An ``io.IOBase`` instance is handed through untouched and
    is left open on exit.

    :raises PDFTypeError: if ``filename`` is none of the supported types.
    """

    def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any) -> None:
        if isinstance(filename, (str, pathlib.PurePath)):
            path = str(filename) if isinstance(filename, pathlib.PurePath) else filename
            # The file is opened here, so it is closed again in __exit__.
            self.file_handler: AnyIO = open(path, *args, **kwargs)  # noqa: SIM115
            self.closing = True
            return
        if not isinstance(filename, io.IOBase):
            raise PDFTypeError(f"Unsupported input type: {type(filename)}")
        # A stream supplied by the caller is never closed by this class.
        self.file_handler = cast("AnyIO", filename)
        self.closing = False

    def __enter__(self) -> AnyIO:
        return self.file_handler

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        if not self.closing:
            return
        self.file_handler.close()
def make_compat_bytes(in_str: str) -> bytes:
    """Encode a text string to bytes with ``str.encode()``'s default codec.

    :param in_str: the text to encode; any other type trips the assertion.
    :returns: the encoded bytes.
    """
    assert isinstance(in_str, str), str(type(in_str))
    encoded = in_str.encode()
    return encoded
def make_compat_str(o: object) -> str:
    """Return ``o`` as text, guessing the encoding when it is ``bytes``.

    Anything that is not ``bytes`` goes through ``str()``. For ``bytes`` the
    encoding is detected with ``charset_normalizer``. When nothing is detected,
    or decoding with the detected encoding fails, the ``str()`` form of the
    bytes object is returned instead.
    """
    if not isinstance(o, bytes):
        return str(o)

    detection = charset_normalizer.detect(o)
    if detection["encoding"] is None:
        return str(o)
    try:
        return o.decode(detection["encoding"])
    except UnicodeDecodeError:
        # The guess was wrong; fall back to the plain str() form.
        return str(o)
def shorten_str(s: str, size: int) -> str:
    """Abbreviate ``s`` to about ``size`` characters.

    Sizes below 7 simply truncate to ``size`` characters. Otherwise a string
    longer than ``size`` is reduced to its head and tail joined by ``" ... "``,
    and a string that already fits is returned unchanged.
    """
    if size < 7:
        return s[:size]
    if len(s) <= size:
        return s
    # Five characters are reserved for the " ... " separator.
    half = (size - 5) // 2
    head, tail = s[:half], s[-half:]
    return f"{head} ... {tail}"
def compatible_encode_method(
    bytesorstring: bytes | str,
    encoding: str = "utf-8",
    erraction: str = "ignore",
) -> str:
    """Return text for input that may be either ``str`` or ``bytes``.

    Code written for Python 2 ``str.encode`` often means ``bytes`` in
    Python 3; this helper accepts both.

    :param bytesorstring: text, returned unchanged, or bytes to decode.
    :param encoding: codec used to decode bytes.
    :param erraction: error handler passed to ``bytes.decode``.
    :returns: the text.
    """
    if not isinstance(bytesorstring, str):
        assert isinstance(bytesorstring, bytes), str(type(bytesorstring))
        return bytesorstring.decode(encoding, erraction)
    return bytesorstring
def paeth_predictor(left: int, above: int, upper_left: int) -> int:
    """Return whichever neighbour is closest to ``left + above - upper_left``.

    Ties are broken in the order ``left``, ``above``, ``upper_left``.
    From http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
    """
    estimate = left + above - upper_left
    dist_left = abs(estimate - left)
    dist_above = abs(estimate - above)
    dist_upper_left = abs(estimate - upper_left)

    if dist_left <= dist_above and dist_left <= dist_upper_left:
        return left
    if dist_above <= dist_upper_left:
        return above
    return upper_left
def apply_tiff_predictor(
    colors: int, columns: int, bitspercomponent: int, data: bytes
) -> bytes:
    """Reverse the effect of the TIFF predictor 2

    Every byte of a row, except those of the first pixel, is replaced by its
    sum (mod 256) with the already decoded byte one pixel to its left. A
    truncated final row is decoded as far as it goes instead of raising
    ``IndexError``.

    Documentation:
    https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf
    (Section 14, page 64)

    :param colors: number of components per pixel.
    :param columns: number of pixels per row.
    :param bitspercomponent: bits per component; only 8 is supported.
    :param data: the predicted rows, back to back.
    :returns: the decoded rows, back to back.
    :raises PDFValueError: if ``bitspercomponent`` is not 8.
    """
    if bitspercomponent != 8:
        error_msg = f"Unsupported `bitspercomponent': {bitspercomponent}"
        raise PDFValueError(error_msg)
    bpp = colors * (bitspercomponent // 8)
    nbytes = columns * bpp
    buf = bytearray()
    for scanline_i in range(0, len(data), nbytes):
        # Slicing instead of indexing tolerates a short last row.
        row = bytearray(data[scanline_i : scanline_i + nbytes])
        # The first pixel of a row is stored as-is; later bytes are deltas.
        for i in range(bpp, len(row)):
            row[i] = (row[i] + row[i - bpp]) & 0xFF
        buf += row

    return bytes(buf)
153def apply_png_predictor(
154 pred: int,
155 colors: int,
156 columns: int,
157 bitspercomponent: int,
158 data: bytes,
159) -> bytes:
160 """Reverse the effect of the PNG predictor
162 Documentation: http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
163 """
164 if bitspercomponent not in [8, 1]:
165 msg = f"Unsupported `bitspercomponent': {bitspercomponent}"
166 raise PDFValueError(msg)
168 nbytes = colors * columns * bitspercomponent // 8
169 bpp = colors * bitspercomponent // 8 # number of bytes per complete pixel
170 buf = []
171 line_above = list(b"\x00" * columns)
172 for scanline_i in range(0, len(data), nbytes + 1):
173 filter_type = data[scanline_i]
174 line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes]
175 raw = []
177 if filter_type == 0:
178 # Filter type 0: None
179 raw = list(line_encoded)
181 elif filter_type == 1:
182 # Filter type 1: Sub
183 # To reverse the effect of the Sub() filter after decompression,
184 # output the following value:
185 # Raw(x) = Sub(x) + Raw(x - bpp)
186 # (computed mod 256), where Raw() refers to the bytes already
187 # decoded.
188 for j, sub_x in enumerate(line_encoded):
189 raw_x_bpp = 0 if j - bpp < 0 else int(raw[j - bpp])
190 raw_x = (sub_x + raw_x_bpp) & 255
191 raw.append(raw_x)
193 elif filter_type == 2:
194 # Filter type 2: Up
195 # To reverse the effect of the Up() filter after decompression,
196 # output the following value:
197 # Raw(x) = Up(x) + Prior(x)
198 # (computed mod 256), where Prior() refers to the decoded bytes of
199 # the prior scanline.
200 for up_x, prior_x in zip(line_encoded, line_above, strict=False):
201 raw_x = (up_x + prior_x) & 255
202 raw.append(raw_x)
204 elif filter_type == 3:
205 # Filter type 3: Average
206 # To reverse the effect of the Average() filter after
207 # decompression, output the following value:
208 # Raw(x) = Average(x) + floor((Raw(x-bpp)+Prior(x))/2)
209 # where the result is computed mod 256, but the prediction is
210 # calculated in the same way as for encoding. Raw() refers to the
211 # bytes already decoded, and Prior() refers to the decoded bytes of
212 # the prior scanline.
213 for j, average_x in enumerate(line_encoded):
214 raw_x_bpp = 0 if j - bpp < 0 else int(raw[j - bpp])
215 prior_x = int(line_above[j])
216 raw_x = (average_x + (raw_x_bpp + prior_x) // 2) & 255
217 raw.append(raw_x)
219 elif filter_type == 4:
220 # Filter type 4: Paeth
221 # To reverse the effect of the Paeth() filter after decompression,
222 # output the following value:
223 # Raw(x) = Paeth(x)
224 # + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp))
225 # (computed mod 256), where Raw() and Prior() refer to bytes
226 # already decoded. Exactly the same PaethPredictor() function is
227 # used by both encoder and decoder.
228 for j, paeth_x in enumerate(line_encoded):
229 if j - bpp < 0:
230 raw_x_bpp = 0
231 prior_x_bpp = 0
232 else:
233 raw_x_bpp = int(raw[j - bpp])
234 prior_x_bpp = int(line_above[j - bpp])
235 prior_x = int(line_above[j])
236 paeth = paeth_predictor(raw_x_bpp, prior_x, prior_x_bpp)
237 raw_x = (paeth_x + paeth) & 255
238 raw.append(raw_x)
240 else:
241 raise PDFValueError(f"Unsupported predictor value: {filter_type}")
243 buf.extend(raw)
244 line_above = raw
245 return bytes(buf)
# Geometric primitives, all plain tuples of floats.
Point = tuple[float, float]  # (x, y)
Rect = tuple[float, float, float, float]  # (x0, y0, x1, y1)
Matrix = tuple[float, float, float, float, float, float]  # (a, b, c, d, e, f)
# One path segment: an operator name followed by its numeric operands.
PathSegment = Union[
    tuple[str],  # Literal['h']
    tuple[str, float, float],  # Literal['m', 'l']
    tuple[str, float, float, float, float],  # Literal['v', 'y']
    tuple[str, float, float, float, float, float, float],  # Literal['c']
]

# Matrix operations
MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)
def parse_rect(o: Any) -> Rect:
    """Parse ``o`` as a rectangle of four numbers.

    :param o: an iterable of exactly four values accepted by ``float()``.
    :returns: the tuple ``(x0, y0, x1, y1)`` with every member as ``float``.
    :raises PDFValueError: if ``o`` cannot be unpacked into four values or
        one of them cannot be converted to ``float``.
    """
    try:
        (x0, y0, x1, y1) = o
        return float(x0), float(y0), float(x1), float(y1)
    except (TypeError, ValueError, OverflowError) as err:
        # TypeError: non-iterable input or members such as None.
        # ValueError: wrong number of members or unparsable strings.
        # OverflowError: integers too large to be represented as float.
        raise PDFValueError("Could not parse rectangle") from err
def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
    """Returns the multiplication of two matrices.

    In the convention used by ``apply_matrix_pt``, the result is the transform
    that applies ``m1`` first and ``m0`` second.

    :param m1: the matrix applied first, as ``(a, b, c, d, e, f)``.
    :param m0: the matrix applied second, as ``(a, b, c, d, e, f)``.
    :returns: the combined matrix as ``(a, b, c, d, e, f)``.
    """
    (a1, b1, c1, d1, e1, f1) = m1
    (a0, b0, c0, d0, e0, f0) = m0
    return (
        a0 * a1 + c0 * b1,
        b0 * a1 + d0 * b1,
        a0 * c1 + c0 * d1,
        b0 * c1 + d0 * d1,
        a0 * e1 + c0 * f1 + e0,
        b0 * e1 + d0 * f1 + f0,
    )
def translate_matrix(m: Matrix, v: Point) -> Matrix:
    """Translates a matrix by (x, y) inside the projection.

    The matrix is changed so that its origin is at the specified point in its
    own coordinate system. Note that this is different from translating it
    within the original coordinate system.

    :param m: the matrix as ``(a, b, c, d, e, f)``.
    :param v: the offset ``(x, y)``.
    :returns: ``m`` with only its ``e`` and ``f`` members changed.
    """
    (a, b, c, d, e, f) = m
    (x, y) = v
    shifted_e = x * a + y * c + e
    shifted_f = x * b + y * d + f
    return (a, b, c, d, shifted_e, shifted_f)
def apply_matrix_pt(m: Matrix, v: Point) -> Point:
    """Applies a matrix to a point.

    :param m: the matrix as ``(a, b, c, d, e, f)``.
    :param v: the point ``(x, y)``.
    :returns: ``(a*x + c*y + e, b*x + d*y + f)``.
    """
    (a, b, c, d, e, f) = m
    (x, y) = v
    new_x = a * x + c * y + e
    new_y = b * x + d * y + f
    return (new_x, new_y)
def apply_matrix_rect(m: Matrix, rect: Rect) -> Rect:
    """Applies a matrix to a rectangle.

    Note that the result is not a rotated rectangle, but a rectangle with the
    same orientation that tightly fits the outside of the rotated content.

    :param m: The rotation matrix.
    :param rect: The rectangle coordinates (x0, y0, x1, y1), where x0 < x1
        and y0 < y1.
    :returns a rectangle with the same orientation, but that would fit the
        rotated content.
    """
    (x0, y0, x1, y1) = rect

    # Transform the four corners individually.
    (lb_x, lb_y) = apply_matrix_pt(m, (x0, y0))
    (rb_x, rb_y) = apply_matrix_pt(m, (x1, y0))
    (rt_x, rt_y) = apply_matrix_pt(m, (x1, y1))
    (lt_x, lt_y) = apply_matrix_pt(m, (x0, y1))

    # The bounding box is spanned by the extremes on each axis.
    xs = (lb_x, lt_x, rb_x, rt_x)
    ys = (lb_y, rb_y, rt_y, lt_y)
    return (min(xs), min(ys), max(xs), max(ys))
def apply_matrix_norm(m: Matrix, v: Point) -> Point:
    """Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))

    In other words the matrix is applied without its ``e`` and ``f`` members.
    """
    (a, b, c, d, _e, _f) = m
    (p, q) = v
    delta_x = a * p + c * q
    delta_y = b * p + d * q
    return (delta_x, delta_y)
339# Utility functions
def isnumber(x: object) -> bool:
    """Return True if ``x`` is an ``int`` or a ``float`` (subclasses included)."""
    return isinstance(x, int) or isinstance(x, float)
_T = TypeVar("_T")


def uniq(objs: Iterable[_T]) -> Iterator[_T]:
    """Eliminates duplicated elements.

    Lazily yields each distinct element once, in order of first appearance.
    Elements are tracked in a set, so they must be hashable.
    """
    seen: set[_T] = set()
    for candidate in objs:
        if candidate not in seen:
            seen.add(candidate)
            yield candidate
def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]) -> tuple[list[_T], list[_T]]:
    """Split a list into two classes according to the predicate.

    :param pred: called once per element.
    :param objs: the elements to classify.
    :returns: ``(matching, non_matching)``, each in input order.
    """
    accepted: list[_T] = []
    rejected: list[_T] = []
    for item in objs:
        bucket = accepted if pred(item) else rejected
        bucket.append(item)
    return accepted, rejected
def drange(v0: float, v1: float, d: int) -> range:
    """Returns a discrete range.

    The range runs from ``int(v0) // d`` up to, but not including,
    ``int(v1 + d) // d``.
    """
    first = int(v0) // d
    stop = int(v1 + d) // d
    return range(first, stop)
def get_bound(pts: Iterable[Point]) -> Rect:
    """Compute a minimal rectangle that covers all the points.

    The search starts from ``(INF, INF, -INF, -INF)``, which is also the
    result when ``pts`` is empty.

    :returns: ``(x0, y0, x1, y1)``.
    """
    x0 = y0 = INF
    x1 = y1 = -INF
    for px, py in pts:
        # Lower bounds shrink towards the smallest coordinate seen so far.
        if px < x0:
            x0 = px
        if py < y0:
            y0 = py
        # Upper bounds grow towards the largest coordinate seen so far.
        if px > x1:
            x1 = px
        if py > y1:
            y1 = py
    return (x0, y0, x1, y1)
def pick(
    seq: Iterable[_T],
    func: Callable[[_T], float],
    maxobj: _T | None = None,
) -> _T | None:
    """Picks the object obj where func(obj) has the highest value.

    :param seq: the candidates.
    :param func: the scoring function, called once per candidate.
    :param maxobj: the value returned when ``seq`` is empty.
    :returns: the first candidate with the highest score.
    """
    best_score = None
    for candidate in seq:
        candidate_score = func(candidate)
        # Only a strictly higher score replaces the current best, so the
        # earliest of several equally scored candidates wins.
        if best_score is not None and not (best_score < candidate_score):
            continue
        best_score, maxobj = candidate_score, candidate
    return maxobj
def choplist(n: int, seq: Iterable[_T]) -> Iterator[tuple[_T, ...]]:
    """Groups every n elements of the list.

    Lazily yields tuples of ``n`` consecutive elements. Elements left over at
    the end that do not fill a whole group are dropped.
    """
    group: list[_T] = []
    for element in seq:
        group.append(element)
        if len(group) != n:
            continue
        yield tuple(group)
        group = []
def nunpack(s: bytes, default: int = 0) -> int:
    """Unpacks variable-length unsigned integers (big endian).

    :param s: the bytes to interpret, most significant byte first.
    :param default: the value returned when ``s`` is empty.
    """
    if len(s) == 0:
        return default
    return int.from_bytes(s, byteorder="big", signed=False)
# Maps each byte value (the string index) to the character PDFDocEncoding
# assigns to it. Contiguous runs are written as ranges; the irregular
# stretches are listed explicitly.
# Bytes 0x7F, 0x9F and 0xAD map to U+0000 (presumably because they are
# unassigned -- verify against the PDF specification).
# NOTE(review): byte 0x16 maps to U+0017 rather than U+0016, duplicating the
# entry for byte 0x17. Confirm against the PDF specification before changing.
# fmt: off
PDFDocEncoding = "".join(
    chr(x)
    for x in (
        # 0x00-0x15
        *range(0x0000, 0x0016),
        # 0x16-0x17
        0x0017, 0x0017,
        # 0x18-0x1F
        0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC,
        # 0x20-0x7E: same code points as the byte values
        *range(0x0020, 0x007F),
        # 0x7F
        0x0000,
        # 0x80-0x9E
        0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
        0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018,
        0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160,
        0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E,
        # 0x9F
        0x0000,
        # 0xA0
        0x20AC,
        # 0xA1-0xAC: same code points as the byte values
        *range(0x00A1, 0x00AD),
        # 0xAD
        0x0000,
        # 0xAE-0xFF: same code points as the byte values
        *range(0x00AE, 0x0100),
    )
)
# fmt: on
def decode_text(s: bytes) -> str:
    """Decodes a PDFDocEncoding string to Unicode.

    Data starting with the bytes ``FE FF`` is decoded as UTF-16BE after
    dropping those two bytes, ignoring undecodable sequences. Anything else
    is mapped byte by byte through ``PDFDocEncoding``.
    """
    if not s.startswith(b"\xfe\xff"):
        return "".join(map(PDFDocEncoding.__getitem__, s))
    return str(s[2:], "utf-16be", "ignore")
def enc(x: str) -> str:
    """Encodes a string for SGML/XML/HTML

    ``bytes`` input yields an empty string; everything else is passed to
    ``html.escape``.
    """
    return "" if isinstance(x, bytes) else escape(x)
def bbox2str(bbox: Rect) -> str:
    """Format a bounding box as four comma-separated numbers with three decimals."""
    x0, y0, x1, y1 = bbox
    text = f"{x0:.3f},{y0:.3f},{x1:.3f},{y1:.3f}"
    return text
def matrix2str(m: Matrix) -> str:
    """Format a matrix as ``[a,b,c,d, (e,f)]`` with two decimals per number."""
    a, b, c, d, e, f = m
    text = f"[{a:.2f},{b:.2f},{c:.2f},{d:.2f}, ({e:.2f},{f:.2f})]"
    return text
def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
    """A distance function between two TextBoxes.

    Consider the bounding rectangle for obj1 and obj2.
    Return vector between 2 boxes boundaries if they don't overlap, otherwise
    returns vector between boxes centers

             +------+..........+ (x1, y1)
             | obj1 |          :
             +------+www+------+
             :          | obj2 |
    (x0, y0) +..........+------+
    """
    # Bounding rectangle that encloses both boxes.
    hull_x0 = min(obj1.x0, obj2.x0)
    hull_y0 = min(obj1.y0, obj2.y0)
    hull_x1 = max(obj1.x1, obj2.x1)
    hull_y1 = max(obj1.y1, obj2.y1)
    hull_w = hull_x1 - hull_x0
    hull_h = hull_y1 - hull_y0

    # Space left on each axis once both boxes are subtracted; a negative
    # value means the boxes overlap on that axis.
    gap_w = hull_w - obj1.width - obj2.width
    gap_h = hull_h - obj1.height - obj2.height

    if not (gap_w < 0 and gap_h < 0):
        return max(0, gap_w), max(0, gap_h)

    # The boxes overlap on both axes: use the vector between their centers.
    center1_x = (obj1.x0 + obj1.x1) / 2
    center1_y = (obj1.y0 + obj1.y1) / 2
    center2_x = (obj2.x0 + obj2.x1) / 2
    center2_y = (obj2.y0 + obj2.y1) / 2
    return center1_x - center2_x, center1_y - center2_y
735LTComponentT = TypeVar("LTComponentT", bound="LTComponent")
738class Plane(Generic[LTComponentT]):
739 """A set-like data structure for objects placed on a plane.
741 Can efficiently find objects in a certain rectangular area.
742 It maintains two parallel lists of objects, each of
743 which is sorted by its x or y coordinate.
744 """
746 def __init__(self, bbox: Rect, gridsize: int = 50) -> None:
747 self._seq: list[LTComponentT] = [] # preserve the object order.
748 self._objs: set[LTComponentT] = set()
749 self._grid: dict[Point, list[LTComponentT]] = {}
750 self.gridsize = gridsize
751 (self.x0, self.y0, self.x1, self.y1) = bbox
753 def __repr__(self) -> str:
754 return f"<Plane objs={list(self)!r}>"
756 def __iter__(self) -> Iterator[LTComponentT]:
757 return (obj for obj in self._seq if obj in self._objs)
759 def __len__(self) -> int:
760 return len(self._objs)
762 def __contains__(self, obj: object) -> bool:
763 return obj in self._objs
765 def _getrange(self, bbox: Rect) -> Iterator[Point]:
766 (x0, y0, x1, y1) = bbox
767 if x1 <= self.x0 or self.x1 <= x0 or y1 <= self.y0 or self.y1 <= y0:
768 return
769 x0 = max(self.x0, x0)
770 y0 = max(self.y0, y0)
771 x1 = min(self.x1, x1)
772 y1 = min(self.y1, y1)
773 for grid_y in drange(y0, y1, self.gridsize):
774 for grid_x in drange(x0, x1, self.gridsize):
775 yield (grid_x, grid_y)
777 def extend(self, objs: Iterable[LTComponentT]) -> None:
778 for obj in objs:
779 self.add(obj)
781 def add(self, obj: LTComponentT) -> None:
782 """Place an object."""
783 for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
784 if k not in self._grid:
785 r: list[LTComponentT] = []
786 self._grid[k] = r
787 else:
788 r = self._grid[k]
789 r.append(obj)
790 self._seq.append(obj)
791 self._objs.add(obj)
793 def remove(self, obj: LTComponentT) -> None:
794 """Displace an object."""
795 for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
796 with contextlib.suppress(KeyError, ValueError):
797 self._grid[k].remove(obj)
798 self._objs.remove(obj)
800 def find(self, bbox: Rect) -> Iterator[LTComponentT]:
801 """Finds objects that are in a certain area."""
802 (x0, y0, x1, y1) = bbox
803 done = set()
804 for k in self._getrange(bbox):
805 if k not in self._grid:
806 continue
807 for obj in self._grid[k]:
808 if obj in done:
809 continue
810 done.add(obj)
811 if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 or y1 <= obj.y0:
812 continue
813 yield obj
# Roman symbols for 1, 10, 100, 1000 and for 5, 50, 500, indexed by decimal place.
ROMAN_ONES = ["i", "x", "c", "m"]
ROMAN_FIVES = ["v", "l", "d"]


def format_int_roman(value: int) -> str:
    """Format a number as lowercase Roman numerals.

    :param value: a number between 1 and 3999 inclusive (asserted).
    """
    assert 0 < value < 4000
    numeral = ""
    place = 0

    # Convert one decimal digit at a time, least significant first, and
    # prepend its symbols to what has been built so far.
    while value:
        value, digit = divmod(value, 10)
        one = ROMAN_ONES[place]
        if digit == 9:
            group = one + ROMAN_ONES[place + 1]
        elif digit == 4:
            group = one + ROMAN_FIVES[place]
        elif digit >= 5:
            group = ROMAN_FIVES[place] + one * (digit - 5)
        else:
            group = one * digit
        numeral = group + numeral
        place += 1

    return numeral
def format_int_alpha(value: int) -> str:
    """Format a number as lowercase letters a-z, aa-zz, etc.

    :param value: a positive number (asserted); 1 gives ``"a"``.
    """
    assert value > 0
    letters = string.ascii_lowercase
    text = ""

    # There is no zero digit, hence the ``- 1`` before each division.
    while value:
        value, offset = divmod(value - 1, len(letters))
        text = letters[offset] + text

    return text
def unpad_aes(padded: bytes) -> bytes:
    """Remove block padding as described in PDF 1.7 section 7.6.2:

    > For an original message length of M, the pad shall consist of 16 -
    (M mod 16) bytes whose value shall also be 16 - (M mod 16).
    > Note that the pad is present when M is evenly divisible by 16;
    it contains 16 bytes of 0x10.

    Data that does not end in a valid pad is returned unchanged.

    :param padded: the decrypted data, possibly ending in padding.
    :returns: the data without its padding.
    """
    if len(padded) == 0:
        return padded
    # Check for a potential padding byte (bytes are unsigned)
    padding = padded[-1]
    # A pad is 1 to 16 bytes long. Zero must be rejected explicitly:
    # ``padded[-0:]`` is the whole buffer and ``padded[:-0]`` is empty, so
    # all-zero data would otherwise be stripped down to nothing.
    if padding < 1 or padding > 16:
        return padded
    # A valid padding byte is the length of the padding
    if padding > len(padded):  # Obviously invalid
        return padded
    # Every byte of padding is equal to the length of padding
    if all(x == padding for x in padded[-padding:]):
        return padded[:-padding]
    return padded