Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/utils.py: 71%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Miscellaneous Routines."""
3import io
4import pathlib
5import string
6from html import escape
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 BinaryIO,
11 Callable,
12 Dict,
13 Generic,
14 Iterable,
15 Iterator,
16 List,
17 Optional,
18 Set,
19 TextIO,
20 Tuple,
21 TypeVar,
22 Union,
23 cast,
24)
26from pdfminer.pdfexceptions import PDFTypeError, PDFValueError
28if TYPE_CHECKING:
29 from pdfminer.layout import LTComponent
31import charset_normalizer # For str encoding detection
# Python 3 no longer has sys.maxint, but PDF still uses 32-bit integers, so
# the largest signed 32-bit value stands in for "infinity".
INF = 2**31 - 1

# What open_filename accepts (a path or a file object) and what it yields.
FileOrName = Union[pathlib.PurePath, str, io.IOBase]
AnyIO = Union[TextIO, BinaryIO]
class open_filename:
    """Context manager yielding a file object for a path or an open file.

    A ``str`` or ``pathlib.PurePath`` is opened with the builtin ``open``
    (extra positional and keyword arguments are forwarded to it) and closed
    again on exit. An ``io.IOBase`` instance is handed back as-is and is left
    open on exit. Any other input raises ``PDFTypeError``.
    """

    def __init__(self, filename: FileOrName, *args: Any, **kwargs: Any) -> None:
        target = str(filename) if isinstance(filename, pathlib.PurePath) else filename
        if isinstance(target, str):
            # We opened it, so we are responsible for closing it.
            self.file_handler: AnyIO = open(target, *args, **kwargs)
            self.closing = True
            return
        if isinstance(target, io.IOBase):
            # The caller owns this file object; never close it for them.
            self.file_handler = cast(AnyIO, target)
            self.closing = False
            return
        raise PDFTypeError("Unsupported input type: %s" % type(filename))

    def __enter__(self) -> AnyIO:
        return self.file_handler

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        if not self.closing:
            return
        self.file_handler.close()
def make_compat_bytes(in_str: str) -> bytes:
    """Encode a ``str`` as UTF-8 ``bytes``; anything but ``str`` fails the assert."""
    assert isinstance(in_str, str), str(type(in_str))
    return in_str.encode("utf-8")
def make_compat_str(o: object) -> str:
    """Convert any object to ``str``, guessing the encoding of ``bytes``.

    :param o: The object to convert.
    :returns: For ``bytes``, the text decoded with the encoding reported by
        ``charset_normalizer.detect``. If no encoding is reported, or decoding
        with the reported one fails, ``str(o)`` is returned instead (for bytes
        that is the ``b'...'`` representation). For every other type,
        ``str(o)``.
    """
    if not isinstance(o, bytes):
        return str(o)
    encoding = charset_normalizer.detect(o)["encoding"]
    if not encoding:
        # Detection may come back empty-handed; bytes.decode(None) would raise
        # TypeError, so fall back the same way a failed decode does.
        return str(o)
    try:
        return o.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector named a codec this interpreter lacks.
        return str(o)
def shorten_str(s: str, size: int) -> str:
    """Abbreviate ``s`` so that it is at most ``size`` characters long.

    For ``size`` below 7 the string is simply cut to ``size`` characters.
    Otherwise a string longer than ``size`` keeps its first and last
    ``(size - 5) // 2`` characters, joined by ``" ... "``; a string that
    already fits is returned unchanged.
    """
    if size < 7:
        return s[:size]
    if len(s) <= size:
        return s
    keep = (size - 5) // 2
    head, tail = s[:keep], s[-keep:]
    return f"{head} ... {tail}"
def compatible_encode_method(
    bytesorstring: Union[bytes, str],
    encoding: str = "utf-8",
    erraction: str = "ignore",
) -> str:
    """Return text for either ``str`` or ``bytes`` input.

    A ``str`` is returned untouched; ``bytes`` are decoded with ``encoding``
    using ``erraction`` as the error handler. Exists for code ported from
    Python 2, where the same call site could receive either type.
    """
    if not isinstance(bytesorstring, str):
        assert isinstance(bytesorstring, bytes), str(type(bytesorstring))
        return bytesorstring.decode(encoding, erraction)
    return bytesorstring
def paeth_predictor(left: int, above: int, upper_left: int) -> int:
    """Return the neighbour closest to the Paeth estimate.

    From http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html: the initial
    estimate is ``left + above - upper_left`` and the result is whichever of
    the three inputs lies nearest to it.
    """
    estimate = left + above - upper_left
    # min() returns the first of several equally near candidates, which gives
    # the required tie-break order: left, above, upper_left.
    return min((left, above, upper_left), key=lambda value: abs(estimate - value))
def apply_tiff_predictor(
    colors: int, columns: int, bitspercomponent: int, data: bytes
) -> bytes:
    """Reverse the effect of the TIFF predictor 2.

    Documentation: https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf
    (Section 14, page 64)

    :param colors: Number of colour components per sample.
    :param columns: Number of samples per row.
    :param bitspercomponent: Bits per colour component; only 8 is supported.
    :param data: The encoded bytes, rows stored back to back.
    :returns: The decoded bytes, same length as ``data``. A final row shorter
        than ``colors * columns`` bytes is decoded as far as it goes.
    :raises PDFValueError: If ``bitspercomponent`` is not 8.
    """
    if bitspercomponent != 8:
        error_msg = f"Unsupported `bitspercomponent': {bitspercomponent}"
        raise PDFValueError(error_msg)
    bpp = colors * (bitspercomponent // 8)
    nbytes = columns * bpp
    buf = bytearray()
    for scanline_i in range(0, len(data), nbytes):
        # Slice the row instead of indexing nbytes positions: a truncated last
        # row then yields a short slice rather than an IndexError.
        row = bytearray(data[scanline_i : scanline_i + nbytes])
        # Each byte was stored as the difference to the same component of the
        # previous sample; add it back (mod 256), left to right.
        for i in range(bpp, len(row)):
            row[i] = (row[i] + row[i - bpp]) & 0xFF
        buf += row

    return bytes(buf)
def apply_png_predictor(
    pred: int,
    colors: int,
    columns: int,
    bitspercomponent: int,
    data: bytes,
) -> bytes:
    """Reverse the effect of the PNG predictor.

    Documentation: http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html

    :param pred: The predictor value. Not used here: every encoded row names
        its own filter type in its first byte.
    :param colors: Number of colour components per sample.
    :param columns: Number of samples per row.
    :param bitspercomponent: Bits per colour component; 8 and 1 are supported.
    :param data: Encoded rows, each a filter-type byte followed by the
        filtered row bytes.
    :returns: The decoded row bytes with the filter-type bytes removed.
    :raises PDFValueError: If ``bitspercomponent`` is unsupported or a row
        uses a filter type other than 0-4.
    """
    if bitspercomponent not in [8, 1]:
        msg = "Unsupported `bitspercomponent': %d" % bitspercomponent
        raise PDFValueError(msg)

    # Rows start on byte boundaries, so a row of sub-byte samples occupies a
    # whole number of bytes, rounded up.
    nbytes = (colors * columns * bitspercomponent + 7) // 8
    # Number of bytes per complete pixel, rounded up to one: with bpp == 0 the
    # filters below would reference the very byte that is being decoded.
    bpp = max(1, colors * bitspercomponent // 8)
    buf = []
    # The row above the first one counts as all zeros, one per byte of a row
    # (not one per column, which is too short when colors > 1).
    line_above = [0] * nbytes
    for scanline_i in range(0, len(data), nbytes + 1):
        filter_type = data[scanline_i]
        line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes]
        raw = []

        if filter_type == 0:
            # Filter type 0: None
            raw = list(line_encoded)

        elif filter_type == 1:
            # Filter type 1: Sub
            #   Raw(x) = Sub(x) + Raw(x - bpp)
            # (computed mod 256), where Raw() refers to the bytes already
            # decoded.
            for j, sub_x in enumerate(line_encoded):
                raw_x_bpp = int(raw[j - bpp]) if j >= bpp else 0
                raw.append((sub_x + raw_x_bpp) & 255)

        elif filter_type == 2:
            # Filter type 2: Up
            #   Raw(x) = Up(x) + Prior(x)
            # (computed mod 256), where Prior() refers to the decoded bytes of
            # the prior scanline.
            for up_x, prior_x in zip(line_encoded, line_above):
                raw.append((up_x + prior_x) & 255)

        elif filter_type == 3:
            # Filter type 3: Average
            #   Raw(x) = Average(x) + floor((Raw(x-bpp)+Prior(x))/2)
            # The result is computed mod 256, but the prediction itself is
            # not reduced before it is added.
            for j, average_x in enumerate(line_encoded):
                raw_x_bpp = int(raw[j - bpp]) if j >= bpp else 0
                prior_x = int(line_above[j])
                raw.append((average_x + (raw_x_bpp + prior_x) // 2) & 255)

        elif filter_type == 4:
            # Filter type 4: Paeth
            #   Raw(x) = Paeth(x)
            #            + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp))
            # (computed mod 256), where Raw() and Prior() refer to bytes
            # already decoded.
            for j, paeth_x in enumerate(line_encoded):
                if j >= bpp:
                    raw_x_bpp = int(raw[j - bpp])
                    prior_x_bpp = int(line_above[j - bpp])
                else:
                    raw_x_bpp = 0
                    prior_x_bpp = 0
                prior_x = int(line_above[j])
                paeth = paeth_predictor(raw_x_bpp, prior_x, prior_x_bpp)
                raw.append((paeth_x + paeth) & 255)

        else:
            raise PDFValueError("Unsupported predictor value: %d" % filter_type)

        buf.extend(raw)
        line_above = raw
    return bytes(buf)
# Geometry aliases: a point is (x, y), a rectangle is (x0, y0, x1, y1) and a
# matrix is the six-number form (a, b, c, d, e, f).
Point = Tuple[float, float]
Rect = Tuple[float, float, float, float]
Matrix = Tuple[float, float, float, float, float, float]
PathSegment = Union[
    Tuple[str],  # Literal['h']
    Tuple[str, float, float],  # Literal['m', 'l']
    Tuple[str, float, float, float, float],  # Literal['v', 'y']
    Tuple[str, float, float, float, float, float, float],  # Literal['c']
]

# Matrix operations
MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)
def parse_rect(o: Any) -> Rect:
    """Convert a four-element iterable into a rectangle of floats.

    Raises ``PDFValueError`` when unpacking or a float conversion raises
    ``ValueError``.
    """
    try:
        x0, y0, x1, y1 = o
        rect = (float(x0), float(y0), float(x1), float(y1))
    except ValueError:
        raise PDFValueError("Could not parse rectangle")
    return rect
def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
    """Returns the multiplication of two matrices.

    Both arguments and the result use the six-number form
    ``(a, b, c, d, e, f)``. Applying the result to a point gives the same
    coordinates as applying ``m1`` first and ``m0`` to the outcome.

    :param m1: The matrix that takes effect first.
    :param m0: The matrix that takes effect second.
    :returns: The combined matrix.
    """
    (a1, b1, c1, d1, e1, f1) = m1
    (a0, b0, c0, d0, e0, f0) = m0
    return (
        a0 * a1 + c0 * b1,
        b0 * a1 + d0 * b1,
        a0 * c1 + c0 * d1,
        b0 * c1 + d0 * d1,
        a0 * e1 + c0 * f1 + e0,
        b0 * e1 + d0 * f1 + f0,
    )
def translate_matrix(m: Matrix, v: Point) -> Matrix:
    """Move the origin of a matrix to a point given in its own coordinates.

    The linear part (a, b, c, d) is kept; the new offset is where the matrix
    maps the point ``v``. This differs from translating within the original
    coordinate system, which would simply add ``v`` to the offset.
    """
    a, b, c, d, e, f = m
    dx, dy = v
    new_e = dx * a + dy * c + e
    new_f = dx * b + dy * d + f
    return a, b, c, d, new_e, new_f
def apply_matrix_pt(m: Matrix, v: Point) -> Point:
    """Transform the point ``v`` by the matrix ``m``, offset included."""
    a, b, c, d, e, f = m
    x, y = v
    new_x = a * x + c * y + e
    new_y = b * x + d * y + f
    return new_x, new_y
def apply_matrix_rect(m: Matrix, rect: Rect) -> Rect:
    """Transform a rectangle and return the bounding box of the result.

    All four corners are transformed; the result is not a rotated rectangle
    but an axis-aligned one that tightly encloses the transformed corners.

    :param m: The matrix to apply.
    :param rect: The rectangle coordinates (x0, y0, x1, y1), where x0 < x1 and
        y0 < y1.
    :returns: The enclosing rectangle (x0, y0, x1, y1) of the transformed
        corners.
    """
    x0, y0, x1, y1 = rect
    lb_x, lb_y = apply_matrix_pt(m, (x0, y0))  # left bottom
    rb_x, rb_y = apply_matrix_pt(m, (x1, y0))  # right bottom
    rt_x, rt_y = apply_matrix_pt(m, (x1, y1))  # right top
    lt_x, lt_y = apply_matrix_pt(m, (x0, y1))  # left top

    xs = (lb_x, lt_x, rb_x, rt_x)
    ys = (lb_y, rb_y, rt_y, lt_y)
    return min(xs), min(ys), max(xs), max(ys)
def apply_matrix_norm(m: Matrix, v: Point) -> Point:
    """Transform ``v`` by the linear part of ``m`` only, ignoring its offset.

    Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0)).
    """
    a, b, c, d, _e, _f = m
    p, q = v
    norm_x = a * p + c * q
    norm_y = b * p + d * q
    return norm_x, norm_y
346# Utility functions
def isnumber(x: object) -> bool:
    """Return True for ``int`` and ``float`` instances (``bool`` included)."""
    numeric_types = (int, float)
    return isinstance(x, numeric_types)
_T = TypeVar("_T")


def uniq(objs: Iterable[_T]) -> Iterator[_T]:
    """Yield each element the first time it is seen, keeping input order."""
    seen = set()
    for candidate in objs:
        if candidate not in seen:
            seen.add(candidate)
            yield candidate
def fsplit(pred: Callable[[_T], bool], objs: Iterable[_T]) -> Tuple[List[_T], List[_T]]:
    """Partition ``objs`` into (elements passing ``pred``, elements failing it).

    Both lists keep the order in which the elements were encountered.
    """
    accepted = []
    rejected = []
    for item in objs:
        bucket = accepted if pred(item) else rejected
        bucket.append(item)
    return accepted, rejected
def drange(v0: float, v1: float, d: int) -> range:
    """Return the discrete range ``range(int(v0) // d, int(v1 + d) // d)``."""
    first = int(v0) // d
    stop = int(v1 + d) // d
    return range(first, stop)
def get_bound(pts: Iterable[Point]) -> Rect:
    """Compute the smallest rectangle (x0, y0, x1, y1) covering all points.

    With no points at all the result is ``(INF, INF, -INF, -INF)``.
    """
    x0, y0 = INF, INF
    x1, y1 = -INF, -INF
    for px, py in pts:
        if px < x0:
            x0 = px
        if py < y0:
            y0 = py
        if px > x1:
            x1 = px
        if py > y1:
            y1 = py
    return x0, y0, x1, y1
def pick(
    seq: Iterable[_T],
    func: Callable[[_T], float],
    maxobj: Optional[_T] = None,
) -> Optional[_T]:
    """Return the element of ``seq`` for which ``func`` scores highest.

    The first of several equally scored elements wins. ``maxobj`` is returned
    when ``seq`` is empty.
    """
    best_score = None
    for candidate in seq:
        candidate_score = func(candidate)
        if best_score is None or best_score < candidate_score:
            best_score = candidate_score
            maxobj = candidate
    return maxobj
def choplist(n: int, seq: Iterable[_T]) -> Iterator[Tuple[_T, ...]]:
    """Yield consecutive ``n``-tuples of ``seq``; a short remainder is dropped."""
    chunk = []
    for item in seq:
        chunk.append(item)
        if len(chunk) != n:
            continue
        yield tuple(chunk)
        chunk = []
def nunpack(s: bytes, default: int = 0) -> int:
    """Read ``s`` as a big-endian unsigned integer of any length.

    An empty ``s`` yields ``default``.
    """
    if not s:
        return default
    return int.from_bytes(s, "big")
# Lookup string with 256 characters: the character at index i is the Unicode
# character that byte value i stands for. Runs where the byte value and the
# code point coincide are written as ranges.
PDFDocEncoding = "".join(
    map(
        chr,
        (
            # 0x00-0x15
            *range(0x0000, 0x0016),
            # 0x16-0x17 (both entries are U+0017)
            0x0017, 0x0017,
            # 0x18-0x1F
            0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC,
            # 0x20-0x7E
            *range(0x0020, 0x007F),
            # 0x7F
            0x0000,
            # 0x80-0x9F
            0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
            0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018,
            0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160,
            0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E, 0x0000,
            # 0xA0
            0x20AC,
            # 0xA1-0xAC
            *range(0x00A1, 0x00AD),
            # 0xAD
            0x0000,
            # 0xAE-0xFF
            *range(0x00AE, 0x0100),
        ),
    )
)
def decode_text(s: bytes) -> str:
    """Decode a PDF text string to Unicode.

    Bytes starting with the marker ``FE FF`` are decoded as UTF-16BE (the
    marker is dropped and undecodable data is ignored); everything else is
    mapped byte by byte through ``PDFDocEncoding``.
    """
    if not s.startswith(b"\xfe\xff"):
        chars = [PDFDocEncoding[byte] for byte in s]
        return "".join(chars)
    return str(s[2:], "utf-16be", "ignore")
def enc(x: str) -> str:
    """Escape text for SGML/XML/HTML; ``bytes`` input yields an empty string."""
    return "" if isinstance(x, bytes) else escape(x)
def bbox2str(bbox: Rect) -> str:
    """Format a rectangle as ``x0,y0,x1,y1`` with three decimals each."""
    x0, y0, x1, y1 = bbox
    return ",".join(f"{coord:.3f}" for coord in (x0, y0, x1, y1))
def matrix2str(m: Matrix) -> str:
    """Format a matrix as ``[a,b,c,d, (e,f)]`` with two decimals each."""
    a, b, c, d, e, f = m
    linear = ",".join(f"{value:.2f}" for value in (a, b, c, d))
    offset = f"({e:.2f},{f:.2f})"
    return f"[{linear}, {offset}]"
def vecBetweenBoxes(obj1: "LTComponent", obj2: "LTComponent") -> Point:
    """A distance function between two TextBoxes.

    Consider the bounding rectangle enclosing obj1 and obj2. If the boxes
    overlap on both axes, the vector between their centers is returned.
    Otherwise the result is the gap between the box boundaries along each
    axis, with 0 for an axis on which they overlap.

    +------+..........+ (x1, y1)
    | obj1 |          :
    +------+www+------+
    :          | obj2 |
    (x0, y0) +..........+------+
    """
    outer_w = max(obj1.x1, obj2.x1) - min(obj1.x0, obj2.x0)
    outer_h = max(obj1.y1, obj2.y1) - min(obj1.y0, obj2.y0)
    # Whatever the enclosing rectangle has left over once both boxes are
    # subtracted is the gap between them; negative means they overlap.
    gap_w = outer_w - obj1.width - obj2.width
    gap_h = outer_h - obj1.height - obj2.height
    if not (gap_w < 0 and gap_h < 0):
        return max(0, gap_w), max(0, gap_h)
    # Overlap on both axes: fall back to the vector between the centers.
    center1_x = (obj1.x0 + obj1.x1) / 2
    center1_y = (obj1.y0 + obj1.y1) / 2
    center2_x = (obj2.x0 + obj2.x1) / 2
    center2_y = (obj2.y0 + obj2.y1) / 2
    return center1_x - center2_x, center1_y - center2_y
LTComponentT = TypeVar("LTComponentT", bound="LTComponent")


class Plane(Generic[LTComponentT]):
    """A set-like data structure for objects placed on a plane.

    Can efficiently find objects in a certain rectangular area: every object
    is registered in each grid cell (``gridsize`` units wide and high) that
    its bounding box touches within the plane, so a lookup only visits the
    cells of the queried area. Iteration follows insertion order.
    """

    def __init__(self, bbox: Rect, gridsize: int = 50) -> None:
        self.x0, self.y0, self.x1, self.y1 = bbox
        self.gridsize = gridsize
        # Insertion order; removed objects stay here and are filtered out by
        # __iter__ through the membership set below.
        self._seq: List[LTComponentT] = []
        self._objs: Set[LTComponentT] = set()
        # Grid cell (x index, y index) -> objects touching that cell.
        self._grid: Dict[Point, List[LTComponentT]] = {}

    def __repr__(self) -> str:
        return f"<Plane objs={list(self)!r}>"

    def __iter__(self) -> Iterator[LTComponentT]:
        return (member for member in self._seq if member in self._objs)

    def __len__(self) -> int:
        return len(self._objs)

    def __contains__(self, obj: object) -> bool:
        return obj in self._objs

    def _getrange(self, bbox: Rect) -> Iterator[Point]:
        """Yield the grid cells that ``bbox`` touches inside the plane."""
        x0, y0, x1, y1 = bbox
        if x1 <= self.x0 or self.x1 <= x0 or y1 <= self.y0 or self.y1 <= y0:
            # Entirely outside the plane: no cells at all.
            return
        # Clip to the plane before converting to cell indices.
        left, bottom = max(self.x0, x0), max(self.y0, y0)
        right, top = min(self.x1, x1), min(self.y1, y1)
        for grid_y in drange(bottom, top, self.gridsize):
            for grid_x in drange(left, right, self.gridsize):
                yield (grid_x, grid_y)

    def extend(self, objs: Iterable[LTComponentT]) -> None:
        """Place every object of ``objs``."""
        for obj in objs:
            self.add(obj)

    def add(self, obj: LTComponentT) -> None:
        """Place an object."""
        for cell in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
            self._grid.setdefault(cell, []).append(obj)
        self._seq.append(obj)
        self._objs.add(obj)

    def remove(self, obj: LTComponentT) -> None:
        """Displace an object."""
        for cell in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
            try:
                bucket = self._grid[cell]
                bucket.remove(obj)
            except (KeyError, ValueError):
                # The object was not registered in this cell; nothing to undo.
                continue
        self._objs.remove(obj)

    def find(self, bbox: Rect) -> Iterator[LTComponentT]:
        """Finds objects that are in a certain area."""
        x0, y0, x1, y1 = bbox
        visited = set()
        for cell in self._getrange(bbox):
            for obj in self._grid.get(cell, ()):
                if obj in visited:
                    # Objects spanning several cells are reported only once.
                    continue
                visited.add(obj)
                if obj.x1 <= x0 or x1 <= obj.x0 or obj.y1 <= y0 or y1 <= obj.y0:
                    # Shares a cell with the area but does not overlap it.
                    continue
                yield obj
ROMAN_ONES = ["i", "x", "c", "m"]
ROMAN_FIVES = ["v", "l", "d"]


def format_int_roman(value: int) -> str:
    """Format a number as lowercase Roman numerals."""
    assert 0 < value < 4000
    # One numeral group per decimal digit, least significant digit first.
    groups = []
    place = 0

    while value != 0:
        value, digit = divmod(value, 10)
        one = ROMAN_ONES[place]
        if digit == 9:
            # One below the next power of ten, e.g. "ix", "xc", "cm".
            group = one + ROMAN_ONES[place + 1]
        elif digit == 4:
            # One below the five, e.g. "iv", "xl", "cd".
            group = one + ROMAN_FIVES[place]
        elif digit >= 5:
            group = ROMAN_FIVES[place] + one * (digit - 5)
        else:
            group = one * digit
        groups.append(group)
        place += 1

    return "".join(reversed(groups))
def format_int_alpha(value: int) -> str:
    """Format a number as lowercase letters a-z, aa-zz, etc."""
    assert value > 0
    alphabet = string.ascii_lowercase
    letters = ""

    while value != 0:
        # Subtracting one first makes this a numbering without a zero digit:
        # 26 is "z" and 27 is "aa".
        value, offset = divmod(value - 1, len(alphabet))
        letters = alphabet[offset] + letters

    return letters
def unpad_aes(padded: bytes) -> bytes:
    """Remove block padding as described in PDF 1.7 section 7.6.2:

    > For an original message length of M, the pad shall consist of 16 -
    (M mod 16) bytes whose value shall also be 16 - (M mod 16).
    > Note that the pad is present when M is evenly divisible by 16;
    it contains 16 bytes of 0x10.

    :param padded: The decrypted message, possibly ending in padding.
    :returns: The message without its padding, or ``padded`` unchanged when
        its tail is not a valid pad (so unpadded input passes through).
    """
    if not padded:
        return padded
    # Check for a potential padding byte (bytes are unsigned)
    padding = padded[-1]
    # A pad is 1 to 16 bytes long. Zero has to be rejected explicitly:
    # padded[-0:] is the whole message and padded[:-0] is empty, so an
    # all-zero message would otherwise be "unpadded" to nothing.
    if not 1 <= padding <= 16:
        return padded
    # A valid padding byte is the length of the padding
    if padding > len(padded):  # Obviously invalid
        return padded
    # Every byte of padding is equal to the length of padding
    if padded.endswith(bytes([padding]) * padding):
        return padded[:-padding]
    return padded