1"""
2ELF file parser.
3
4This provides a class ``ELFFile`` that parses an ELF executable in a similar
5interface to ``ZipFile``. Only the read interface is implemented.
6
7ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
8"""
9
10from __future__ import annotations
11
12import enum
13import os
14import struct
15from typing import IO
16
17
class ELFInvalid(ValueError):
    """Raised when the data being parsed is not a usable ELF file."""
20
21
class EIClass(enum.IntEnum):
    """Values of the ``EI_CLASS`` identification byte (file class / bitness)."""

    C32 = 1  # ELFCLASS32: 32-bit objects.
    C64 = 2  # ELFCLASS64: 64-bit objects.
25
26
class EIData(enum.IntEnum):
    """Values of the ``EI_DATA`` identification byte (data encoding)."""

    Lsb = 1  # ELFDATA2LSB: little-endian.
    Msb = 2  # ELFDATA2MSB: big-endian.
30
31
class EMachine(enum.IntEnum):
    """Values of the ``e_machine`` ELF header field this package cares about."""

    I386 = 3  # EM_386
    S390 = 22  # EM_S390
    Arm = 40  # EM_ARM
    X8664 = 62  # EM_X86_64
    # ``AArc64`` is the historical (misspelled) name and stays the canonical
    # member so ``EMachine(183).name`` and iteration order do not change.
    AArc64 = 183  # EM_AARCH64
    # Correctly spelled alias; resolves to the very same member as ``AArc64``.
    AArch64 = 183
38
39
class ELFFile:
    """
    Representation of an ELF executable.

    Only the identification bytes and the leading part of the ELF header (up
    to and including ``e_phnum``) are parsed on construction; program headers
    are read lazily by :attr:`interpreter`.

    :param f: Binary file object. Parsing starts at its current position, and
        the object is kept and re-read (via ``seek``) by :attr:`interpreter`.
    :raises ELFInvalid: If the identification bytes or the header cannot be
        read, the magic number is wrong, or the class/encoding pair is not
        recognized.
    """

    # ``p_type`` value of the program header that names the interpreter.
    _PT_INTERP = 3

    def __init__(self, f: IO[bytes]) -> None:
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # EI_CLASS: file class (bitness).
        self.encoding = ident[5]  # EI_DATA: data encoding (endianness).

        try:
            # e_fmt: Format for the ELF header fields that follow e_ident,
            #        from e_type through e_phnum.
            # p_fmt: Format for one program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz in an
            #        unpacked program header entry.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or encoding ({self.encoding})"
            ) from e

        try:
            (
                _,  # e_type
                self.machine,  # Architecture type (e_machine).
                _,  # e_version
                _,  # e_entry
                self._e_phoff,  # File offset of the program header table.
                _,  # e_shoff
                self.flags,  # Processor-specific flags.
                _,  # e_ehsize
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> tuple[int, ...]:
        """
        Read and unpack ``fmt`` from the file's current position.

        :raises struct.error: If fewer bytes than ``fmt`` requires are available.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> str | None:
        """
        The path recorded in the ``PT_INTERP`` program header.

        Program header entries that cannot be read (truncated file, or an
        offset too large for the file object to seek to) are skipped.

        :returns: The decoded path with NUL bytes stripped from both ends, or
            ``None`` if no readable ``PT_INTERP`` entry exists.
        :raises ELFInvalid: If a ``PT_INTERP`` entry is found but its offset or
            size is too large for the file object to seek to or read.
        """
        type_idx, offset_idx, filesz_idx = self._p_idx
        for index in range(self._e_phnum):
            try:
                # Offsets come straight from the (possibly malformed) file; a
                # value that does not fit the file object's offset type makes
                # seek() raise OverflowError, which is treated exactly like a
                # truncated entry.
                self._f.seek(self._e_phoff + self._e_phentsize * index)
                data = self._read(self._p_fmt)
            except (struct.error, OverflowError):
                continue
            if data[type_idx] != self._PT_INTERP:
                continue
            try:
                self._f.seek(data[offset_idx])
                raw = self._f.read(data[filesz_idx])
            except OverflowError as e:
                raise ELFInvalid("unable to read interpreter path") from e
            return os.fsdecode(raw).strip("\0")
        return None