Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/executable/elf.py: 58%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

184 statements  

1import io 

2import shutil 

3import zlib 

4from pathlib import Path 

5 

6import attrs 

7import lief 

8from structlog import get_logger 

9 

10from unblob.extractor import carve_chunk_to_file 

11from unblob.extractors import Command 

12from unblob.file_utils import ( 

13 Endian, 

14 File, 

15 InvalidInputFormat, 

16 StructParser, 

17 convert_int8, 

18 convert_int32, 

19 convert_int64, 

20 iterate_file, 

21 iterate_patterns, 

22 read_until_past, 

23 round_up, 

24) 

25from unblob.models import ( 

26 HandlerDoc, 

27 HandlerType, 

28 HexString, 

29 Reference, 

30 StructHandler, 

31 ValidChunk, 

32) 

33 

# Turn off LIEF's internal logging for this module's ELF parsing.
lief.logging.disable()

logger = get_logger()

# Number of bytes stepped back from the signature footer to reach the
# module_signature structure of a signed kernel module.
KERNEL_MODULE_SIGNATURE_INFO_LEN = 12
# Marker searched for after the end of a relocatable ELF file.
KERNEL_MODULE_SIGNATURE_FOOTER = b"~Module signature appended~\n"

# Section consulted when looking for an embedded initramfs.
KERNEL_INIT_DATA_SECTION = ".init.data"


# UPX structure layouts.
# [Ref] https://github.com/upx/upx/blob/devel/src/stub/src/include/linux.h
UPX_C_DEFINITIONS = r"""
typedef struct packhead{
    char magic[4];
    uint8_t version;
    uint8_t format;
    uint8_t level;
    uint8_t method;
    uint64_t unknown1;
    uint32_t u_filesize1;
    uint32_t c_filesize;
    uint32_t u_filesize2;
    uint32_t unknown2;
    uint32_t l_info_offset;
} packhead_t;

typedef struct l_info{
    uint32_t l_checksum;
    char l_magic[4];
    uint16_t l_lsize;
    uint8_t l_version;
    uint8_t l_format;
} l_info_t;
"""
# Parser shared by the UPX helpers defined in this module.
upx_parser = StructParser(UPX_C_DEFINITIONS)

69 

70 

def parse_upx_packhead(file: File):
    """Parse a UPX ``packhead_t`` structure from ``file`` as little-endian.

    Returns whatever ``upx_parser.parse`` produces for the ``packhead_t``
    definition declared in ``UPX_C_DEFINITIONS``.
    """
    struct_name = "packhead_t"
    return upx_parser.parse(struct_name, file, Endian.LITTLE)

73 

74 

def parse_upx_l_info(file: File):
    """Parse a UPX ``l_info_t`` structure from ``file`` as little-endian.

    Returns whatever ``upx_parser.parse`` produces for the ``l_info_t``
    definition declared in ``UPX_C_DEFINITIONS``.
    """
    struct_name = "l_info_t"
    return upx_parser.parse(struct_name, file, Endian.LITTLE)

77 

78 

@attrs.define(repr=False)
class ElfChunk(ValidChunk):
    """Chunk describing an ELF file, with ELF-specific extraction.

    Extraction special-cases kernel images (initramfs carving) and
    UPX-packed binaries (unpacking); any other ELF that was carved out of a
    bigger file is copied so that it survives the cleanup of carved chunks.
    """

    @staticmethod
    def upx_checksum_validates(file: File, l_info, elf) -> bool:
        """Check the Adler-32 checksum stored in a UPX ``l_info`` header.

        Args:
            file: Open file the checksummed bytes are read from.
            l_info: Parsed ``l_info_t`` structure; ``l_lsize`` and
                ``l_checksum`` are used.
            elf: Parsed LIEF binary; ``last_offset_segment`` and ``sections``
                are used.

        Returns:
            True when the Adler-32 of the computed region equals
            ``l_info.l_checksum``.
        """
        size_pack2 = elf.last_offset_segment - l_info.l_lsize
        size_alignment = round_up(size_pack2, 4)  # Forces to be mod 4
        # NOTE(review): this matches a section literally named "init" (no
        # leading dot) - confirm this is the intended section name.
        xct_off = any(section.name == "init" for section in elf.sections)
        size_alignment += (4 & size_alignment) ^ (int(xct_off) << 2)  # 4 or 0
        size_alignment += 8  # Added 2 times 4 byte (size of disp)
        if xct_off:
            size_alignment += 12
        alignment = size_alignment - size_pack2

        # The checksummed region is the tail of the loader area, i.e. l_lsize
        # without the alignment bytes computed above.
        checksummed_size = l_info.l_lsize - alignment
        checksum_offset = elf.last_offset_segment - checksummed_size
        file.seek(checksum_offset, io.SEEK_SET)
        adler32_checksum = 1  # starting value passed to zlib.adler32
        for chunk in iterate_file(file, checksum_offset, checksummed_size):
            adler32_checksum = zlib.adler32(chunk, adler32_checksum)
        return adler32_checksum == l_info.l_checksum

    def is_valid_upx(self, inpath: Path, elf) -> bool:
        """Tell whether the file at ``inpath`` carries a valid UPX ``l_info``.

        The last 4 bytes of the file are read as a little-endian 32-bit value
        from which the ``l_info_t`` size is subtracted to find where the
        ``l_info`` header starts.

        Args:
            inpath: Path of the ELF file to inspect.
            elf: Parsed LIEF binary of the same file.

        Returns:
            False when the header would not fit inside the file or its magic
            is not ``UPX!``; True when the magic and the checksum both match.

        Raises:
            InvalidInputFormat: The UPX magic is present but the checksum
                does not validate.
        """
        l_info_size = upx_parser.cparser_le.l_info_t.size
        # Use a context manager so the file is closed on every exit path,
        # including the early returns and the raise below.
        with File.from_path(inpath) as file:
            file.seek(-4, io.SEEK_END)  # last 4 bytes indicates where linfo ends
            l_info_start_offset = abs(
                convert_int32(file.read(4), Endian.LITTLE) - l_info_size
            )
            # The whole structure has to fit, not only its first byte,
            # otherwise parsing below would run past the end of the file.
            if l_info_start_offset + l_info_size > file.size():
                return False
            file.seek(l_info_start_offset, io.SEEK_SET)
            upx_header = parse_upx_l_info(file)
            if upx_header.l_magic != b"UPX!":  # Magic
                return False
            if not self.upx_checksum_validates(file, upx_header, elf):
                raise InvalidInputFormat("Invalid UPX checksum")
            return True

    def extract(self, inpath: Path, outdir: Path):
        """Extract content from the ELF file at ``inpath`` into ``outdir``.

        Kernel images (``EXEC`` type with an ``.init.data`` section) get
        their initramfs carved out, valid UPX binaries are unpacked, and any
        other ELF that is not the whole input file is copied to
        ``outdir / "carved.elf"``. Nothing is done when LIEF cannot parse the
        file, apart from logging an error.
        """
        # ELF file extraction is special in that in the general case no new files are extracted, thus
        # when we want to clean up all carves to save place, carved ELF files would be deleted as well,
        # however we want to keep carved out ELF files, as they are the interesting stuff!
        elf = lief.ELF.parse(str(inpath))

        if elf is None:
            logger.error(
                "Trying to extract an invalid ELF file.", inpath=inpath, outdir=outdir
            )
            return

        is_kernel = (
            elf.header.file_type == lief.ELF.Header.FILE_TYPE.EXEC
            and elf.has_section(KERNEL_INIT_DATA_SECTION)
        )
        if is_kernel:
            with File.from_path(inpath) as file:
                extract_initramfs(elf, file, outdir)

        elif self.is_valid_upx(inpath=inpath, elf=elf):
            extract_upx(inpath, outdir)

        elif not self.is_whole_file:
            # make a copy, and let the carved chunk be deleted
            outdir.mkdir(parents=True, exist_ok=False)
            shutil.copy2(inpath, outdir / "carved.elf")
            # more work will be done, when outdir is picked up by processing,
            # and the ELF file is processed as a whole file.
            # As a performance side effect, ELF files will be searched for chunks twice.
            # Even though the second chunk search one is short-circuited,
            # because the ELF handler will recognize it as a whole file
            # other handlers might burn some cycles on the file as well.

148 

149 

def extract_upx(inpath: Path, outdir: Path):
    """Unpack a UPX-packed file by running the external ``upx -d`` command.

    ``outdir`` is created first (it must not exist yet); the output file
    name is built from the stem of ``inpath`` with an ``.elf`` suffix.
    """
    upx_decompress = Command(
        "upx", "-d", "{inpath}", "-o{outdir}/{inpath.stem}.elf"
    )
    outdir.mkdir(exist_ok=False, parents=True)
    upx_decompress.extract(inpath, outdir)

154 

155 

def extract_initramfs(elf, file: File, outdir):
    """Carve the initramfs out of a kernel's ``.init.data`` section.

    The start and end of the initramfs cannot be computed exactly because of
    its alignment rules, so the carved ``outdir / "initramfs"`` file may
    contain 4 extra bytes before or after the actual initramfs.

    Nothing is carved when the section is missing or empty, or when the
    computed offsets or padding bytes look bogus.
    """
    if not elf.has_section(KERNEL_INIT_DATA_SECTION):
        return

    section = elf.get_section(KERNEL_INIT_DATA_SECTION)
    if not section.size:
        return

    elf_header = elf.header
    if elf_header.identity_data == lief.ELF.Header.ELF_DATA.LSB:
        endian = Endian.LITTLE
    else:
        endian = Endian.BIG

    section_start = section.file_offset
    section_end = section_start + section.size

    # The initramfs size is stored at the very end of the section, as a
    # 64-bit or 32-bit integer depending on the platform
    # (see usr/initramfs_data.S in the kernel). The size is padded to 8 bytes,
    # see include/asm-generic/vmlinux.lds.h. The initramfs itself sits right
    # before the size field.
    if elf_header.identity_class == lief.ELF.Header.CLASS.ELF64:
        size_field_width, read_size = 8, convert_int64
    else:
        size_field_width, read_size = 4, convert_int32
    initramfs_size_offset = section_end - size_field_width
    initramfs_size = read_size(
        file[initramfs_size_offset:section_end],
        endian=endian,
    )

    # The initramfs start is aligned to 4 bytes while the size field is
    # aligned to 8 bytes, so only the padded end is known. Two paddings are
    # possible for each size (0 and 4, 1 and 5, 2 and 6, 3 and 7); start with
    # the smaller one.
    initramfs_start = initramfs_size_offset - round_up(initramfs_size, 4)
    initramfs_end = initramfs_start + initramfs_size
    padding = initramfs_size_offset - initramfs_end

    # initramfs can be turned off
    # (https://www.linux.com/training-tutorials/kernel-newbie-corner-initrd-and-initramfs-whats/),
    # in which case the numbers above most probably describe a bogus chunk.
    offsets_plausible = (
        section_start <= initramfs_start < initramfs_end <= section_end
    )
    if not offsets_plausible or (
        bytes(padding) != file[initramfs_end:initramfs_size_offset]
    ):
        return

    # When the bigger padding is possible as well, take 4 more bytes from
    # the beginning.
    room_before = section_start <= initramfs_start - 4
    if room_before and (
        bytes(padding + 4) == file[initramfs_end - 4 : initramfs_size_offset]
    ):
        initramfs_start -= 4

    target = outdir / "initramfs"
    chunk = ValidChunk(start_offset=initramfs_start, end_offset=initramfs_end)
    carve_chunk_to_file(target, file, chunk)

224 

225 

class _ELFBase(StructHandler):
    """Logic shared by the 32-bit and 64-bit ELF handlers.

    Subclasses provide the C structure definitions; the struct names used
    for section and program headers are the same in both.
    """

    EXTRACTOR = None
    SECTION_HEADER_STRUCT = "elf_shdr_t"
    PROGRAM_HEADER_STRUCT = "elf_phdr_t"

    def is_valid_header(self, header) -> bool:
        """Return True when type, machine and version are values LIEF knows."""
        try:
            # Each enum constructor raises ValueError on an unknown value.
            for enum_type, raw_value in (
                (lief.ELF.Header.FILE_TYPE, header.e_type),
                (lief.ELF.ARCH, header.e_machine),
                (lief.ELF.Header.VERSION, header.e_version),
            ):
                enum_type(raw_value)
        except ValueError:
            return False
        return True

    @staticmethod
    def get_endianness(file: File, start_offset: int) -> Endian:
        """Read the byte at ``start_offset + 5`` and map it to an ``Endian``.

        A value of 0x1 means little-endian; anything else is treated as
        big-endian.
        """
        ei_data_offset = start_offset + 5
        file.seek(ei_data_offset, io.SEEK_SET)
        ei_data = convert_int8(file.read(1), Endian.LITTLE)
        if ei_data == 0x1:
            return Endian.LITTLE
        return Endian.BIG

    def get_last_section_end(
        self, file: File, sections_start_offset: int, sections_num: int, endian
    ) -> int:
        """Return the largest ``sh_offset + sh_size`` among the sections.

        ``sections_num`` section headers are parsed starting at
        ``sections_start_offset``. NOBITS sections and sections whose type
        LIEF does not know are ignored. Returns 0 when nothing qualifies.
        """
        file.seek(sections_start_offset)
        furthest_end = 0

        for _ in range(sections_num):
            shdr = self._struct_parser.parse(
                self.SECTION_HEADER_STRUCT, file, endian
            )

            try:
                section_type = lief.ELF.Section.TYPE(shdr.sh_type)
                skip_section = section_type == lief.ELF.Section.TYPE.NOBITS
            except ValueError:
                # unknown section type
                skip_section = True

            if skip_section:
                continue

            furthest_end = max(furthest_end, shdr.sh_offset + shdr.sh_size)

        return furthest_end

    def get_last_program_end(
        self, file: File, programs_start_offset: int, programs_num: int, endian
    ) -> int:
        """Return the largest ``p_offset + p_filesz`` among the segments.

        ``programs_num`` program headers are parsed starting at
        ``programs_start_offset``. Returns 0 when there are none.
        """
        file.seek(programs_start_offset)

        furthest_end = 0
        remaining = programs_num
        while remaining > 0:
            phdr = self._struct_parser.parse(
                self.PROGRAM_HEADER_STRUCT, file, endian
            )
            furthest_end = max(furthest_end, phdr.p_offset + phdr.p_filesz)
            remaining -= 1

        return furthest_end

    def get_end_offset(self, file: File, start_offset: int, header, endian) -> int:
        """Compute the absolute offset at which the ELF file ends.

        The section header table is usually last, but program headers may be
        placed at the end of the file and section headers may be intermixed
        with the sections themselves. The end of both header tables, of the
        last section and of the last segment are therefore all considered,
        and the largest of them is taken as the file size.
        """
        last_section_end = self.get_last_section_end(
            file, start_offset + header.e_shoff, header.e_shnum, endian
        )
        last_program_end = self.get_last_program_end(
            file, start_offset + header.e_phoff, header.e_phnum, endian
        )

        end_candidates = (
            header.e_shoff + (header.e_shnum * header.e_shentsize),
            header.e_phoff + (header.e_phnum * header.e_phentsize),
            last_section_end,
            last_program_end,
        )
        return start_offset + max(end_candidates)

    def get_signed_kernel_module_end_offset(self, file: File, end_offset: int) -> int:
        """Extend ``end_offset`` over an appended kernel module signature.

        Signed kernel modules are ELF files followed by a PKCS7 signature, a
        ``module_signature`` structure and the footer held in
        ``KERNEL_MODULE_SIGNATURE_FOOTER``. Only the first footer found after
        ``end_offset`` is examined: when the ``sig_len`` stored right before
        it places the signature directly after the ELF file, the offset
        just past the footer is returned, otherwise ``end_offset`` is
        returned unchanged.
        """
        file.seek(end_offset, io.SEEK_SET)
        for footer_offset in iterate_patterns(file, KERNEL_MODULE_SIGNATURE_FOOTER):
            info_offset = footer_offset - KERNEL_MODULE_SIGNATURE_INFO_LEN
            file.seek(info_offset, io.SEEK_SET)
            module_signature = self._struct_parser.parse(
                "module_signature_t", file, Endian.BIG
            )
            logger.debug(
                "module_signature_t",
                module_signature=module_signature,
                _verbosity=3,
            )
            expected_footer_offset = (
                end_offset
                + module_signature.sig_len
                + KERNEL_MODULE_SIGNATURE_INFO_LEN
            )
            if footer_offset == expected_footer_offset:
                end_offset = footer_offset + len(KERNEL_MODULE_SIGNATURE_FOOTER)

            # Only the first footer match is considered.
            break

        return end_offset

    def is_upx(self, file: File, start_offset: int, end_offset: int) -> bool:
        """Tell whether ``UPX!`` occurs within the first 4096 bytes of the ELF.

        The searched window never extends past ``end_offset``.
        """
        window_end = min(end_offset, start_offset + 4096)
        window = file[start_offset:window_end]
        return b"UPX!" in window

    def get_upx_end_offset(self, file: File, start_offset: int, end_offset: int) -> int:
        """Return the end of a UPX-packed ELF, or ``end_offset`` if not found.

        Every occurrence of the UPX footer pattern is tried: the pack header
        parsed after it has to carry the ``UPX!`` magic, two equal
        uncompressed sizes, and a compressed size that matches the current
        file position and is a multiple of 4.
        """
        upx_footer = b"\xff\x00\x00\x00\x00UPX!\x00\x00\x00\x00"
        footer_len = len(upx_footer)

        for packhead_offset in iterate_patterns(file=file, pattern=upx_footer):
            # move to the end of the footer
            file.seek(packhead_offset + footer_len, io.SEEK_SET)
            # sometimes more NULL bytes are added
            file.seek(read_until_past(file=file, pattern=b"\x00"))
            packheader = parse_upx_packhead(file)
            file_size_compressed = packheader.c_filesize + packheader.size

            if packheader.magic != b"UPX!":
                continue
            if packheader.u_filesize1 != packheader.u_filesize2:
                continue
            if file_size_compressed != file.tell() - start_offset:
                continue
            if file_size_compressed % 4 != 0:
                continue
            return start_offset + file_size_compressed

        # no matching UPX footer found
        return end_offset

    def calculate_chunk(self, file: File, start_offset: int) -> ElfChunk | None:
        """Build the ``ElfChunk`` of the ELF file starting at ``start_offset``.

        Returns None when the parsed header fails ``is_valid_header``. The
        end offset is extended for signed kernel modules (relocatable files)
        and adjusted for UPX-packed binaries.
        """
        endian = self.get_endianness(file, start_offset)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)
        if not self.is_valid_header(header):
            return None

        end_offset = self.get_end_offset(file, start_offset, header, endian)

        # kernel modules are always relocatable
        is_relocatable = header.e_type == lief.ELF.Header.FILE_TYPE.REL.value
        if is_relocatable:
            end_offset = self.get_signed_kernel_module_end_offset(file, end_offset)

        if self.is_upx(file=file, start_offset=start_offset, end_offset=end_offset):
            end_offset = self.get_upx_end_offset(file, start_offset, end_offset)

        # ElfChunk provides the special extraction of ELF files
        return ElfChunk(start_offset=start_offset, end_offset=end_offset)

393 

394 

class ELF32Handler(_ELFBase):
    """Handler for ELF files whose class byte is 0x01 (32-bit)."""

    NAME = "elf32"
    HEADER_STRUCT = "elf_header_32_t"

    # Matches the first 7 bytes of the ELF identification.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            01
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 32-bit layouts of the ELF header, section header and program header,
    # plus the kernel module signature trailer.
    C_DEFINITIONS = r"""
        typedef struct elf_header_32 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint32 e_entry;
            uint32 e_phoff;
            uint32 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_32_t;

        typedef struct elf32_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint32 sh_flags;
            uint32 sh_addr;
            uint32 sh_offset;
            uint32 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint32 sh_addralign;
            uint32 sh_entsize;
        } elf_shdr_t;

        typedef struct elf32_phdr {
            uint32 p_type;
            uint32 p_offset;
            uint32 p_vaddr;
            uint32 p_paddr;
            uint32 p_filesz;
            uint32 p_memsz;
            uint32 p_flags;
            uint32 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """

    DOC = HandlerDoc(
        name="ELF (32-bit)",
        description="The 32-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 32-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )

490 

491 

class ELF64Handler(_ELFBase):
    """Handler for ELF files whose class byte is 0x02 (64-bit)."""

    NAME = "elf64"
    HEADER_STRUCT = "elf_header_64_t"

    # Matches the first 7 bytes of the ELF identification.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            02
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 64-bit layouts of the ELF header, section header and program header,
    # plus the kernel module signature trailer.
    C_DEFINITIONS = r"""
        typedef struct elf_header_64 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint64 e_entry;
            uint64 e_phoff;
            uint64 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_64_t;

        typedef struct elf64_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint64 sh_flags;
            uint64 sh_addr;
            uint64 sh_offset;
            uint64 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint64 sh_addralign;
            uint64 sh_entsize;
        } elf_shdr_t;

        typedef struct elf64_phdr {
            uint32 p_type;
            uint32 p_flags;
            uint64 p_offset;
            uint64 p_vaddr;
            uint64 p_paddr;
            uint64 p_filesz;
            uint64 p_memsz;
            uint64 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """

    DOC = HandlerDoc(
        name="ELF (64-bit)",
        description="The 64-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 64-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )