Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/executable/elf.py: 58%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

190 statements  

1import io 

2import shutil 

3import zlib 

4from pathlib import Path 

5 

6import attrs 

7import lief 

8from structlog import get_logger 

9 

10from unblob.extractor import carve_chunk_to_file 

11from unblob.extractors import Command 

12from unblob.file_utils import ( 

13 Endian, 

14 File, 

15 InvalidInputFormat, 

16 StructParser, 

17 convert_int8, 

18 convert_int32, 

19 convert_int64, 

20 iterate_file, 

21 iterate_patterns, 

22 read_until_past, 

23 round_up, 

24) 

25from unblob.models import ( 

26 HandlerDoc, 

27 HandlerType, 

28 HexString, 

29 Reference, 

30 StructHandler, 

31 ValidChunk, 

32) 

33 

# Turn off lief's own log output for this module's parsing calls.
lief.logging.disable()

logger = get_logger()

# Byte length of the ``module_signature`` record (5 x uint8 + 3 pad + uint32)
# that sits immediately before the footer in a signed kernel module.
KERNEL_MODULE_SIGNATURE_INFO_LEN = 12
KERNEL_MODULE_SIGNATURE_FOOTER = b"~Module signature appended~\n"

# ELF section that is inspected for an embedded initramfs.
KERNEL_INIT_DATA_SECTION = ".init.data"

# Marker bytes and their offset (relative to the ELF start) used to detect a
# QNX IFS image embedded in an ELF.
QNX_IFS_MARKER = b"\xeb\x7e\xff\x00"
QNX_IFS_MARKER_OFFSET = 0x3000

# [Ref] https://github.com/upx/upx/blob/devel/src/stub/src/include/linux.h
UPX_C_DEFINITIONS = r"""
typedef struct packhead{
    char magic[4];
    uint8_t version;
    uint8_t format;
    uint8_t level;
    uint8_t method;
    uint64_t unknown1;
    uint32_t u_filesize1;
    uint32_t c_filesize;
    uint32_t u_filesize2;
    uint32_t unknown2;
    uint32_t l_info_offset;
} packhead_t;

typedef struct l_info{
    uint32_t l_checksum;
    char l_magic[4];
    uint16_t l_lsize;
    uint8_t l_version;
    uint8_t l_format;
} l_info_t;
"""
# Shared parser for the two UPX structures declared above.
upx_parser = StructParser(UPX_C_DEFINITIONS)

71 

72 

def parse_upx_packhead(file: File):
    """Parse a ``packhead_t`` structure from *file* as little-endian data."""
    packhead = upx_parser.parse("packhead_t", file, Endian.LITTLE)
    return packhead

75 

76 

def parse_upx_l_info(file: File):
    """Parse an ``l_info_t`` structure from *file* as little-endian data."""
    l_info = upx_parser.parse("l_info_t", file, Endian.LITTLE)
    return l_info

79 

80 

@attrs.define(repr=False)
class ElfChunk(ValidChunk):
    """Chunk type for ELF files with ELF-specific extraction.

    Extraction handles three cases: kernel images (initramfs carving),
    UPX-packed binaries (decompression) and plain ELF files that were carved
    out of a bigger file (kept as a copy).
    """

    @staticmethod
    def upx_checksum_validates(file: File, l_info, elf) -> bool:
        """Check the Adler-32 checksum stored in a UPX ``l_info`` header.

        :param file: open file containing the packed ELF
        :param l_info: parsed ``l_info_t`` structure (``l_lsize`` and
            ``l_checksum`` are read)
        :param elf: parsed lief ELF object (``last_offset_segment`` and
            ``sections`` are read)
        :return: True when the computed checksum equals ``l_info.l_checksum``
        """
        size_pack2 = elf.last_offset_segment - l_info.l_lsize
        aligned_size = round_up(size_pack2, 4)  # Forces to be mod 4
        xct_off = any(section.name == "init" for section in elf.sections)
        aligned_size += (4 & aligned_size) ^ (int(xct_off) << 2)  # 4 or 0
        aligned_size += 8  # Added 2 times 4 byte (size of disp)
        if xct_off:
            aligned_size += 12
        alignment = aligned_size - size_pack2

        # The checksummed region is the loader area minus the alignment bytes.
        checksummed_len = l_info.l_lsize - alignment
        checksum_offset = elf.last_offset_segment - checksummed_len
        file.seek(checksum_offset, io.SEEK_SET)
        adler32_checksum = 1  # Adler-32 initial value
        for chunk in iterate_file(file, checksum_offset, checksummed_len):
            adler32_checksum = zlib.adler32(chunk, adler32_checksum)
        return adler32_checksum == l_info.l_checksum

    def is_valid_upx(self, inpath: Path, elf) -> bool:
        """Tell whether the file at *inpath* carries a valid UPX ``l_info`` header.

        :param inpath: path of the ELF file to inspect
        :param elf: parsed lief ELF object for the same file
        :return: False when no ``l_info`` header with ``UPX!`` magic can be
            located, True when it is present and its checksum validates
        :raises InvalidInputFormat: when the magic matches but the checksum
            does not
        """
        l_info_size = upx_parser.cparser_le.l_info_t.size
        # Use a context manager so the file handle is always released,
        # including on the early returns and on the raise below.
        with File.from_path(inpath) as file:
            file.seek(-4, io.SEEK_END)  # last 4 bytes indicates where linfo ends
            l_info_start_offset = abs(
                convert_int32(file.read(4), Endian.LITTLE) - l_info_size
            )
            # The whole header has to fit in the file, not only its first byte.
            if l_info_start_offset + l_info_size > file.size():
                return False
            file.seek(l_info_start_offset, io.SEEK_SET)
            upx_header = parse_upx_l_info(file)
            if upx_header.l_magic != b"UPX!":  # Magic
                return False
            if not self.upx_checksum_validates(file, upx_header, elf):
                raise InvalidInputFormat("Invalid UPX checksum")
            return True

    def extract(self, inpath: Path, outdir: Path):
        """Extract whatever is worth extracting from the ELF file at *inpath*.

        :param inpath: path of the (possibly carved) ELF file
        :param outdir: directory that receives the extraction results
        """
        # ELF file extraction is special in that in the general case no new files are extracted, thus
        # when we want to clean up all carves to save place, carved ELF files would be deleted as well,
        # however we want to keep carved out ELF files, as they are the interesting stuff!
        elf = lief.ELF.parse(str(inpath))

        if elf is None:
            logger.error(
                "Trying to extract an invalid ELF file.", inpath=inpath, outdir=outdir
            )
            return

        is_kernel = (
            elf.header.file_type == lief.ELF.Header.FILE_TYPE.EXEC
            and elf.has_section(KERNEL_INIT_DATA_SECTION)
        )
        if is_kernel:
            with File.from_path(inpath) as file:
                extract_initramfs(elf, file, outdir)

        elif self.is_valid_upx(inpath=inpath, elf=elf):
            extract_upx(inpath, outdir)

        elif not self.is_whole_file:
            # make a copy, and let the carved chunk be deleted
            outdir.mkdir(parents=True, exist_ok=False)
            shutil.copy2(inpath, outdir / "carved.elf")
            # more work will be done, when outdir is picked up by processing,
            # and the ELF file is processed as a whole file.
            # As a performance side effect, ELF files will be searched for chunks twice.
            # Even though the second chunk search one is short-circuited,
            # because the ELF handler will recognize it as a whole file
            # other handlers might burn some cycles on the file as well.

150 

151 

def extract_upx(inpath: Path, outdir: Path):
    """Create *outdir* and hand *inpath* to a ``upx -d`` command extractor.

    The command template writes its output to ``{outdir}/{inpath.stem}.elf``.
    *outdir* must not exist yet (``exist_ok=False``).
    """
    upx_decompress = Command("upx", "-d", "{inpath}", "-o{outdir}/{inpath.stem}.elf")
    outdir.mkdir(parents=True, exist_ok=False)
    upx_decompress.extract(inpath, outdir)

156 

157 

def extract_initramfs(elf, file: File, outdir):
    """Carve the initramfs out of a kernel ELF, possibly with 4 extra bytes.

    The alignment rules of the initramfs do not allow calculating its exact
    start and end offsets, so the carved output may contain 4 extra bytes
    before or after the actual initramfs.

    Nothing is written when the ``.init.data`` section is missing or empty,
    or when the computed layout is not plausible.
    """
    if not elf.has_section(KERNEL_INIT_DATA_SECTION):
        return

    section = elf.get_section(KERNEL_INIT_DATA_SECTION)
    if not section.size:
        return

    # The initramfs size is stored at the very end of the section, as a 64bit
    # or a 32bit integer depending on the platform
    # (see usr/initramfs_data.S in the kernel).
    # The size is padded to 8 bytes, see include/asm-generic/vmlinux.lds.h
    # The actual initramfs is right before the size.
    if elf.header.identity_class == lief.ELF.Header.CLASS.ELF64:
        size_field_len, read_size = 8, convert_int64
    else:
        size_field_len, read_size = 4, convert_int32

    if elf.header.identity_data == lief.ELF.Header.ELF_DATA.LSB:
        endian = Endian.LITTLE
    else:
        endian = Endian.BIG

    section_start = section.file_offset
    section_end = section_start + section.size
    size_field_offset = section_end - size_field_len
    initramfs_size = read_size(file[size_field_offset:section_end], endian=endian)

    # The initramfs start is aligned to 4 bytes while the size field is aligned
    # to 8 bytes, so only the padded end is known, not the start.
    # There are two valid padding values for the initramfs end:
    # 0 and 4, 1 and 5, 2 and 6, 3 and 7
    # Start with the offsets that belong to the smaller padding value.
    initramfs_start = size_field_offset - round_up(initramfs_size, 4)
    initramfs_end = initramfs_start + initramfs_size
    padding = size_field_offset - initramfs_end

    # initramfs can be turned off (https://www.linux.com/training-tutorials/kernel-newbie-corner-initrd-and-initramfs-whats/)
    # in which case the above calculations most probably end up with bogus chunk offsets
    if not (section_start <= initramfs_start < initramfs_end <= section_end):
        return
    # The gap between the initramfs and the size field must be all zero bytes.
    if bytes(padding) != file[initramfs_end:size_field_offset]:
        return

    # When the bigger padding is also a possibility, include 4 more bytes from
    # the beginning.
    fits_in_section = section_start <= initramfs_start - 4
    if fits_in_section and (
        bytes(padding + 4) == file[initramfs_end - 4 : size_field_offset]
    ):
        initramfs_start -= 4

    initramfs_chunk = ValidChunk(start_offset=initramfs_start, end_offset=initramfs_end)
    carve_chunk_to_file(outdir / "initramfs", file, initramfs_chunk)

226 

227 

class _ELFBase(StructHandler):
    """Logic shared by the 32-bit and 64-bit ELF handlers.

    Subclasses are expected to provide the C definitions for
    ``elf_shdr_t``, ``elf_phdr_t`` and ``module_signature_t`` as well as the
    header structure used by ``parse_header``.
    """

    EXTRACTOR = None
    SECTION_HEADER_STRUCT = "elf_shdr_t"
    PROGRAM_HEADER_STRUCT = "elf_phdr_t"

    def is_valid_header(self, header) -> bool:
        """Return True when type, machine and version are values lief knows."""
        # check that header fields have valid values
        try:
            lief.ELF.Header.FILE_TYPE(header.e_type)
            lief.ELF.ARCH(header.e_machine)
            lief.ELF.Header.VERSION(header.e_version)
        except ValueError:
            return False
        return True

    @staticmethod
    def get_endianness(file: File, start_offset: int) -> Endian:
        """Read the ``e_ident_data`` byte (offset 5 of the ELF) and map it to an Endian.

        A value of 0x1 means little-endian, anything else is treated as
        big-endian.
        """
        file.seek(start_offset + 5, io.SEEK_SET)
        e_ident_data = convert_int8(file.read(1), Endian.LITTLE)
        return Endian.LITTLE if e_ident_data == 0x1 else Endian.BIG

    def get_last_section_end(
        self, file: File, sections_start_offset: int, sections_num: int, endian
    ) -> int:
        """Return the largest ``sh_offset + sh_size`` among the section headers.

        ``NOBITS`` sections and sections with a type unknown to lief are
        ignored. The result is 0 when no section qualifies.
        """
        last_section_end = 0
        file.seek(sections_start_offset)

        for _ in range(sections_num):
            section_header = self._struct_parser.parse(
                self.SECTION_HEADER_STRUCT, file, endian
            )

            try:
                section_type = lief.ELF.Section.TYPE(section_header.sh_type)
            except ValueError:
                # unknown section type
                continue
            if section_type == lief.ELF.Section.TYPE.NOBITS:
                continue

            section_end = section_header.sh_offset + section_header.sh_size
            last_section_end = max(section_end, last_section_end)

        return last_section_end

    def get_last_program_end(
        self, file: File, programs_start_offset: int, programs_num: int, endian
    ) -> int:
        """Return the largest ``p_offset + p_filesz`` among the program headers."""
        last_program_end = 0
        file.seek(programs_start_offset)

        for _ in range(programs_num):
            program_header = self._struct_parser.parse(
                self.PROGRAM_HEADER_STRUCT, file, endian
            )

            program_end = program_header.p_offset + program_header.p_filesz
            last_program_end = max(program_end, last_program_end)

        return last_program_end

    def get_end_offset(self, file: File, start_offset: int, header, endian) -> int:
        """Compute the end offset of the ELF that starts at *start_offset*."""
        # Usually the section header is the last, but in some cases the program headers are
        # put to the end of the file, and in some cases sections header and actual sections
        # can be also intermixed, so we need also to check the end of the last section and
        # also the last program segment.
        # We check which one is the last and use it as a file size.
        section_headers_end = header.e_shoff + (header.e_shnum * header.e_shentsize)
        program_headers_end = header.e_phoff + (header.e_phnum * header.e_phentsize)

        last_section_end = self.get_last_section_end(
            file, start_offset + header.e_shoff, header.e_shnum, endian
        )

        last_program_end = self.get_last_program_end(
            file, start_offset + header.e_phoff, header.e_phnum, endian
        )

        return start_offset + max(
            section_headers_end, program_headers_end, last_section_end, last_program_end
        )

    def get_signed_kernel_module_end_offset(self, file: File, end_offset: int) -> int:
        """Extend *end_offset* over an appended kernel module signature, if any.

        :return: the offset right after the signature footer when a matching
            signature directly follows the ELF, otherwise *end_offset* unchanged
        """
        # signed kernel modules are ELF files followed by:
        #   - a PKCS7 signature
        #   - a module_signature structure
        #   - a custom footer value '~Module signature appended~\n'
        # we check if a valid kernel module signature is present after the ELF file
        # and returns an end_offset that includes that whole signature part.

        file.seek(end_offset, io.SEEK_SET)
        for footer_offset in iterate_patterns(file, KERNEL_MODULE_SIGNATURE_FOOTER):
            # the module_signature structure sits right before the footer
            file.seek(
                footer_offset - KERNEL_MODULE_SIGNATURE_INFO_LEN,
                io.SEEK_SET,
            )
            module_signature = self._struct_parser.parse(
                "module_signature_t", file, Endian.BIG
            )
            logger.debug(
                "module_signature_t",
                module_signature=module_signature,
                _verbosity=3,
            )
            # The signature belongs to this ELF only when nothing but the
            # signature data and the info structure separates them.
            if (
                footer_offset
                == end_offset
                + module_signature.sig_len
                + KERNEL_MODULE_SIGNATURE_INFO_LEN
            ):
                end_offset = footer_offset + len(KERNEL_MODULE_SIGNATURE_FOOTER)

            # We stop at the first SIGNATURE FOOTER match
            break

        return end_offset

    def is_upx(self, file: File, start_offset: int, end_offset: int) -> bool:
        """Check if UPX magic is present after ELF header."""
        return b"UPX!" in file[start_offset : min(end_offset, start_offset + 4096)]

    def get_upx_end_offset(self, file: File, start_offset: int, end_offset: int) -> int:
        """Locate UPX footer in ELF file and returns UPX end offset or original end offset."""
        upx_footer = b"\xff\x00\x00\x00\x00UPX!\x00\x00\x00\x00"
        # Start the search at the beginning of this ELF. Without this the
        # search would begin wherever a previously called helper left the
        # file position, which can be past the footer we are looking for.
        file.seek(start_offset, io.SEEK_SET)
        for packhead_offset in iterate_patterns(file=file, pattern=upx_footer):
            file.seek(
                packhead_offset + len(upx_footer), io.SEEK_SET
            )  # seek to end of footer
            file.seek(
                read_until_past(file=file, pattern=b"\x00")
            )  # sometimes more NULL bytes are added
            packheader = parse_upx_packhead(file)
            file_size_compressed = packheader.c_filesize + packheader.size
            # Accept the pack header only when its magic and both uncompressed
            # sizes agree, and the compressed size it implies ends exactly
            # where the header itself ends and is a multiple of 4.
            packhead_is_valid = (
                (
                    packheader.magic == b"UPX!"
                    and packheader.u_filesize1 == packheader.u_filesize2
                )
                and (file_size_compressed == file.tell() - start_offset)
                and (file_size_compressed % 4 == 0)
            )
            if packhead_is_valid:
                return start_offset + file_size_compressed
        # no matching UPX footer found
        return end_offset

    def calculate_chunk(self, file: File, start_offset: int) -> ElfChunk | None:
        """Build an ``ElfChunk`` for the ELF at *start_offset*.

        :return: None when the header is invalid or a QNX IFS marker is found,
            otherwise the chunk covering the ELF (including an appended module
            signature or UPX payload when detected)
        """
        endian = self.get_endianness(file, start_offset)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)
        if not self.is_valid_header(header):
            return None

        qnx_marker_pos = start_offset + QNX_IFS_MARKER_OFFSET
        if file[qnx_marker_pos : qnx_marker_pos + 4] == QNX_IFS_MARKER:
            logger.info("QNX IFS embedded in ELF identified, bailing out.")
            return None
        end_offset = self.get_end_offset(file, start_offset, header, endian)

        # kernel modules are always relocatable
        if header.e_type == lief.ELF.Header.FILE_TYPE.REL.value:
            end_offset = self.get_signed_kernel_module_end_offset(file, end_offset)

        if self.is_upx(file=file, start_offset=start_offset, end_offset=end_offset):
            end_offset = self.get_upx_end_offset(file, start_offset, end_offset)

        # do a special extraction of ELF files with ElfChunk
        return ElfChunk(
            start_offset=start_offset,
            end_offset=end_offset,
        )

400 

401 

class ELF32Handler(_ELFBase):
    """Handler for 32-bit ELF files (``e_ident_class`` byte equal to 0x01)."""

    NAME = "elf32"

    # Match pattern: ELF magic, class byte 01, data byte 01 or 02, version 01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            01
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 32-bit layouts of the ELF header, section header and program header,
    # plus the kernel module signature record.
    C_DEFINITIONS = r"""
        typedef struct elf_header_32 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint32 e_entry;
            uint32 e_phoff;
            uint32 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_32_t;

        typedef struct elf32_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint32 sh_flags;
            uint32 sh_addr;
            uint32 sh_offset;
            uint32 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint32 sh_addralign;
            uint32 sh_entsize;
        } elf_shdr_t;

        typedef struct elf32_phdr {
            uint32 p_type;
            uint32 p_offset;
            uint32 p_vaddr;
            uint32 p_paddr;
            uint32 p_filesz;
            uint32 p_memsz;
            uint32 p_flags;
            uint32 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    HEADER_STRUCT = "elf_header_32_t"

    DOC = HandlerDoc(
        name="ELF (32-bit)",
        description="The 32-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 32-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )

497 

498 

class ELF64Handler(_ELFBase):
    """Handler for 64-bit ELF files (``e_ident_class`` byte equal to 0x02)."""

    NAME = "elf64"

    # Match pattern: ELF magic, class byte 02, data byte 01 or 02, version 01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            02
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 64-bit layouts of the ELF header, section header and program header,
    # plus the kernel module signature record.
    C_DEFINITIONS = r"""
        typedef struct elf_header_64 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint64 e_entry;
            uint64 e_phoff;
            uint64 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_64_t;

        typedef struct elf64_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint64 sh_flags;
            uint64 sh_addr;
            uint64 sh_offset;
            uint64 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint64 sh_addralign;
            uint64 sh_entsize;
        } elf_shdr_t;

        typedef struct elf64_phdr {
            uint32 p_type;
            uint32 p_flags;
            uint64 p_offset;
            uint64 p_vaddr;
            uint64 p_paddr;
            uint64 p_filesz;
            uint64 p_memsz;
            uint64 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    HEADER_STRUCT = "elf_header_64_t"

    DOC = HandlerDoc(
        name="ELF (64-bit)",
        description="The 64-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 64-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )