Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/executable/elf.py: 56%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

185 statements  

1import io 

2import shutil 

3import zlib 

4from pathlib import Path 

5from typing import Optional 

6 

7import attrs 

8import lief 

9from structlog import get_logger 

10 

11from unblob.extractor import carve_chunk_to_file 

12from unblob.extractors import Command 

13from unblob.file_utils import ( 

14 Endian, 

15 File, 

16 InvalidInputFormat, 

17 StructParser, 

18 convert_int8, 

19 convert_int32, 

20 convert_int64, 

21 iterate_file, 

22 iterate_patterns, 

23 read_until_past, 

24 round_up, 

25) 

26from unblob.models import ( 

27 HandlerDoc, 

28 HandlerType, 

29 HexString, 

30 Reference, 

31 StructHandler, 

32 ValidChunk, 

33) 

34 

# Turn off LIEF's own logging output.
lief.logging.disable()

# Module-level structlog logger used by the extraction and chunk-calculation code.
logger = get_logger()

# Distance in bytes between the start of the module_signature_t structure and
# the footer marker: the handler seeks back this far from the footer to parse it.
KERNEL_MODULE_SIGNATURE_INFO_LEN = 12
# Marker searched for after the end of a relocatable ELF to detect an appended
# kernel module signature.
KERNEL_MODULE_SIGNATURE_FOOTER = b"~Module signature appended~\n"

# Section that, when present in an EXEC-type ELF, makes the file be treated as a
# kernel image; the initramfs is carved out of this section.
KERNEL_INIT_DATA_SECTION = ".init.data"

43 

44 

# [Ref] https://github.com/upx/upx/blob/devel/src/stub/src/include/linux.h
# C layouts of the two UPX structures parsed by this module:
#   - packhead_t: parsed right after the UPX footer pattern; its c_filesize is
#     used to compute where a packed ELF ends.
#   - l_info_t: located through the last 4 bytes of a packed file; carries the
#     "UPX!" magic and the checksum that is verified before unpacking.
UPX_C_DEFINITIONS = r"""
typedef struct packhead{
    char magic[4];
    uint8_t version;
    uint8_t format;
    uint8_t level;
    uint8_t method;
    uint64_t unknown1;
    uint32_t u_filesize1;
    uint32_t c_filesize;
    uint32_t u_filesize2;
    uint32_t unknown2;
    uint32_t l_info_offset;
} packhead_t;

typedef struct l_info{
    uint32_t l_checksum;
    char l_magic[4];
    uint16_t l_lsize;
    uint8_t l_version;
    uint8_t l_format;
} l_info_t;
"""
# Parser built from the definitions above; the helpers in this module always
# call it with Endian.LITTLE.
upx_parser = StructParser(UPX_C_DEFINITIONS)

70 

71 

def parse_upx_packhead(file: File):
    """Parse a little-endian ``packhead_t`` from the current position of *file*."""
    packhead = upx_parser.parse("packhead_t", file, Endian.LITTLE)
    return packhead

74 

75 

def parse_upx_l_info(file: File):
    """Parse a little-endian ``l_info_t`` from the current position of *file*."""
    l_info = upx_parser.parse("l_info_t", file, Endian.LITTLE)
    return l_info

78 

79 

@attrs.define(repr=False)
class ElfChunk(ValidChunk):
    """Valid chunk holding an ELF file, with ELF-specific extraction.

    ``extract`` distinguishes three cases: kernel images (the initramfs is
    carved out), UPX-packed binaries (unpacked through the ``upx`` command)
    and ELF files carved out of a bigger file (copied to the output directory).
    """

    @staticmethod
    def upx_checksum_validates(file: File, l_info, elf) -> bool:
        """Check the Adler-32 checksum stored in the UPX ``l_info`` header.

        :param file: opened file containing the packed ELF
        :param l_info: parsed ``l_info_t`` structure
        :param elf: LIEF binary object of the same file
        :return: True when the computed checksum equals ``l_info.l_checksum``
        """
        size_pack2 = elf.last_offset_segment - l_info.l_lsize
        size_aligment = round_up(size_pack2, 4)  # Forces to be mod 4
        xct_off = any(section.name == "init" for section in elf.sections)
        size_aligment += (4 & size_aligment) ^ (int(xct_off) << 2)  # 4 or 0
        size_aligment += 8  # Added 2 times 4 byte (size of disp)
        if xct_off:
            size_aligment += 12
        alignment = size_aligment - size_pack2
        checksummed_size = l_info.l_lsize - alignment
        checksum_offset = elf.last_offset_segment - checksummed_size
        file.seek(checksum_offset, io.SEEK_SET)
        adler32_checksum = 1  # initial Adler-32 value
        for chunk in iterate_file(file, checksum_offset, checksummed_size):
            adler32_checksum = zlib.adler32(chunk, adler32_checksum)
        return adler32_checksum == l_info.l_checksum

    def is_valid_upx(self, inpath: Path, elf) -> bool:
        """Tell whether the file at *inpath* is a UPX-packed ELF.

        :param inpath: path of the ELF file to inspect
        :param elf: LIEF binary object parsed from *inpath*
        :return: False when no ``l_info`` header with the ``UPX!`` magic fits
            at the location derived from the file trailer, True when the header
            is present and its checksum validates
        :raises InvalidInputFormat: the UPX magic is present but the checksum
            does not match
        """
        l_info_size = upx_parser.cparser_le.l_info_t.size
        # The context manager releases the file on every exit path (returns
        # and the InvalidInputFormat raise alike).
        with File.from_path(inpath) as file:
            file.seek(-4, io.SEEK_END)  # last 4 bytes indicates where linfo ends
            l_info_start_offset = abs(
                convert_int32(file.read(4), Endian.LITTLE) - l_info_size
            )
            # The whole l_info structure has to fit in the file, otherwise
            # there is nothing meaningful to parse at that offset.
            if l_info_start_offset + l_info_size > file.size():
                return False
            file.seek(l_info_start_offset, io.SEEK_SET)
            upx_header = parse_upx_l_info(file)
            if upx_header.l_magic != b"UPX!":  # Magic
                return False
            if not self.upx_checksum_validates(file, upx_header, elf):
                raise InvalidInputFormat("Invalid UPX checksum")
            return True

    def extract(self, inpath: Path, outdir: Path):
        """Extract the interesting content of the ELF file at *inpath* into *outdir*.

        :param inpath: path of the (possibly carved) ELF file
        :param outdir: directory receiving the extracted content
        """
        # ELF file extraction is special in that in the general case no new files are extracted, thus
        # when we want to clean up all carves to save place, carved ELF files would be deleted as well,
        # however we want to keep carved out ELF files, as they are the interesting stuff!
        elf = lief.ELF.parse(str(inpath))

        if elf is None:
            logger.error(
                "Trying to extract an invalid ELF file.", inpath=inpath, outdir=outdir
            )
            return

        is_kernel = (
            elf.header.file_type == lief.ELF.Header.FILE_TYPE.EXEC
            and elf.has_section(KERNEL_INIT_DATA_SECTION)
        )
        if is_kernel:
            with File.from_path(inpath) as file:
                extract_initramfs(elf, file, outdir)

        elif self.is_valid_upx(inpath=inpath, elf=elf):
            extract_upx(inpath, outdir)

        elif not self.is_whole_file:
            # make a copy, and let the carved chunk be deleted
            outdir.mkdir(parents=True, exist_ok=False)
            shutil.copy2(inpath, outdir / "carved.elf")
            # more work will be done, when outdir is picked up by processing,
            # and the ELF file is processed as a whole file.
            # As a performance side effect, ELF files will be searched for chunks twice.
            # Even though the second chunk search one is short-circuited,
            # because the ELF handler will recognize it as a whole file
            # other handlers might burn some cycles on the file as well.

149 

150 

def extract_upx(inpath: Path, outdir: Path):
    """Unpack the UPX-compressed ELF at *inpath* into a freshly created *outdir*.

    The unpacked binary is written as ``<input stem>.elf`` by the ``upx``
    command; *outdir* must not exist beforehand.
    """
    upx_command = Command("upx", "-d", "{inpath}", "-o{outdir}/{inpath.stem}.elf")
    outdir.mkdir(parents=True, exist_ok=False)
    upx_command.extract(inpath, outdir)

155 

156 

def extract_initramfs(elf, file: File, outdir):
    """Carve the initramfs out of the kernel's init data section.

    The start of the initramfs cannot be computed exactly because of its
    alignment rules, so the carved file may carry 4 extra bytes before or
    after the actual initramfs. Nothing is written when the section is
    missing or empty, or when the computed offsets look bogus.
    """
    if not elf.has_section(KERNEL_INIT_DATA_SECTION):
        return

    section = elf.get_section(KERNEL_INIT_DATA_SECTION)
    if not section.size:
        return

    if elf.header.identity_data == lief.ELF.Header.ELF_DATA.LSB:
        endian = Endian.LITTLE
    else:
        endian = Endian.BIG

    section_start = section.file_offset
    section_end = section_start + section.size

    # The initramfs size is stored at the very end of the section, as a 64bit
    # or 32bit integer depending on the platform (see usr/initramfs_data.S in
    # the kernel). The size is padded to 8 bytes, see
    # include/asm-generic/vmlinux.lds.h, and the initramfs itself sits right
    # before it.
    if elf.header.identity_class == lief.ELF.Header.CLASS.ELF64:
        size_field_width, read_size = 8, convert_int64
    else:
        size_field_width, read_size = 4, convert_int32

    initramfs_size_offset = section_end - size_field_width
    initramfs_size = read_size(
        file[initramfs_size_offset:section_end],
        endian=endian,
    )

    # The initramfs start is aligned to 4 bytes while the size field is aligned
    # to 8 bytes, so only the padded end is known. Two paddings are possible
    # for the initramfs end (0 or 4, 1 or 5, 2 or 6, 3 or 7); start with the
    # smaller one.
    initramfs_start = initramfs_size_offset - round_up(initramfs_size, 4)
    initramfs_end = initramfs_start + initramfs_size
    padding = initramfs_size_offset - initramfs_end

    # initramfs can be turned off (https://www.linux.com/training-tutorials/kernel-newbie-corner-initrd-and-initramfs-whats/)
    # in which case the calculations above most probably give bogus offsets
    offsets_in_section = (
        section_start <= initramfs_start < initramfs_end <= section_end
    )
    if (
        not offsets_in_section
        or bytes(padding) != file[initramfs_end:initramfs_size_offset]
    ):
        return

    # when the bigger padding is also possible, take 4 more bytes from the beginning
    room_for_bigger_padding = section_start <= initramfs_start - 4
    if (
        room_for_bigger_padding
        and bytes(padding + 4) == file[initramfs_end - 4 : initramfs_size_offset]
    ):
        initramfs_start -= 4

    initramfs_chunk = ValidChunk(start_offset=initramfs_start, end_offset=initramfs_end)
    carve_chunk_to_file(outdir / "initramfs", file, initramfs_chunk)

225 

226 

class _ELFBase(StructHandler):
    """Chunk calculation shared by the 32-bit and 64-bit ELF handlers.

    The C definitions supplied by subclasses must provide the ``elf_shdr_t``,
    ``elf_phdr_t`` and ``module_signature_t`` structures used here.
    """

    EXTRACTOR = None
    SECTION_HEADER_STRUCT = "elf_shdr_t"
    PROGRAM_HEADER_STRUCT = "elf_phdr_t"

    def is_valid_header(self, header) -> bool:
        """Return True when type, machine and version are values LIEF knows about."""
        try:
            lief.ELF.Header.FILE_TYPE(header.e_type)
            lief.ELF.ARCH(header.e_machine)
            lief.ELF.Header.VERSION(header.e_version)
        except ValueError:
            # at least one field is outside of the corresponding enumeration
            return False
        else:
            return True

    @staticmethod
    def get_endianness(file: File, start_offset: int) -> Endian:
        """Read the ``e_ident_data`` byte (offset 5 of the ELF) and map it to an Endian."""
        file.seek(start_offset + 5, io.SEEK_SET)
        data_encoding = convert_int8(file.read(1), Endian.LITTLE)
        if data_encoding == 0x1:
            return Endian.LITTLE
        return Endian.BIG

    def get_last_section_end(
        self, file: File, sections_start_offset: int, sections_num: int, endian
    ) -> int:
        """Return the biggest ``sh_offset + sh_size`` among the section headers.

        NOBITS sections and sections with a type unknown to LIEF are ignored.
        The result is 0 when no section qualifies.
        """
        furthest_end = 0
        file.seek(sections_start_offset)

        for _ in range(sections_num):
            shdr = self._struct_parser.parse(self.SECTION_HEADER_STRUCT, file, endian)

            try:
                section_type = lief.ELF.Section.TYPE(shdr.sh_type)
            except ValueError:
                continue
            if section_type == lief.ELF.Section.TYPE.NOBITS:
                continue

            current_end = shdr.sh_offset + shdr.sh_size
            if current_end > furthest_end:
                furthest_end = current_end

        return furthest_end

    def get_last_program_end(
        self, file: File, programs_start_offset: int, programs_num: int, endian
    ) -> int:
        """Return the biggest ``p_offset + p_filesz`` among the program headers (0 if none)."""
        furthest_end = 0
        file.seek(programs_start_offset)

        for _ in range(programs_num):
            phdr = self._struct_parser.parse(self.PROGRAM_HEADER_STRUCT, file, endian)

            current_end = phdr.p_offset + phdr.p_filesz
            if current_end > furthest_end:
                furthest_end = current_end

        return furthest_end

    def get_end_offset(self, file: File, start_offset: int, header, endian) -> int:
        """Compute the end offset of the ELF file starting at *start_offset*.

        The section header table usually comes last, but the program headers
        may also be placed at the end of the file, and section headers can be
        intermixed with the sections themselves. Four candidates are therefore
        compared (end of both header tables, end of the last section, end of
        the last program segment) and the furthest one is taken as file size.
        """
        section_table_end = header.e_shoff + (header.e_shnum * header.e_shentsize)
        program_table_end = header.e_phoff + (header.e_phnum * header.e_phentsize)

        section_data_end = self.get_last_section_end(
            file, start_offset + header.e_shoff, header.e_shnum, endian
        )
        program_data_end = self.get_last_program_end(
            file, start_offset + header.e_phoff, header.e_phnum, endian
        )

        elf_size = max(
            section_table_end, program_table_end, section_data_end, program_data_end
        )
        return start_offset + elf_size

    def get_signed_kernel_module_end_offset(self, file: File, end_offset: int) -> int:
        """Extend *end_offset* over an appended kernel module signature, if present.

        Signed kernel modules are ELF files followed by:
          - a PKCS7 signature
          - a module_signature structure
          - the footer marker ``~Module signature appended~\\n``

        Only the first footer found after *end_offset* is considered. When the
        signature length stored right before it is consistent with the
        distance from the end of the ELF, the returned offset covers the whole
        signature part; otherwise *end_offset* is returned unchanged.
        """
        file.seek(end_offset, io.SEEK_SET)
        for footer_offset in iterate_patterns(file, KERNEL_MODULE_SIGNATURE_FOOTER):
            info_offset = footer_offset - KERNEL_MODULE_SIGNATURE_INFO_LEN
            file.seek(info_offset, io.SEEK_SET)
            module_signature = self._struct_parser.parse(
                "module_signature_t", file, Endian.BIG
            )
            logger.debug(
                "module_signature_t",
                module_signature=module_signature,
                _verbosity=3,
            )
            expected_footer_offset = (
                end_offset + module_signature.sig_len + KERNEL_MODULE_SIGNATURE_INFO_LEN
            )
            if footer_offset == expected_footer_offset:
                end_offset = footer_offset + len(KERNEL_MODULE_SIGNATURE_FOOTER)

            # We stop at the first SIGNATURE FOOTER match
            break

        return end_offset

    def is_upx(self, file: File, start_offset: int, end_offset: int) -> bool:
        """Check if UPX magic is present after ELF header."""
        # only the first 4096 bytes of the chunk are inspected
        search_end = min(end_offset, start_offset + 4096)
        return b"UPX!" in file[start_offset:search_end]

    def get_upx_end_offset(self, file: File, start_offset: int, end_offset: int) -> int:
        """Locate UPX footer in ELF file and returns UPX end offset or original end offset."""
        upx_footer = b"\xff\x00\x00\x00\x00UPX!\x00\x00\x00\x00"
        for footer_offset in iterate_patterns(file=file, pattern=upx_footer):
            # seek to end of footer
            file.seek(footer_offset + len(upx_footer), io.SEEK_SET)
            # sometimes more NULL bytes are added
            file.seek(read_until_past(file=file, pattern=b"\x00"))
            packheader = parse_upx_packhead(file)
            packed_size = packheader.c_filesize + packheader.size

            if packheader.magic != b"UPX!":
                continue
            if packheader.u_filesize1 != packheader.u_filesize2:
                continue
            # the packed size has to end exactly where the pack header ends
            if packed_size != file.tell() - start_offset:
                continue
            if packed_size % 4 != 0:
                continue
            return start_offset + packed_size
        # no matching UPX footer found
        return end_offset

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ElfChunk]:
        """Build the ElfChunk for the ELF starting at *start_offset*.

        Returns None when the header fields fail validation.
        """
        endian = self.get_endianness(file, start_offset)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)
        if not self.is_valid_header(header):
            return None

        chunk_end = self.get_end_offset(file, start_offset, header, endian)

        # kernel modules are always relocatable
        if header.e_type == lief.ELF.Header.FILE_TYPE.REL.value:
            chunk_end = self.get_signed_kernel_module_end_offset(file, chunk_end)

        if self.is_upx(file=file, start_offset=start_offset, end_offset=chunk_end):
            chunk_end = self.get_upx_end_offset(file, start_offset, chunk_end)

        # do a special extraction of ELF files with ElfChunk
        return ElfChunk(start_offset=start_offset, end_offset=chunk_end)

394 

395 

class ELF32Handler(_ELFBase):
    """Handler for ELF files whose ``e_ident_class`` byte is 0x01 (32-bit)."""

    NAME = "elf32"

    # Match pattern: ELF magic, class byte 01, data byte 01 or 02, version byte 01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            01
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # Structure layouts with 32-bit wide address, offset and size fields.
    # elf_shdr_t, elf_phdr_t and module_signature_t are the names the shared
    # _ELFBase logic parses by.
    C_DEFINITIONS = r"""
        typedef struct elf_header_32 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint32 e_entry;
            uint32 e_phoff;
            uint32 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_32_t;

        typedef struct elf32_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint32 sh_flags;
            uint32 sh_addr;
            uint32 sh_offset;
            uint32 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint32 sh_addralign;
            uint32 sh_entsize;
        } elf_shdr_t;

        typedef struct elf32_phdr {
            uint32 p_type;
            uint32 p_offset;
            uint32 p_vaddr;
            uint32 p_paddr;
            uint32 p_filesz;
            uint32 p_memsz;
            uint32 p_flags;
            uint32 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    # Name of the structure above that describes the file header.
    HEADER_STRUCT = "elf_header_32_t"

    # Descriptive metadata about the handled format.
    DOC = HandlerDoc(
        name="ELF (32-bit)",
        description="The 32-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 32-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )

491 

492 

class ELF64Handler(_ELFBase):
    """Handler for ELF files whose ``e_ident_class`` byte is 0x02 (64-bit)."""

    NAME = "elf64"

    # Match pattern: ELF magic, class byte 02, data byte 01 or 02, version byte 01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            02
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # Structure layouts with 64-bit wide address, offset and size fields; note
    # that p_flags directly follows p_type in the program header here.
    # elf_shdr_t, elf_phdr_t and module_signature_t are the names the shared
    # _ELFBase logic parses by.
    C_DEFINITIONS = r"""
        typedef struct elf_header_64 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint64 e_entry;
            uint64 e_phoff;
            uint64 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_64_t;

        typedef struct elf64_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint64 sh_flags;
            uint64 sh_addr;
            uint64 sh_offset;
            uint64 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint64 sh_addralign;
            uint64 sh_entsize;
        } elf_shdr_t;

        typedef struct elf64_phdr {
            uint32 p_type;
            uint32 p_flags;
            uint64 p_offset;
            uint64 p_vaddr;
            uint64 p_paddr;
            uint64 p_filesz;
            uint64 p_memsz;
            uint64 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    # Name of the structure above that describes the file header.
    HEADER_STRUCT = "elf_header_64_t"

    # Descriptive metadata about the handled format.
    DOC = HandlerDoc(
        name="ELF (64-bit)",
        description="The 64-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 64-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )