1import io
2import shutil
3import zlib
4from pathlib import Path
5
6import attrs
7import lief
8from structlog import get_logger
9
10from unblob.extractor import carve_chunk_to_file
11from unblob.extractors import Command
12from unblob.file_utils import (
13 Endian,
14 File,
15 InvalidInputFormat,
16 StructParser,
17 convert_int8,
18 convert_int32,
19 convert_int64,
20 iterate_file,
21 iterate_patterns,
22 read_until_past,
23 round_up,
24)
25from unblob.models import (
26 HandlerDoc,
27 HandlerType,
28 HexString,
29 Reference,
30 StructHandler,
31 ValidChunk,
32)
33
# Turn off LIEF's own logging output.
lief.logging.disable()

logger = get_logger()

# Byte length of the module_signature_t structure (five uint8 fields, three
# padding bytes and one uint32) that is read right before the footer.
KERNEL_MODULE_SIGNATURE_INFO_LEN = 12
# Trailer searched for after a relocatable ELF to detect a signed kernel module.
KERNEL_MODULE_SIGNATURE_FOOTER = b"~Module signature appended~\n"

# An EXEC-type ELF carrying this section is treated as a kernel image.
KERNEL_INIT_DATA_SECTION = ".init.data"

# Marker bytes, and their offset relative to the ELF start, that identify an
# embedded QNX IFS; such matches are not reported as ELF chunks.
QNX_IFS_MARKER = b"\xeb\x7e\xff\x00"
QNX_IFS_MARKER_OFFSET = 0x3000
45
# UPX on-disk structures, parsed as little-endian by the helpers below.
# [Ref] https://github.com/upx/upx/blob/devel/src/stub/src/include/linux.h
UPX_C_DEFINITIONS = r"""
typedef struct packhead{
    char magic[4];
    uint8_t version;
    uint8_t format;
    uint8_t level;
    uint8_t method;
    uint64_t unknown1;
    uint32_t u_filesize1;
    uint32_t c_filesize;
    uint32_t u_filesize2;
    uint32_t unknown2;
    uint32_t l_info_offset;
} packhead_t;

typedef struct l_info{
    uint32_t l_checksum;
    char l_magic[4];
    uint16_t l_lsize;
    uint8_t l_version;
    uint8_t l_format;
} l_info_t;
"""
# Single module-level parser shared by every UPX structure read in this file.
upx_parser = StructParser(UPX_C_DEFINITIONS)
71
72
def parse_upx_packhead(file: File):
    """Read a little-endian ``packhead_t`` structure at the current position of *file*."""
    packhead = upx_parser.parse("packhead_t", file, Endian.LITTLE)
    return packhead
75
76
def parse_upx_l_info(file: File):
    """Read a little-endian ``l_info_t`` structure at the current position of *file*."""
    l_info = upx_parser.parse("l_info_t", file, Endian.LITTLE)
    return l_info
79
80
@attrs.define(repr=False)
class ElfChunk(ValidChunk):
    """ELF chunk with dedicated extraction for kernels, UPX-packed and carved ELF files."""

    @staticmethod
    def upx_checksum_validates(file: File, l_info, elf) -> bool:
        """Check the Adler-32 checksum recorded in a UPX ``l_info`` header.

        Args:
            file: Open file holding the packed ELF; its position is changed.
            l_info: Parsed ``l_info_t`` structure (``l_lsize`` and
                ``l_checksum`` are read).
            elf: LIEF binary; ``last_offset_segment`` and ``sections`` are read.

        Returns:
            True when the checksum computed over the file range matches
            ``l_info.l_checksum``.
        """
        size_pack2 = elf.last_offset_segment - l_info.l_lsize
        size_alignment = round_up(size_pack2, 4)  # Forces to be mod 4
        # NOTE(review): the section name is compared against "init" without a
        # leading dot - confirm this is the name UPX actually emits.
        xct_off = any(section.name == "init" for section in elf.sections)
        size_alignment += (4 & size_alignment) ^ (int(xct_off) << 2)  # 4 or 0
        size_alignment += 8  # Added 2 times 4 byte (size of disp)
        if xct_off:
            size_alignment += 12
        alignment = size_alignment - size_pack2
        checksum_length = l_info.l_lsize - alignment
        checksum_offset = elf.last_offset_segment - checksum_length
        file.seek(checksum_offset, io.SEEK_SET)
        adler32_checksum = 1  # Adler-32 starting value
        for chunk in iterate_file(file, checksum_offset, checksum_length):
            adler32_checksum = zlib.adler32(chunk, adler32_checksum)
        return adler32_checksum == l_info.l_checksum

    def is_valid_upx(self, inpath: Path, elf) -> bool:
        """Tell whether the file at *inpath* carries a valid UPX ``l_info`` header.

        Args:
            inpath: Path of the ELF file to inspect.
            elf: LIEF binary parsed from the same file.

        Returns:
            True when the ``UPX!`` magic is found and the checksum validates,
            False when no complete ``l_info`` header can be located.

        Raises:
            InvalidInputFormat: The magic is present but the checksum is wrong.
        """
        l_info_size = upx_parser.cparser_le.l_info_t.size
        # The context manager releases the file on every exit path.
        with File.from_path(inpath) as file:
            if file.size() < 4:
                # Too short to even hold the trailing 4-byte offset.
                return False
            file.seek(-4, io.SEEK_END)  # last 4 bytes indicates where linfo ends
            l_info_start_offset = abs(
                convert_int32(file.read(4), Endian.LITTLE) - l_info_size
            )
            # The complete header has to fit in the file, not only its first byte.
            if l_info_start_offset + l_info_size > file.size():
                return False
            file.seek(l_info_start_offset, io.SEEK_SET)
            upx_header = parse_upx_l_info(file)
            if upx_header.l_magic != b"UPX!":  # Magic
                return False
            if not self.upx_checksum_validates(file, upx_header, elf):
                raise InvalidInputFormat("Invalid UPX checksum")
            return True

    def extract(self, inpath: Path, outdir: Path):
        """Extract content from the ELF at *inpath* into *outdir*.

        A kernel image (EXEC type with an ``.init.data`` section) has its
        initramfs carved, a UPX-packed file is unpacked, and any other ELF
        that is not the whole input file is copied as ``carved.elf``.
        """
        # ELF file extraction is special in that in the general case no new files are extracted, thus
        # when we want to clean up all carves to save place, carved ELF files would be deleted as well,
        # however we want to keep carved out ELF files, as they are the interesting stuff!
        elf = lief.ELF.parse(str(inpath))

        if elf is None:
            logger.error(
                "Trying to extract an invalid ELF file.", inpath=inpath, outdir=outdir
            )
            return

        is_kernel = (
            elf.header.file_type == lief.ELF.Header.FILE_TYPE.EXEC
            and elf.has_section(KERNEL_INIT_DATA_SECTION)
        )
        if is_kernel:
            with File.from_path(inpath) as file:
                extract_initramfs(elf, file, outdir)

        elif self.is_valid_upx(inpath=inpath, elf=elf):
            extract_upx(inpath, outdir)

        elif not self.is_whole_file:
            # make a copy, and let the carved chunk be deleted
            outdir.mkdir(parents=True, exist_ok=False)
            shutil.copy2(inpath, outdir / "carved.elf")
            # more work will be done, when outdir is picked up by processing,
            # and the ELF file is processed as a whole file.
            # As a performance side effect, ELF files will be searched for chunks twice.
            # Even though the second chunk search one is short-circuited,
            # because the ELF handler will recognize it as a whole file
            # other handlers might burn some cycles on the file as well.
150
151
def extract_upx(inpath: Path, outdir: Path):
    """Unpack the UPX-packed file *inpath* into a newly created *outdir* with ``upx -d``."""
    upx_unpacker = Command("upx", "-d", "{inpath}", "-o{outdir}/{inpath.stem}.elf")
    # exist_ok=False: creating the directory fails if it already exists.
    outdir.mkdir(parents=True, exist_ok=False)
    upx_unpacker.extract(inpath, outdir)
156
157
def extract_initramfs(elf, file: File, outdir):
    """Carve the initramfs out of the kernel's ``.init.data`` section.

    The alignment rules of the initramfs do not allow its exact start and end
    to be computed, so the carved output may carry 4 extra bytes before or
    after the real initramfs. Nothing is written when the section is missing
    or empty, or when the computed offsets are not plausible.
    """
    if not elf.has_section(KERNEL_INIT_DATA_SECTION):
        return

    section = elf.get_section(KERNEL_INIT_DATA_SECTION)
    if not section.size:
        return

    # The initramfs size is stored at the very end of the section, as a 64-bit
    # or 32-bit integer depending on the platform (see usr/initramfs_data.S in
    # the kernel). The size is padded to 8 bytes (see
    # include/asm-generic/vmlinux.lds.h) and the initramfs sits right before it.
    if elf.header.identity_class == lief.ELF.Header.CLASS.ELF64:
        size_field_width, read_size_field = 8, convert_int64
    else:
        size_field_width, read_size_field = 4, convert_int32

    if elf.header.identity_data == lief.ELF.Header.ELF_DATA.LSB:
        endian = Endian.LITTLE
    else:
        endian = Endian.BIG

    section_start = section.file_offset
    section_end = section_start + section.size
    size_field_offset = section_end - size_field_width
    initramfs_size = read_size_field(
        file[size_field_offset:section_end],
        endian=endian,
    )

    # The initramfs start is 4-byte aligned while the size field is 8-byte
    # aligned, and only the padded end is known. Each end padding therefore
    # has two valid values (0 and 4, 1 and 5, 2 and 6, 3 and 7); begin with
    # the offsets for the smaller one.
    start = size_field_offset - round_up(initramfs_size, 4)
    end = start + initramfs_size
    pad_len = size_field_offset - end

    # initramfs can be turned off (https://www.linux.com/training-tutorials/kernel-newbie-corner-initrd-and-initramfs-whats/)
    # in which case the calculations above most probably yield bogus offsets
    if not (section_start <= start < end <= section_end):
        return
    if bytes(pad_len) != file[end:size_field_offset]:
        return

    # If the bigger padding is possible as well, take 4 more leading bytes.
    bigger_padding_fits = section_start <= start - 4
    if bigger_padding_fits and (
        bytes(pad_len + 4) == file[end - 4 : size_field_offset]
    ):
        start -= 4

    initramfs_chunk = ValidChunk(start_offset=start, end_offset=end)
    carve_chunk_to_file(outdir / "initramfs", file, initramfs_chunk)
226
227
class _ELFBase(StructHandler):
    """Chunk calculation shared by the 32-bit and 64-bit ELF handlers.

    The methods parse ``elf_shdr_t``, ``elf_phdr_t`` and ``module_signature_t``
    structures through ``self._struct_parser``, so subclasses must define them.
    """

    EXTRACTOR = None
    SECTION_HEADER_STRUCT = "elf_shdr_t"
    PROGRAM_HEADER_STRUCT = "elf_phdr_t"

    def is_valid_header(self, header) -> bool:
        """Return True when e_type, e_machine and e_version map to known LIEF values."""
        try:
            lief.ELF.Header.FILE_TYPE(header.e_type)
            lief.ELF.ARCH(header.e_machine)
            lief.ELF.Header.VERSION(header.e_version)
        except ValueError:
            return False
        else:
            return True

    @staticmethod
    def get_endianness(file: File, start_offset: int) -> Endian:
        """Read the e_ident_data byte (offset 5 of the ELF) and map it to an Endian."""
        file.seek(start_offset + 5, io.SEEK_SET)
        data_encoding = convert_int8(file.read(1), Endian.LITTLE)
        if data_encoding == 0x1:
            return Endian.LITTLE
        return Endian.BIG

    def get_last_section_end(
        self, file: File, sections_start_offset: int, sections_num: int, endian
    ) -> int:
        """Return the largest ``sh_offset + sh_size`` among the section headers.

        NOBITS sections and sections whose type LIEF does not know are ignored.
        """
        furthest_end = 0
        file.seek(sections_start_offset)

        for _ in range(sections_num):
            shdr = self._struct_parser.parse(self.SECTION_HEADER_STRUCT, file, endian)

            try:
                section_type = lief.ELF.Section.TYPE(shdr.sh_type)
            except ValueError:
                continue
            if section_type == lief.ELF.Section.TYPE.NOBITS:
                continue

            candidate_end = shdr.sh_offset + shdr.sh_size
            if candidate_end > furthest_end:
                furthest_end = candidate_end

        return furthest_end

    def get_last_program_end(
        self, file: File, programs_start_offset: int, programs_num: int, endian
    ) -> int:
        """Return the largest ``p_offset + p_filesz`` among the program headers."""
        furthest_end = 0
        file.seek(programs_start_offset)

        for _ in range(programs_num):
            phdr = self._struct_parser.parse(self.PROGRAM_HEADER_STRUCT, file, endian)

            candidate_end = phdr.p_offset + phdr.p_filesz
            if candidate_end > furthest_end:
                furthest_end = candidate_end

        return furthest_end

    def get_end_offset(self, file: File, start_offset: int, header, endian) -> int:
        """Compute the absolute end offset of the ELF that starts at *start_offset*.

        The section header table usually comes last, but the program headers
        may be placed at the end of the file instead, and section headers can
        be intermixed with the sections themselves. The end is therefore the
        furthest of four candidates: end of the section header table, end of
        the program header table, end of the last section, and end of the
        last program segment.
        """
        section_table_end = header.e_shoff + (header.e_shnum * header.e_shentsize)
        program_table_end = header.e_phoff + (header.e_phnum * header.e_phentsize)

        # Sections are scanned before program headers; both calls move the cursor.
        sections_end = self.get_last_section_end(
            file, start_offset + header.e_shoff, header.e_shnum, endian
        )
        programs_end = self.get_last_program_end(
            file, start_offset + header.e_phoff, header.e_phnum, endian
        )

        relative_end = max(
            section_table_end, program_table_end, sections_end, programs_end
        )
        return start_offset + relative_end

    def get_signed_kernel_module_end_offset(self, file: File, end_offset: int) -> int:
        """Extend *end_offset* over an appended kernel module signature, if present.

        A signed kernel module is an ELF file followed by:
        - a PKCS7 signature
        - a module_signature structure
        - the footer value '~Module signature appended~\\n'

        Only the first footer found after *end_offset* is examined. When the
        signature length recorded before it places the footer exactly where
        it was found, the returned offset covers the whole signature part.
        Otherwise *end_offset* is returned unchanged.
        """
        file.seek(end_offset, io.SEEK_SET)
        for footer_offset in iterate_patterns(file, KERNEL_MODULE_SIGNATURE_FOOTER):
            info_offset = footer_offset - KERNEL_MODULE_SIGNATURE_INFO_LEN
            file.seek(info_offset, io.SEEK_SET)
            signature_info = self._struct_parser.parse(
                "module_signature_t", file, Endian.BIG
            )
            logger.debug(
                "module_signature_t",
                module_signature=signature_info,
                _verbosity=3,
            )
            expected_footer_offset = (
                end_offset + signature_info.sig_len + KERNEL_MODULE_SIGNATURE_INFO_LEN
            )
            if footer_offset == expected_footer_offset:
                end_offset = footer_offset + len(KERNEL_MODULE_SIGNATURE_FOOTER)

            # Only the first footer match is considered.
            break

        return end_offset

    def is_upx(self, file: File, start_offset: int, end_offset: int) -> bool:
        """Tell whether ``UPX!`` occurs in the first 4096 bytes of the ELF (capped at *end_offset*)."""
        window_end = min(end_offset, start_offset + 4096)
        return b"UPX!" in file[start_offset:window_end]

    def get_upx_end_offset(self, file: File, start_offset: int, end_offset: int) -> int:
        """Return the end offset derived from a valid UPX footer, else *end_offset*."""
        footer = b"\xff\x00\x00\x00\x00UPX!\x00\x00\x00\x00"
        for match_offset in iterate_patterns(file=file, pattern=footer):
            # Move to the end of the footer ...
            file.seek(match_offset + len(footer), io.SEEK_SET)
            # ... then past any further NULL bytes that are sometimes added.
            file.seek(read_until_past(file=file, pattern=b"\x00"))
            packhead = parse_upx_packhead(file)
            packed_size = packhead.c_filesize + packhead.size

            if packhead.magic != b"UPX!":
                continue
            if packhead.u_filesize1 != packhead.u_filesize2:
                continue
            if packed_size != file.tell() - start_offset:
                continue
            if packed_size % 4:
                continue
            return start_offset + packed_size

        # no matching UPX footer found
        return end_offset

    def calculate_chunk(self, file: File, start_offset: int) -> ElfChunk | None:
        """Build the ElfChunk for the ELF at *start_offset*.

        Returns None when the header is invalid or when the QNX IFS marker
        is found at its expected offset.
        """
        endian = self.get_endianness(file, start_offset)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)
        if not self.is_valid_header(header):
            return None

        marker_start = start_offset + QNX_IFS_MARKER_OFFSET
        if file[marker_start : marker_start + 4] == QNX_IFS_MARKER:
            logger.info("QNX IFS embedded in ELF identified, bailing out.")
            return None

        chunk_end = self.get_end_offset(file, start_offset, header, endian)

        # kernel modules are always relocatable
        if header.e_type == lief.ELF.Header.FILE_TYPE.REL.value:
            chunk_end = self.get_signed_kernel_module_end_offset(file, chunk_end)

        if self.is_upx(file=file, start_offset=start_offset, end_offset=chunk_end):
            chunk_end = self.get_upx_end_offset(file, start_offset, chunk_end)

        # ElfChunk applies the ELF-specific extraction.
        return ElfChunk(
            start_offset=start_offset,
            end_offset=chunk_end,
        )
400
401
class ELF32Handler(_ELFBase):
    """Handler matching ELF files whose e_ident_class byte is 0x01 (32-bit)."""

    NAME = "elf32"

    # Matches the first seven identification bytes of the file.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            01
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # Structures with 32-bit wide address, offset and size fields.
    C_DEFINITIONS = r"""
        typedef struct elf_header_32 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint32 e_entry;
            uint32 e_phoff;
            uint32 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_32_t;

        typedef struct elf32_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint32 sh_flags;
            uint32 sh_addr;
            uint32 sh_offset;
            uint32 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint32 sh_addralign;
            uint32 sh_entsize;
        } elf_shdr_t;

        typedef struct elf32_phdr {
            uint32 p_type;
            uint32 p_offset;
            uint32 p_vaddr;
            uint32 p_paddr;
            uint32 p_filesz;
            uint32 p_memsz;
            uint32 p_flags;
            uint32 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    HEADER_STRUCT = "elf_header_32_t"

    DOC = HandlerDoc(
        name="ELF (32-bit)",
        description=(
            "The 32-bit ELF (Executable and Linkable Format) is a binary file "
            "format used for executables, object code, shared libraries, and core "
            "dumps. It supports 32-bit addressing and includes headers for program "
            "and section information."
        ),
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )
497
498
class ELF64Handler(_ELFBase):
    """Handler matching ELF files whose e_ident_class byte is 0x02 (64-bit)."""

    NAME = "elf64"

    # Matches the first seven identification bytes of the file.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            02
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # Structures with 64-bit wide address, offset and size fields; note that
    # p_flags directly follows p_type in the program header here.
    C_DEFINITIONS = r"""
        typedef struct elf_header_64 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint64 e_entry;
            uint64 e_phoff;
            uint64 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_64_t;

        typedef struct elf64_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint64 sh_flags;
            uint64 sh_addr;
            uint64 sh_offset;
            uint64 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint64 sh_addralign;
            uint64 sh_entsize;
        } elf_shdr_t;

        typedef struct elf64_phdr {
            uint32 p_type;
            uint32 p_flags;
            uint64 p_offset;
            uint64 p_vaddr;
            uint64 p_paddr;
            uint64 p_filesz;
            uint64 p_memsz;
            uint64 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    HEADER_STRUCT = "elf_header_64_t"

    DOC = HandlerDoc(
        name="ELF (64-bit)",
        description=(
            "The 64-bit ELF (Executable and Linkable Format) is a binary file "
            "format used for executables, object code, shared libraries, and core "
            "dumps. It supports 64-bit addressing and includes headers for program "
            "and section information."
        ),
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )