1import io
2import shutil
3import zlib
4from pathlib import Path
5
6import attrs
7import lief
8from structlog import get_logger
9
10from unblob.extractor import carve_chunk_to_file
11from unblob.extractors import Command
12from unblob.file_utils import (
13 Endian,
14 File,
15 InvalidInputFormat,
16 StructParser,
17 convert_int8,
18 convert_int32,
19 convert_int64,
20 iterate_file,
21 iterate_patterns,
22 read_until_past,
23 round_up,
24)
25from unblob.models import (
26 HandlerDoc,
27 HandlerType,
28 HexString,
29 Reference,
30 StructHandler,
31 ValidChunk,
32)
33
# Turn off lief's own logging output.
lief.logging.disable()

logger = get_logger()

# Number of bytes rewound from the signature footer to reach the
# module_signature_t structure (see get_signed_kernel_module_end_offset).
KERNEL_MODULE_SIGNATURE_INFO_LEN = 12
# Marker searched for after an ELF file to detect an appended kernel module signature.
KERNEL_MODULE_SIGNATURE_FOOTER = b"~Module signature appended~\n"

# Section that, together with an EXEC file type, makes ElfChunk.extract treat
# the ELF as a kernel image and carve the initramfs out of it.
KERNEL_INIT_DATA_SECTION = ".init.data"
42
43
# C layouts of the two UPX structures read by this module (packhead_t and
# l_info_t); parse_upx_packhead / parse_upx_l_info decode them as little-endian.
# [Ref] https://github.com/upx/upx/blob/devel/src/stub/src/include/linux.h
UPX_C_DEFINITIONS = r"""
typedef struct packhead{
    char magic[4];
    uint8_t version;
    uint8_t format;
    uint8_t level;
    uint8_t method;
    uint64_t unknown1;
    uint32_t u_filesize1;
    uint32_t c_filesize;
    uint32_t u_filesize2;
    uint32_t unknown2;
    uint32_t l_info_offset;
} packhead_t;

typedef struct l_info{
    uint32_t l_checksum;
    char l_magic[4];
    uint16_t l_lsize;
    uint8_t l_version;
    uint8_t l_format;
} l_info_t;
"""
# Module-level parser built once from the definitions above and shared by the
# UPX helpers and ElfChunk.is_valid_upx.
upx_parser = StructParser(UPX_C_DEFINITIONS)
69
70
def parse_upx_packhead(file: File):
    """Parse a little-endian ``packhead_t`` from *file* with the module-level UPX parser."""
    packhead = upx_parser.parse("packhead_t", file, Endian.LITTLE)
    return packhead
73
74
def parse_upx_l_info(file: File):
    """Parse a little-endian ``l_info_t`` from *file* with the module-level UPX parser."""
    l_info = upx_parser.parse("l_info_t", file, Endian.LITTLE)
    return l_info
77
78
@attrs.define(repr=False)
class ElfChunk(ValidChunk):
    """Chunk describing an ELF file, with ELF-specific extraction.

    ``extract`` distinguishes three cases: kernel images (the initramfs is
    carved out), UPX-packed files (unpacked with ``upx``) and other carved
    ELF files (copied into the output directory).
    """

    @staticmethod
    def upx_checksum_validates(file: File, l_info, elf) -> bool:
        """Check the Adler-32 checksum recorded in a UPX ``l_info`` structure.

        The checksummed region is located from ``elf.last_offset_segment`` and
        ``l_info.l_lsize``, corrected by an alignment term that depends on
        whether the ELF has a section named ``init``.

        Returns True when the computed Adler-32 equals ``l_info.l_checksum``.
        """
        size_pack2 = elf.last_offset_segment - l_info.l_lsize
        size_alignment = round_up(size_pack2, 4)  # Forces to be mod 4
        xct_off = any(section.name == "init" for section in elf.sections)
        size_alignment += (4 & size_alignment) ^ (int(bool(xct_off)) << 2)  # 4 or 0
        size_alignment += 8  # Added 2 times 4 byte (size of disp)
        if xct_off:
            size_alignment += 12
        alignment = size_alignment - size_pack2
        checksum_length = l_info.l_lsize - alignment
        checksum_offset = elf.last_offset_segment - checksum_length
        file.seek(checksum_offset, io.SEEK_SET)
        adler32_checksum = 1  # Adler-32 starting value
        for chunk in iterate_file(file, checksum_offset, checksum_length):
            adler32_checksum = zlib.adler32(chunk, adler32_checksum)
        return adler32_checksum == l_info.l_checksum

    def is_valid_upx(self, inpath: Path, elf) -> bool:
        """Tell whether the file at *inpath* ends with a valid UPX ``l_info``.

        Returns False when the ``l_info`` structure cannot fit inside the file
        or its magic is not ``UPX!``.

        Raises:
            InvalidInputFormat: the magic matches but the checksum does not.
        """
        l_info_size = upx_parser.cparser_le.l_info_t.size
        # Open the file only for the duration of the check so the handle is
        # released on every exit path (return False, return True, or raise).
        with File.from_path(inpath) as file:
            file.seek(-4, io.SEEK_END)  # last 4 bytes indicates where linfo ends
            l_info_end_offset = convert_int32(file.read(4), Endian.LITTLE)
            l_info_start_offset = abs(l_info_end_offset - l_info_size)
            # The whole structure must lie inside the file, not just its start.
            if l_info_start_offset + l_info_size > file.size():
                return False
            file.seek(l_info_start_offset, io.SEEK_SET)
            upx_header = parse_upx_l_info(file)
            if upx_header.l_magic != b"UPX!":  # Magic
                return False
            if not self.upx_checksum_validates(file, upx_header, elf):
                raise InvalidInputFormat("Invalid UPX checksum")
            return True

    def extract(self, inpath: Path, outdir: Path):
        """Extract the ELF at *inpath* into *outdir* according to its kind.

        Nothing is written when lief cannot parse the file, or when the file is
        neither a kernel image nor UPX-packed and the chunk is the whole file.
        """
        # ELF file extraction is special in that in the general case no new files are extracted, thus
        # when we want to clean up all carves to save place, carved ELF files would be deleted as well,
        # however we want to keep carved out ELF files, as they are the interesting stuff!
        elf = lief.ELF.parse(str(inpath))

        if elf is None:
            logger.error(
                "Trying to extract an invalid ELF file.", inpath=inpath, outdir=outdir
            )
            return

        is_kernel = (
            elf.header.file_type == lief.ELF.Header.FILE_TYPE.EXEC
            and elf.has_section(KERNEL_INIT_DATA_SECTION)
        )
        if is_kernel:
            with File.from_path(inpath) as file:
                extract_initramfs(elf, file, outdir)

        elif self.is_valid_upx(inpath=inpath, elf=elf):
            extract_upx(inpath, outdir)

        elif not self.is_whole_file:
            # make a copy, and let the carved chunk be deleted
            outdir.mkdir(parents=True, exist_ok=False)
            shutil.copy2(inpath, outdir / "carved.elf")
            # more work will be done, when outdir is picked up by processing,
            # and the ELF file is processed as a whole file.
            # As a performance side effect, ELF files will be searched for chunks twice.
            # Even though the second chunk search one is short-circuited,
            # because the ELF handler will recognize it as a whole file
            # other handlers might burn some cycles on the file as well.
148
149
def extract_upx(inpath: Path, outdir: Path):
    """Unpack a UPX-packed file into *outdir* by running ``upx -d``.

    *outdir* is created here (including parents) and must not already exist.
    """
    upx_arguments = ("upx", "-d", "{inpath}", "-o{outdir}/{inpath.stem}.elf")
    unpacker = Command(*upx_arguments)
    outdir.mkdir(parents=True, exist_ok=False)
    unpacker.extract(inpath, outdir)
154
155
def extract_initramfs(elf, file: File, outdir):
    """Carve the initramfs stored in the kernel's ``.init.data`` section.

    The alignment rules of the initramfs make its exact start and end
    ambiguous, so the carved ``initramfs`` file may contain 4 surplus bytes
    before or after the real archive. Nothing is carved when the section is
    missing or empty, or when the computed offsets are not plausible.
    """
    if not elf.has_section(KERNEL_INIT_DATA_SECTION):
        return

    init_data = elf.get_section(KERNEL_INIT_DATA_SECTION)
    if not init_data.size:
        return

    # The size field is 64 or 32 bits wide depending on the ELF class,
    # see usr/initramfs_data.S in the kernel.
    if elf.header.identity_class == lief.ELF.Header.CLASS.ELF64:
        size_field_width, read_size = 8, convert_int64
    else:
        size_field_width, read_size = 4, convert_int32

    if elf.header.identity_data == lief.ELF.Header.ELF_DATA.LSB:
        endian = Endian.LITTLE
    else:
        endian = Endian.BIG

    section_start = init_data.file_offset
    section_end = section_start + init_data.size

    # The size sits at the very end of the section (padded to 8 bytes, see
    # include/asm-generic/vmlinux.lds.h); the initramfs is right before it.
    initramfs_size_offset = section_end - size_field_width
    initramfs_size = read_size(
        file[initramfs_size_offset:section_end],
        endian=endian,
    )

    # The initramfs start is 4-byte aligned while the size field is 8-byte
    # aligned, so only the padded end is known and two paddings are possible:
    # 0 and 4, 1 and 5, 2 and 6, 3 and 7. Begin with the smaller candidate.
    initramfs_start = initramfs_size_offset - round_up(initramfs_size, 4)
    initramfs_end = initramfs_start + initramfs_size
    padding = initramfs_size_offset - initramfs_end

    # initramfs can be turned off (https://www.linux.com/training-tutorials/kernel-newbie-corner-initrd-and-initramfs-whats/)
    # in which case the calculations above most probably give bogus offsets.
    if not section_start <= initramfs_start < initramfs_end <= section_end:
        return
    if bytes(padding) != file[initramfs_end:initramfs_size_offset]:
        return

    # If the larger padding is possible as well, take 4 more leading bytes.
    larger_padding_fits = section_start <= initramfs_start - 4
    if larger_padding_fits and (
        bytes(padding + 4) == file[initramfs_end - 4 : initramfs_size_offset]
    ):
        initramfs_start -= 4

    initramfs_chunk = ValidChunk(start_offset=initramfs_start, end_offset=initramfs_end)
    carve_chunk_to_file(outdir / "initramfs", file, initramfs_chunk)
224
225
class _ELFBase(StructHandler):
    """Chunk calculation shared by the 32-bit and 64-bit ELF handlers.

    Subclasses provide the struct definitions; this class works out where an
    ELF file ends, also accounting for an appended kernel module signature or
    a UPX trailer.
    """

    EXTRACTOR = None
    SECTION_HEADER_STRUCT = "elf_shdr_t"
    PROGRAM_HEADER_STRUCT = "elf_phdr_t"

    def is_valid_header(self, header) -> bool:
        """Return True when e_type, e_machine and e_version are values lief accepts."""
        try:
            lief.ELF.Header.FILE_TYPE(header.e_type)
            lief.ELF.ARCH(header.e_machine)
            lief.ELF.Header.VERSION(header.e_version)
        except ValueError:
            return False
        else:
            return True

    @staticmethod
    def get_endianness(file: File, start_offset: int) -> Endian:
        """Read the e_ident_data byte (offset 5 of the header) and map it to an Endian."""
        file.seek(start_offset + 5, io.SEEK_SET)
        data_encoding = convert_int8(file.read(1), Endian.LITTLE)
        if data_encoding == 0x1:
            return Endian.LITTLE
        return Endian.BIG

    def get_last_section_end(
        self, file: File, sections_start_offset: int, sections_num: int, endian
    ) -> int:
        """Return the largest ``sh_offset + sh_size`` among the section headers.

        NOBITS sections and sections whose type lief does not recognise are
        left out. The result is 0 when no section qualifies.
        """
        furthest_end = 0
        file.seek(sections_start_offset)

        for _ in range(sections_num):
            shdr = self._struct_parser.parse(self.SECTION_HEADER_STRUCT, file, endian)

            try:
                section_type = lief.ELF.Section.TYPE(shdr.sh_type)
            except ValueError:
                continue
            if section_type == lief.ELF.Section.TYPE.NOBITS:
                continue

            furthest_end = max(furthest_end, shdr.sh_offset + shdr.sh_size)

        return furthest_end

    def get_last_program_end(
        self, file: File, programs_start_offset: int, programs_num: int, endian
    ) -> int:
        """Return the largest ``p_offset + p_filesz`` among the program headers (0 if none)."""
        file.seek(programs_start_offset)

        furthest_end = 0
        for _ in range(programs_num):
            phdr = self._struct_parser.parse(self.PROGRAM_HEADER_STRUCT, file, endian)
            furthest_end = max(furthest_end, phdr.p_offset + phdr.p_filesz)

        return furthest_end

    def get_end_offset(self, file: File, start_offset: int, header, endian) -> int:
        """Compute the absolute end offset of the ELF file starting at *start_offset*.

        The section header table is usually last, but the program headers may
        also be placed at the end, and section headers can be intermixed with
        section contents. All four candidates are therefore evaluated and the
        furthest one is taken as the file size.
        """
        section_table_end = header.e_shoff + (header.e_shnum * header.e_shentsize)
        program_table_end = header.e_phoff + (header.e_phnum * header.e_phentsize)

        section_data_end = self.get_last_section_end(
            file, start_offset + header.e_shoff, header.e_shnum, endian
        )
        segment_data_end = self.get_last_program_end(
            file, start_offset + header.e_phoff, header.e_phnum, endian
        )

        elf_size = max(
            section_table_end, program_table_end, section_data_end, segment_data_end
        )
        return start_offset + elf_size

    def get_signed_kernel_module_end_offset(self, file: File, end_offset: int) -> int:
        """Extend *end_offset* over a kernel module signature, if one follows.

        Signed kernel modules are ELF files followed by:
          - a PKCS7 signature
          - a module_signature structure
          - the footer KERNEL_MODULE_SIGNATURE_FOOTER
        When the first footer found after the ELF sits exactly where the
        signature length says it should, the returned offset covers the whole
        signature part; otherwise *end_offset* is returned unchanged.
        """
        file.seek(end_offset, io.SEEK_SET)
        for footer_offset in iterate_patterns(file, KERNEL_MODULE_SIGNATURE_FOOTER):
            signature_info_offset = footer_offset - KERNEL_MODULE_SIGNATURE_INFO_LEN
            file.seek(signature_info_offset, io.SEEK_SET)
            module_signature = self._struct_parser.parse(
                "module_signature_t", file, Endian.BIG
            )
            logger.debug(
                "module_signature_t",
                module_signature=module_signature,
                _verbosity=3,
            )
            expected_footer_offset = (
                end_offset + module_signature.sig_len + KERNEL_MODULE_SIGNATURE_INFO_LEN
            )
            if footer_offset == expected_footer_offset:
                end_offset = footer_offset + len(KERNEL_MODULE_SIGNATURE_FOOTER)

            # Only the first footer match is considered
            break

        return end_offset

    def is_upx(self, file: File, start_offset: int, end_offset: int) -> bool:
        """Check if the UPX magic appears within the first 4096 bytes of the ELF."""
        search_end = min(end_offset, start_offset + 4096)
        return b"UPX!" in file[start_offset:search_end]

    def get_upx_end_offset(self, file: File, start_offset: int, end_offset: int) -> int:
        """Locate a UPX footer and return the UPX end offset, or the original *end_offset*."""
        upx_footer = b"\xff\x00\x00\x00\x00UPX!\x00\x00\x00\x00"
        for footer_offset in iterate_patterns(file=file, pattern=upx_footer):
            # move to the end of the footer
            file.seek(footer_offset + len(upx_footer), io.SEEK_SET)
            # sometimes more NULL bytes are added, skip over them
            file.seek(read_until_past(file=file, pattern=b"\x00"))
            packheader = parse_upx_packhead(file)
            file_size_compressed = packheader.c_filesize + packheader.size

            if packheader.magic != b"UPX!":
                continue
            if packheader.u_filesize1 != packheader.u_filesize2:
                continue
            if file_size_compressed != file.tell() - start_offset:
                continue
            if file_size_compressed % 4 != 0:
                continue
            return start_offset + file_size_compressed

        # no matching UPX footer found
        return end_offset

    def calculate_chunk(self, file: File, start_offset: int) -> ElfChunk | None:
        """Build the ElfChunk for the ELF at *start_offset*, or None if its header is invalid."""
        endian = self.get_endianness(file, start_offset)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)
        if not self.is_valid_header(header):
            return None

        end_offset = self.get_end_offset(file, start_offset, header, endian)

        # kernel modules are always relocatable
        is_relocatable = header.e_type == lief.ELF.Header.FILE_TYPE.REL.value
        if is_relocatable:
            end_offset = self.get_signed_kernel_module_end_offset(file, end_offset)

        if self.is_upx(file=file, start_offset=start_offset, end_offset=end_offset):
            end_offset = self.get_upx_end_offset(file, start_offset, end_offset)

        # ElfChunk carries the special extraction logic for ELF files
        return ElfChunk(start_offset=start_offset, end_offset=end_offset)
393
394
class ELF32Handler(_ELFBase):
    """Handler for ELF files whose e_ident_class byte is 0x01 (32-bit)."""

    NAME = "elf32"

    # Matches the ELF magic followed by class 0x01, either data encoding and
    # identification version 0x01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            01
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 32-bit layouts of the ELF header, section header and program header,
    # plus the module_signature structure read by
    # _ELFBase.get_signed_kernel_module_end_offset.
    C_DEFINITIONS = r"""
        typedef struct elf_header_32 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint32 e_entry;
            uint32 e_phoff;
            uint32 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_32_t;

        typedef struct elf32_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint32 sh_flags;
            uint32 sh_addr;
            uint32 sh_offset;
            uint32 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint32 sh_addralign;
            uint32 sh_entsize;
        } elf_shdr_t;

        typedef struct elf32_phdr {
            uint32 p_type;
            uint32 p_offset;
            uint32 p_vaddr;
            uint32 p_paddr;
            uint32 p_filesz;
            uint32 p_memsz;
            uint32 p_flags;
            uint32 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    # Name of the struct in C_DEFINITIONS that describes the file header.
    HEADER_STRUCT = "elf_header_32_t"

    DOC = HandlerDoc(
        name="ELF (32-bit)",
        description="The 32-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 32-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )
490
491
class ELF64Handler(_ELFBase):
    """Handler for ELF files whose e_ident_class byte is 0x02 (64-bit)."""

    NAME = "elf64"

    # Matches the ELF magic followed by class 0x02, either data encoding and
    # identification version 0x01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            02
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 64-bit layouts of the ELF header, section header and program header,
    # plus the module_signature structure read by
    # _ELFBase.get_signed_kernel_module_end_offset.
    C_DEFINITIONS = r"""
        typedef struct elf_header_64 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint64 e_entry;
            uint64 e_phoff;
            uint64 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_64_t;

        typedef struct elf64_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint64 sh_flags;
            uint64 sh_addr;
            uint64 sh_offset;
            uint64 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint64 sh_addralign;
            uint64 sh_entsize;
        } elf_shdr_t;

        typedef struct elf64_phdr {
            uint32 p_type;
            uint32 p_flags;
            uint64 p_offset;
            uint64 p_vaddr;
            uint64 p_paddr;
            uint64 p_filesz;
            uint64 p_memsz;
            uint64 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    # Name of the struct in C_DEFINITIONS that describes the file header.
    HEADER_STRUCT = "elf_header_64_t"

    DOC = HandlerDoc(
        name="ELF (64-bit)",
        description="The 64-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 64-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )