1import io
2import shutil
3import zlib
4from pathlib import Path
5from typing import Optional
6
7import attrs
8import lief
9from structlog import get_logger
10
11from unblob.extractor import carve_chunk_to_file
12from unblob.extractors import Command
13from unblob.file_utils import (
14 Endian,
15 File,
16 InvalidInputFormat,
17 StructParser,
18 convert_int8,
19 convert_int32,
20 convert_int64,
21 iterate_file,
22 iterate_patterns,
23 read_until_past,
24 round_up,
25)
26from unblob.models import (
27 HandlerDoc,
28 HandlerType,
29 HexString,
30 Reference,
31 StructHandler,
32 ValidChunk,
33)
34
# Turn off LIEF's own logging; this module logs through the structlog logger below.
lief.logging.disable()

logger = get_logger()

# Number of bytes occupied by the module signature info record that sits directly
# in front of the footer (5 one-byte fields + 3 padding bytes + a 4-byte length in
# the `module_signature_t` layouts declared by the handlers in this file).
KERNEL_MODULE_SIGNATURE_INFO_LEN = 12
# Byte marker searched for after the ELF data of a relocatable file to detect an
# appended kernel module signature.
KERNEL_MODULE_SIGNATURE_FOOTER = b"~Module signature appended~\n"

# Section whose presence (in an EXEC-type ELF) makes the file be treated as a kernel
# image, and in which the initramfs is looked up.
KERNEL_INIT_DATA_SECTION = ".init.data"
43
44
# [Ref] https://github.com/upx/upx/blob/devel/src/stub/src/include/linux.h
# C layouts of the two UPX records parsed in this file:
#   - packhead_t: read right after the UPX footer pattern when computing the end
#     offset of a packed ELF; `c_filesize` and the two `u_filesize*` fields are the
#     ones consulted there.
#   - l_info_t: read at the offset derived from the last 4 bytes of a packed file;
#     carries the magic, the checksum and `l_lsize` used by the checksum validation.
# NOTE(review): the `unknown*` fields are not interpreted anywhere in this file.
UPX_C_DEFINITIONS = r"""
typedef struct packhead{
    char magic[4];
    uint8_t version;
    uint8_t format;
    uint8_t level;
    uint8_t method;
    uint64_t unknown1;
    uint32_t u_filesize1;
    uint32_t c_filesize;
    uint32_t u_filesize2;
    uint32_t unknown2;
    uint32_t l_info_offset;
} packhead_t;

typedef struct l_info{
    uint32_t l_checksum;
    char l_magic[4];
    uint16_t l_lsize;
    uint8_t l_version;
    uint8_t l_format;
} l_info_t;
"""
# Parser shared by the UPX helpers in this file, which all parse little-endian.
upx_parser = StructParser(UPX_C_DEFINITIONS)
70
71
def parse_upx_packhead(file: File):
    """Parse a little-endian UPX ``packhead_t`` record from *file*.

    The caller is expected to have positioned *file* at the record beforehand.
    """
    struct_name = "packhead_t"
    return upx_parser.parse(struct_name, file, Endian.LITTLE)
74
75
def parse_upx_l_info(file: File):
    """Parse a little-endian UPX ``l_info_t`` record from *file*.

    The caller is expected to have positioned *file* at the record beforehand.
    """
    struct_name = "l_info_t"
    return upx_parser.parse(struct_name, file, Endian.LITTLE)
78
79
@attrs.define(repr=False)
class ElfChunk(ValidChunk):
    """Chunk type for ELF files with ELF-aware extraction.

    Extraction picks one of three paths: initramfs carving for kernel images,
    UPX unpacking for UPX-packed binaries, or a plain copy of the carved ELF.
    """

    @staticmethod
    def upx_checksum_validates(file: File, l_info, elf) -> bool:
        """Check the Adler-32 checksum recorded in a UPX ``l_info`` record.

        :param file: the opened ELF file
        :param l_info: parsed ``l_info_t`` record (``l_lsize`` and ``l_checksum`` are used)
        :param elf: LIEF binary object of the same file
        :return: True when the computed checksum equals ``l_info.l_checksum``
        """
        size_pack2 = elf.last_offset_segment - l_info.l_lsize
        size_aligment = round_up(size_pack2, 4)  # Forces to be mod 4
        # NOTE(review): the name compared against is "init" without a leading dot;
        # confirm that this is the intended section name.
        xct_off = any(section.name == "init" for section in elf.sections)
        size_aligment += (4 & size_aligment) ^ (int(xct_off) << 2)  # 4 or 0
        size_aligment += 8  # Added 2 times 4 byte (size of disp)
        if xct_off:
            size_aligment += 12
        alignment = size_aligment - size_pack2
        checksum_length = l_info.l_lsize - alignment
        checksum_offset = elf.last_offset_segment - checksum_length
        file.seek(checksum_offset, io.SEEK_SET)
        adler32_checksum = 1  # initial value of a running Adler-32
        for chunk in iterate_file(file, checksum_offset, checksum_length):
            adler32_checksum = zlib.adler32(chunk, adler32_checksum)
        return adler32_checksum == l_info.l_checksum

    def is_valid_upx(self, inpath: Path, elf) -> bool:
        """Tell whether the file at *inpath* ends with a valid UPX ``l_info`` record.

        :param inpath: path of the carved ELF file
        :param elf: LIEF binary object parsed from *inpath*
        :return: False when no ``UPX!`` record is found at the expected place,
            True when the record is present and its checksum validates
        :raises InvalidInputFormat: when the record is present but the checksum is wrong
        """
        # The file is only needed for this check, so it is closed again on every
        # exit path (return as well as raise).
        with File.from_path(inpath) as file:
            file.seek(-4, io.SEEK_END)  # last 4 bytes indicates where linfo ends
            l_info_start_offset = abs(
                convert_int32(file.read(4), Endian.LITTLE)
                - upx_parser.cparser_le.l_info_t.size
            )
            if l_info_start_offset > file.size():
                return False
            file.seek(l_info_start_offset, io.SEEK_SET)
            upx_header = parse_upx_l_info(file)
            if upx_header.l_magic != b"UPX!":  # Magic
                return False
            if not self.upx_checksum_validates(file, upx_header, elf):
                raise InvalidInputFormat("Invalid UPX checksum")
            return True

    def extract(self, inpath: Path, outdir: Path):
        """Extract the ELF file at *inpath* into *outdir*.

        Nothing is written when the file cannot be parsed as ELF, or when it is a
        whole file that is neither a kernel image nor UPX-packed.

        :raises InvalidInputFormat: when a UPX record is present with a bad checksum
        """
        # ELF file extraction is special in that in the general case no new files are extracted, thus
        # when we want to clean up all carves to save space, carved ELF files would be deleted as well,
        # however we want to keep carved out ELF files, as they are the interesting stuff!
        elf = lief.ELF.parse(str(inpath))

        if elf is None:
            logger.error(
                "Trying to extract an invalid ELF file.", inpath=inpath, outdir=outdir
            )
            return

        is_kernel = (
            elf.header.file_type == lief.ELF.Header.FILE_TYPE.EXEC
            and elf.has_section(KERNEL_INIT_DATA_SECTION)
        )
        if is_kernel:
            with File.from_path(inpath) as file:
                extract_initramfs(elf, file, outdir)

        elif self.is_valid_upx(inpath=inpath, elf=elf):
            extract_upx(inpath, outdir)

        elif not self.is_whole_file:
            # make a copy, and let the carved chunk be deleted
            outdir.mkdir(parents=True, exist_ok=False)
            shutil.copy2(inpath, outdir / "carved.elf")
            # more work will be done, when outdir is picked up by processing,
            # and the ELF file is processed as a whole file.
            # As a performance side effect, ELF files will be searched for chunks twice.
            # Even though the second chunk search one is short-circuited,
            # because the ELF handler will recognize it as a whole file
            # other handlers might burn some cycles on the file as well.
149
150
def extract_upx(inpath: Path, outdir: Path):
    """Run ``upx -d`` on *inpath*, writing the result into a newly created *outdir*.

    The output file is named after the stem of *inpath* with an ``.elf`` suffix.
    *outdir* must not exist yet; it is created together with missing parents.
    """
    command_template = ("upx", "-d", "{inpath}", "-o{outdir}/{inpath.stem}.elf")
    upx_decompress = Command(*command_template)
    outdir.mkdir(parents=True, exist_ok=False)
    upx_decompress.extract(inpath, outdir)
155
156
def extract_initramfs(elf, file: File, outdir):
    """Carve the initramfs out of a kernel image, possibly with 4 extra bytes.

    The alignment rules of the initramfs make it impossible to calculate the exact
    start and end offsets, so the output may carry 4 extra bytes before or after
    the initramfs.

    Nothing is carved when the init data section is missing or empty, or when the
    calculated offsets are not plausible.
    """
    if not elf.has_section(KERNEL_INIT_DATA_SECTION):
        return

    section = elf.get_section(KERNEL_INIT_DATA_SECTION)
    if not section.size:
        return

    # initramfs size is at the end of the section either 64bit or 32bit depending on the platform
    # see usr/initramfs_data.S in the kernel
    # The size is padded to 8 bytes, see include/asm-generic/vmlinux.lds.h
    # The actual initramfs is right before the size
    if elf.header.identity_class == lief.ELF.Header.CLASS.ELF64:
        size_field_len, read_size = 8, convert_int64
    else:
        size_field_len, read_size = 4, convert_int32

    if elf.header.identity_data == lief.ELF.Header.ELF_DATA.LSB:
        endian = Endian.LITTLE
    else:
        endian = Endian.BIG

    section_start = section.file_offset
    section_end = section_start + section.size
    size_offset = section_end - size_field_len
    initramfs_size = read_size(file[size_offset:section_end], endian=endian)

    # initramfs start is aligned to 4 bytes, the size field is aligned to 8 bytes
    # this is unfortunate, as we do not know the start, only the padded end
    # unfortunately we have two valid values for the padding of the initramfs end:
    # 0 and 4, 1 and 5, 2 and 6, 3 and 7
    # let's calculate the offsets for the smaller padding values
    initramfs_start = size_offset - round_up(initramfs_size, 4)
    initramfs_end = initramfs_start + initramfs_size
    padding = size_offset - initramfs_end

    # initramfs can be turned off (https://www.linux.com/training-tutorials/kernel-newbie-corner-initrd-and-initramfs-whats/)
    # in which case the above calculations most probably end up with bogus chunk offsets
    if not (section_start <= initramfs_start < initramfs_end <= section_end):
        return
    # the bytes between the initramfs and the size field must all be zero
    if bytes(padding) != file[initramfs_end:size_offset]:
        return

    # when bigger padding is also a possibility, include 4 more bytes from the beginning
    room_for_bigger_padding = section_start <= initramfs_start - 4
    if room_for_bigger_padding and (
        bytes(padding + 4) == file[initramfs_end - 4 : size_offset]
    ):
        initramfs_start -= 4

    initramfs_chunk = ValidChunk(
        start_offset=initramfs_start, end_offset=initramfs_end
    )
    carve_chunk_to_file(outdir / "initramfs", file, initramfs_chunk)
225
226
class _ELFBase(StructHandler):
    """Chunk calculation shared by the 32-bit and 64-bit ELF handlers.

    Subclasses declare the C structures; the struct names used here
    (``elf_shdr_t``, ``elf_phdr_t``, ``module_signature_t``) must exist in them.
    """

    EXTRACTOR = None
    SECTION_HEADER_STRUCT = "elf_shdr_t"
    PROGRAM_HEADER_STRUCT = "elf_phdr_t"

    def is_valid_header(self, header) -> bool:
        """Return True when type, machine and version are all values LIEF knows."""
        try:
            lief.ELF.Header.FILE_TYPE(header.e_type)
            lief.ELF.ARCH(header.e_machine)
            lief.ELF.Header.VERSION(header.e_version)
        except ValueError:
            # at least one field holds a value outside of the corresponding enum
            return False
        else:
            return True

    @staticmethod
    def get_endianness(file: File, start_offset: int) -> Endian:
        """Read the data encoding byte (offset 5 of the ELF) and map it to an Endian.

        0x1 means little-endian, every other value is treated as big-endian.
        """
        ei_data_offset = start_offset + 5
        file.seek(ei_data_offset, io.SEEK_SET)
        ei_data = convert_int8(file.read(1), Endian.LITTLE)
        if ei_data == 0x1:
            return Endian.LITTLE
        return Endian.BIG

    def get_last_section_end(
        self, file: File, sections_start_offset: int, sections_num: int, endian
    ) -> int:
        """Return the largest ``sh_offset + sh_size`` among the section headers.

        NOBITS sections and sections whose type is unknown to LIEF are ignored.
        The result is 0 when no section qualifies.
        """
        file.seek(sections_start_offset)
        furthest_end = 0

        for _ in range(sections_num):
            shdr = self._struct_parser.parse(self.SECTION_HEADER_STRUCT, file, endian)

            try:
                section_type = lief.ELF.Section.TYPE(shdr.sh_type)
            except ValueError:
                continue
            if section_type == lief.ELF.Section.TYPE.NOBITS:
                continue

            furthest_end = max(shdr.sh_offset + shdr.sh_size, furthest_end)

        return furthest_end

    def get_last_program_end(
        self, file: File, programs_start_offset: int, programs_num: int, endian
    ) -> int:
        """Return the largest ``p_offset + p_filesz`` among the program headers.

        The result is 0 when there are no program headers.
        """
        file.seek(programs_start_offset)
        segment_ends = [0]

        for _ in range(programs_num):
            phdr = self._struct_parser.parse(self.PROGRAM_HEADER_STRUCT, file, endian)
            segment_ends.append(phdr.p_offset + phdr.p_filesz)

        return max(segment_ends)

    def get_end_offset(self, file: File, start_offset: int, header, endian) -> int:
        """Return the absolute end offset of the ELF data starting at *start_offset*."""
        # Usually the section header is the last, but in some cases the program headers are
        # put to the end of the file, and in some cases sections header and actual sections
        # can be also intermixed, so we need also to check the end of the last section and
        # also the last program segment.
        # We check which one is the last and use it as a file size.
        shoff = header.e_shoff
        phoff = header.e_phoff
        # all four candidates are relative to the beginning of the ELF file
        candidates = (
            shoff + (header.e_shnum * header.e_shentsize),
            phoff + (header.e_phnum * header.e_phentsize),
            self.get_last_section_end(
                file, start_offset + shoff, header.e_shnum, endian
            ),
            self.get_last_program_end(
                file, start_offset + phoff, header.e_phnum, endian
            ),
        )
        return start_offset + max(candidates)

    def get_signed_kernel_module_end_offset(self, file: File, end_offset: int) -> int:
        """Extend *end_offset* over an appended kernel module signature, if there is one.

        Returns *end_offset* unchanged when the first footer found does not sit
        exactly where the signature length says it should, or when none is found.
        """
        # signed kernel modules are ELF files followed by:
        # - a PKCS7 signature
        # - a module_signature structure
        # - a custom footer value '~Module signature appended~\n'
        # we check if a valid kernel module signature is present after the ELF file
        # and return an end_offset that includes that whole signature part.

        file.seek(end_offset, io.SEEK_SET)
        for footer_offset in iterate_patterns(file, KERNEL_MODULE_SIGNATURE_FOOTER):
            # the signature info record directly precedes the footer
            info_offset = footer_offset - KERNEL_MODULE_SIGNATURE_INFO_LEN
            file.seek(info_offset, io.SEEK_SET)
            module_signature = self._struct_parser.parse(
                "module_signature_t", file, Endian.BIG
            )
            logger.debug(
                "module_signature_t",
                module_signature=module_signature,
                _verbosity=3,
            )
            expected_footer_offset = (
                end_offset + module_signature.sig_len + KERNEL_MODULE_SIGNATURE_INFO_LEN
            )
            if footer_offset == expected_footer_offset:
                end_offset = footer_offset + len(KERNEL_MODULE_SIGNATURE_FOOTER)

            # We stop at the first SIGNATURE FOOTER match
            break

        return end_offset

    def is_upx(self, file: File, start_offset: int, end_offset: int) -> bool:
        """Check if UPX magic is present after ELF header.

        Only the first 4096 bytes of the chunk (or fewer, if it is shorter) are searched.
        """
        probe_end = min(end_offset, start_offset + 4096)
        return b"UPX!" in file[start_offset:probe_end]

    def get_upx_end_offset(self, file: File, start_offset: int, end_offset: int) -> int:
        """Locate UPX footer in ELF file and returns UPX end offset or original end offset."""
        upx_footer = b"\xff\x00\x00\x00\x00UPX!\x00\x00\x00\x00"
        for packhead_offset in iterate_patterns(file=file, pattern=upx_footer):
            # seek to end of footer
            file.seek(packhead_offset + len(upx_footer), io.SEEK_SET)
            # sometimes more NULL bytes are added
            file.seek(read_until_past(file=file, pattern=b"\x00"))
            packheader = parse_upx_packhead(file)
            packed_size = packheader.c_filesize + packheader.size

            if packheader.magic != b"UPX!":
                continue
            if packheader.u_filesize1 != packheader.u_filesize2:
                continue
            # the pack header has to end exactly where the packed file ends
            if packed_size != file.tell() - start_offset:
                continue
            if packed_size % 4 != 0:
                continue
            return start_offset + packed_size

        # no matching UPX footer found
        return end_offset

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ElfChunk]:
        """Build the ElfChunk starting at *start_offset*, or None for an invalid header."""
        endian = self.get_endianness(file, start_offset)
        file.seek(start_offset, io.SEEK_SET)
        header = self.parse_header(file, endian)
        if not self.is_valid_header(header):
            return None

        end_offset = self.get_end_offset(file, start_offset, header, endian)

        # kernel modules are always relocatable
        is_relocatable = header.e_type == lief.ELF.Header.FILE_TYPE.REL.value
        if is_relocatable:
            end_offset = self.get_signed_kernel_module_end_offset(file, end_offset)

        if self.is_upx(file=file, start_offset=start_offset, end_offset=end_offset):
            end_offset = self.get_upx_end_offset(file, start_offset, end_offset)

        # do a special extraction of ELF files with ElfChunk
        return ElfChunk(start_offset=start_offset, end_offset=end_offset)
394
395
class ELF32Handler(_ELFBase):
    """Handler for 32-bit ELF files (class byte 0x01 in the identification bytes)."""

    NAME = "elf32"

    # ELF magic, followed by class 0x01 (32-bit), either data encoding and version 0x01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            01
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 32-bit layouts of the ELF header, section header and program header, plus the
    # module signature info record found in front of a signed kernel module's footer.
    # The section/program header and signature typedef names are shared with the
    # 64-bit handler, because the base class refers to them by name.
    C_DEFINITIONS = r"""
        typedef struct elf_header_32 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint32 e_entry;
            uint32 e_phoff;
            uint32 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_32_t;

        typedef struct elf32_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint32 sh_flags;
            uint32 sh_addr;
            uint32 sh_offset;
            uint32 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint32 sh_addralign;
            uint32 sh_entsize;
        } elf_shdr_t;

        typedef struct elf32_phdr {
            uint32 p_type;
            uint32 p_offset;
            uint32 p_vaddr;
            uint32 p_paddr;
            uint32 p_filesz;
            uint32 p_memsz;
            uint32 p_flags;
            uint32 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    # Name of the file header typedef in C_DEFINITIONS
    # (presumably what the inherited parse_header uses; confirm in StructHandler).
    HEADER_STRUCT = "elf_header_32_t"

    # Handler documentation metadata.
    DOC = HandlerDoc(
        name="ELF (32-bit)",
        description="The 32-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 32-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )
491
492
class ELF64Handler(_ELFBase):
    """Handler for 64-bit ELF files (class byte 0x02 in the identification bytes)."""

    NAME = "elf64"

    # ELF magic, followed by class 0x02 (64-bit), either data encoding and version 0x01.
    PATTERNS = [
        HexString(
            """
            // uint32 e_ident_magic;
            7F 45 4C 46
            // e_ident_class must be 0x1 (32bit) or 0x2 (64bit)
            02
            // e_ident_data must be 0x1 (little-endian) or 0x2 (big-endian)
            (01 | 02)
            // e_ident_version must be 0x1.
            01
            """
        )
    ]

    # 64-bit layouts of the ELF header, section header and program header, plus the
    # module signature info record found in front of a signed kernel module's footer.
    # Note that p_flags directly follows p_type here, unlike in the 32-bit layout.
    # The section/program header and signature typedef names are shared with the
    # 32-bit handler, because the base class refers to them by name.
    C_DEFINITIONS = r"""
        typedef struct elf_header_64 {
            uint32 e_ident_magic;
            uint8 e_ident_class;
            uint8 e_ident_data;
            uint8 e_ident_version;
            uint8 e_ident_osabi;
            uint8 e_ident_abi_version;
            uint8 e_ident_pad[7];
            uint16 e_type;
            uint16 e_machine;
            uint32 e_version;
            uint64 e_entry;
            uint64 e_phoff;
            uint64 e_shoff;
            uint32 e_flags;
            uint16 e_ehsize;
            uint16 e_phentsize;
            uint16 e_phnum;
            uint16 e_shentsize;
            uint16 e_shnum;
            uint16 e_shstrndx;
        } elf_header_64_t;

        typedef struct elf64_shdr {
            uint32 sh_name;
            uint32 sh_type;
            uint64 sh_flags;
            uint64 sh_addr;
            uint64 sh_offset;
            uint64 sh_size;
            uint32 sh_link;
            uint32 sh_info;
            uint64 sh_addralign;
            uint64 sh_entsize;
        } elf_shdr_t;

        typedef struct elf64_phdr {
            uint32 p_type;
            uint32 p_flags;
            uint64 p_offset;
            uint64 p_vaddr;
            uint64 p_paddr;
            uint64 p_filesz;
            uint64 p_memsz;
            uint64 p_align;
        } elf_phdr_t;

        typedef struct module_signature {
            uint8 algo; /* Public-key crypto algorithm [0] */
            uint8 hash; /* Digest algorithm [0] */
            uint8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
            uint8 signer_len; /* Length of signer's name [0] */
            uint8 key_id_len; /* Length of key identifier [0] */
            uint8 __pad[3];
            uint32 sig_len; /* Length of signature data */
        } module_signature_t;
    """
    # Name of the file header typedef in C_DEFINITIONS
    # (presumably what the inherited parse_header uses; confirm in StructHandler).
    HEADER_STRUCT = "elf_header_64_t"

    # Handler documentation metadata.
    DOC = HandlerDoc(
        name="ELF (64-bit)",
        description="The 64-bit ELF (Executable and Linkable Format) is a binary file format used for executables, object code, shared libraries, and core dumps. It supports 64-bit addressing and includes headers for program and section information.",
        handler_type=HandlerType.EXECUTABLE,
        vendor=None,
        references=[
            Reference(
                title="ELF File Format Specification",
                url="https://refspecs.linuxfoundation.org/elf/elf.pdf",
            ),
            Reference(
                title="ELF Wikipedia",
                url="https://en.wikipedia.org/wiki/Executable_and_Linkable_Format",
            ),
        ],
        limitations=[],
    )