1from pathlib import Path
2
3from lz4.block import decompress
4
5from unblob.file_utils import File, FileSystem, InvalidInputFormat, StructParser
6from unblob.models import (
7 Endian,
8 Extractor,
9 ExtractResult,
10 HandlerDoc,
11 HandlerType,
12 Reference,
13 Regex,
14 StructHandler,
15 ValidChunk,
16)
17
# C-style declaration of the XALZ header, handed to StructParser by the
# extractor and exposed to the handler below. The header consists of three
# uint32 fields: `magic`, `descriptor_index` and `uncompressed_size`; both
# users in this module parse it as little-endian.
C_DEFINITIONS = r"""
typedef struct xalz_header {
 uint32 magic;
 uint32 descriptor_index;
 uint32 uncompressed_size;
} xalz_header_t;
"""
25
26
class XALZExtractor(Extractor):
    """Extractor that inflates the LZ4 payload following an XALZ header."""

    def __init__(self):
        # Build the parser once; it is reused by every extract() call.
        self._struct_parser = StructParser(C_DEFINITIONS)

    def extract(self, inpath: Path, outdir: Path):
        """Decompress *inpath* into ``<inpath.name>.uncompressed`` under *outdir*.

        The little-endian ``xalz_header_t`` is parsed from the start of the
        file, everything after it is read and passed to lz4's block
        ``decompress`` together with the ``uncompressed_size`` taken from the
        header, and the result is written through a ``FileSystem`` rooted at
        *outdir*.

        Returns an ``ExtractResult`` carrying the problems collected by that
        ``FileSystem``.
        """
        sandbox = FileSystem(outdir)
        target = Path(f"{inpath.name}.uncompressed")

        with File.from_path(inpath) as file:
            header = self._struct_parser.parse("xalz_header_t", file, Endian.LITTLE)

            # After parsing, the remaining bytes of the file are the payload.
            payload = file.read()
            plain = decompress(payload, uncompressed_size=header.uncompressed_size)
            sandbox.write_bytes(target, plain)

        return ExtractResult(reports=sandbox.problems)
41
42
class XALZHandler(StructHandler):
    """Handler describing XALZ chunks and delegating extraction to XALZExtractor."""

    NAME = "xalz"

    # The escaped bytes spell the ASCII magic "XALZ", prefixed with a `^` anchor.
    PATTERNS = [Regex("^\x58\x41\x4c\x5a")]

    C_DEFINITIONS = C_DEFINITIONS
    HEADER_STRUCT = "xalz_header_t"
    EXTRACTOR = XALZExtractor()

    DOC = HandlerDoc(
        name="Xamarin Compressed assemblies",
        description="Xamarin compressed assemblies are Xamarin DLL compressed with LZ4 + a custom header.",
        handler_type=HandlerType.EXECUTABLE,
        vendor="Microsoft",
        references=[
            Reference(
                title="Reverse Engineering a Xamarin Application",
                url="https://web.archive.org/web/20250114215653/https://securitygrind.com/reverse-engineering-a-xamarin-application/",
            )
        ],
        limitations=[],
    )

    def is_valid_header(self, header) -> bool:
        """Return True when the header declares a non-zero uncompressed size."""
        declared_size = header.uncompressed_size
        return declared_size > 0

    def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk:
        """Return the chunk spanning from *start_offset* to the end of *file*.

        Raises:
            InvalidInputFormat: if the parsed header fails is_valid_header().
        """
        parsed = self.parse_header(file, endian=Endian.LITTLE)

        if self.is_valid_header(parsed):
            # NOTE: XALZ does not store the compressed size, and the Python lz4
            # bindings give no easy way to find where the raw compressed
            # stream ends, so the chunk is taken to extend to the end of the
            # file.
            return ValidChunk(start_offset=start_offset, end_offset=file.size())

        raise InvalidInputFormat("Invalid XALZ header")