1import io
2
3from structlog import get_logger
4
5from unblob.extractors import Command
6
7from ...file_utils import Endian, convert_int64
8from ...models import (
9 File,
10 Handler,
11 HandlerDoc,
12 HandlerType,
13 HexString,
14 Reference,
15 ValidChunk,
16)
17
logger = get_logger()

# Size in bytes of the member header: magic (4) + VN (1) + DS (1).
HEADER_LEN = 6
# Step used when scanning for the trailer: the LZMA stream is 2 bytes aligned.
LZMA_ALIGNMENT = 2
24
25
class LZipHandler(Handler):
    """Handler that carves lzip members and extracts them with ``lziprecover``."""

    NAME = "lzip"

    # ASCII "LZIP" magic followed by a 0x01 byte.
    PATTERNS = [HexString("4C 5A 49 50 01")]

    EXTRACTOR = Command(
        "lziprecover",
        "-k",
        "-D0",
        "-i",
        "{inpath}",
        "-o",
        "{outdir}/lz.uncompressed",
    )

    DOC = HandlerDoc(
        name="Lzip",
        description="Lzip is a lossless compressed file format based on the LZMA algorithm. It features a simple header, CRC-checked integrity, and efficient compression for large files.",
        handler_type=HandlerType.COMPRESSION,
        vendor=None,
        references=[
            Reference(
                title="Lzip File Format Documentation",
                url="https://www.nongnu.org/lzip/manual/lzip_manual.html",
            ),
            Reference(
                title="Lzip Wikipedia",
                url="https://en.wikipedia.org/wiki/Lzip",
            ),
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk | None:
        """Find where the lzip member starting at ``start_offset`` ends.

        The search is deliberately naive: an 8-byte little-endian integer is
        read at every ``LZMA_ALIGNMENT``-byte step after the header until the
        value read equals the distance from ``start_offset`` to the end of
        those 8 bytes. Such a value is taken to be the Member Size field,
        i.e. "the total size of the member, including header and trailer".

        All seeks are relative to the current position, so the file is
        presumably positioned at ``start_offset`` on entry (TODO confirm
        against the caller).

        Args:
            file: File being scanned.
            start_offset: Offset at which the member begins.

        Returns:
            A ``ValidChunk`` spanning ``start_offset`` up to the end of the
            matching 8-byte field.

        Note:
            The loop has no exit other than a match. Running into EOF is
            expected to raise and be caught by the finder; that handling is
            not visible in this file.
        """
        # Step over the fixed-size header.
        file.seek(HEADER_LEN, io.SEEK_CUR)

        while True:
            file.seek(LZMA_ALIGNMENT, io.SEEK_CUR)
            candidate_size = convert_int64(file.read(8), Endian.LITTLE)
            window_end = file.tell()
            if candidate_size == window_end - start_offset:
                return ValidChunk(start_offset=start_offset, end_offset=window_end)
            # Not the size field: rewind over the bytes just read so that the
            # next iteration starts LZMA_ALIGNMENT bytes after this window did.
            file.seek(-8, io.SEEK_CUR)