1import io
2from typing import Optional
3
4from structlog import get_logger
5
6from unblob.extractors import Command
7
8from ...file_utils import Endian, convert_int64
9from ...models import (
10 File,
11 Handler,
12 HandlerDoc,
13 HandlerType,
14 HexString,
15 Reference,
16 ValidChunk,
17)
18
# Module-level structlog logger.
logger = get_logger()

# Size in bytes of the fixed member header that is skipped before scanning:
# magic (4 bytes) + VN (1 byte) + DS (1 byte)
HEADER_LEN = 4 + 1 + 1
# Step in bytes between two candidate positions while scanning for the
# trailer: the LZMA stream is treated as 2-byte aligned.
LZMA_ALIGNMENT = 2
25
26
class LZipHandler(Handler):
    """Handler that recognises lzip members and computes their extent."""

    NAME = "lzip"

    # ASCII "LZIP" followed by the VN header byte set to 1.
    PATTERNS = [HexString("4C 5A 49 50 01")]

    EXTRACTOR = Command(
        "lziprecover", "-k", "-D0", "-i", "{inpath}", "-o", "{outdir}/lz.uncompressed"
    )

    DOC = HandlerDoc(
        name="Lzip",
        description="Lzip is a lossless compressed file format based on the LZMA algorithm. It features a simple header, CRC-checked integrity, and efficient compression for large files.",
        handler_type=HandlerType.COMPRESSION,
        vendor=None,
        references=[
            Reference(
                title="Lzip File Format Documentation",
                url="https://www.nongnu.org/lzip/manual/lzip_manual.html",
            ),
            Reference(
                title="Lzip Wikipedia",
                url="https://en.wikipedia.org/wiki/Lzip",
            ),
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Locate the end of the lzip member by scanning for its Member Size field.

        The header is skipped relative to the current file position, then a
        little-endian uint64 is read at every ``LZMA_ALIGNMENT`` step. The
        Member Size field holds "the total size of the member, including
        header and trailer", so the member ends at the first position where
        the value just read equals the number of bytes consumed since
        ``start_offset``.

        There is no explicit EOF check: running out of data is left to
        surface as an error from the read/convert step, which the finder is
        expected to catch.
        """
        # Relative seek: the cursor is presumably already at start_offset
        # when this is called -- verify against the caller.
        file.seek(HEADER_LEN, io.SEEK_CUR)

        # NOTE(review): only offsets on a 2-byte stride are probed, so the
        # scan relies on the member size being a multiple of LZMA_ALIGNMENT
        # -- confirm against the format specification.
        while True:
            file.seek(LZMA_ALIGNMENT, io.SEEK_CUR)
            candidate_size = convert_int64(file.read(8), Endian.LITTLE)
            position = file.tell()

            if candidate_size != position - start_offset:
                # Not the trailer field: rewind the 8 bytes just consumed so
                # the next iteration lands one alignment step further on.
                file.seek(-8, io.SEEK_CUR)
                continue

            return ValidChunk(start_offset=start_offset, end_offset=position)