Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/compression/lzip.py: 62%

1import io

2from typing import Optional

4from structlog import get_logger

6from unblob.extractors import Command

8from ...file_utils import Endian, convert_int64

9from ...models import (

10 File,

11 Handler,

12 HandlerDoc,

13 HandlerType,

14 HexString,

15 Reference,

16 ValidChunk,

17)

logger = get_logger()

# Size of the fixed member header, field by field:
# magic (4 bytes) + VN (1 byte) + DS (1 byte).
HEADER_LEN = 4 + 1 + 1

# The LZMA stream is 2-byte aligned, so the trailer is searched for in
# steps of this many bytes.
LZMA_ALIGNMENT = 2

class LZipHandler(Handler):
    """Handler that carves lzip members and extracts them with ``lziprecover``."""

    NAME = "lzip"

    # ASCII "LZIP" followed by the byte 0x01.
    PATTERNS = [HexString("4C 5A 49 50 01")]

    EXTRACTOR = Command(
        "lziprecover",
        "-k",
        "-D0",
        "-i",
        "{inpath}",
        "-o",
        "{outdir}/lz.uncompressed",
    )

    DOC = HandlerDoc(
        name="Lzip",
        description="Lzip is a lossless compressed file format based on the LZMA algorithm. It features a simple header, CRC-checked integrity, and efficient compression for large files.",
        handler_type=HandlerType.COMPRESSION,
        vendor=None,
        references=[
            Reference(
                title="Lzip File Format Documentation",
                url="https://www.nongnu.org/lzip/manual/lzip_manual.html",
            ),
            Reference(
                title="Lzip Wikipedia",
                url="https://en.wikipedia.org/wiki/Lzip",
            ),
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Locate the end of the lzip member that begins at ``start_offset``.

        The member length is not known up front, so the trailing
        "Member Size" field is searched for instead: after skipping the
        header, an 8-byte little-endian integer is read at every
        ``LZMA_ALIGNMENT``-byte step, and the search stops as soon as that
        integer equals the number of bytes between ``start_offset`` and the
        position just past the integer, i.e. "the total size of the member,
        including header and trailer".

        The seeks are relative, so ``file`` is expected to be positioned at
        ``start_offset`` on entry.

        Returns:
            A ``ValidChunk`` spanning ``start_offset`` up to the end of the
            matching Member Size field.

        There is no explicit end-of-file check: either the field is found
        or EOF is reached, which is left to the finder to catch.
        """
        size_field_len = 8

        # Jump over the fixed-size header.
        file.seek(HEADER_LEN, io.SEEK_CUR)

        while True:
            # Move to the next aligned candidate position.
            file.seek(LZMA_ALIGNMENT, io.SEEK_CUR)
            candidate = convert_int64(file.read(size_field_len), Endian.LITTLE)
            position = file.tell()

            if candidate == position - start_offset:
                # The value describes exactly the span consumed so far.
                return ValidChunk(start_offset=start_offset, end_offset=position)

            # Not the trailer: undo the read so the net advance per
            # iteration is LZMA_ALIGNMENT bytes.
            file.seek(-size_field_len, io.SEEK_CUR)