Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/compression/lzh.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

31 statements  

1import io 

2 

3from ...extractors import Command 

4from ...file_utils import Endian 

5from ...models import ( 

6 File, 

7 HandlerDoc, 

8 HandlerType, 

9 Reference, 

10 Regex, 

11 StructHandler, 

12 ValidChunk, 

13) 

14 

# Added to the 8-bit ``header_size`` field of level 0/1 headers when computing
# the full header length (see ``LZHHandler.calculate_chunk``).
# NOTE(review): presumably accounts for the two leading bytes (size + checksum)
# that the field itself does not count -- confirm against the format spec.
PADDING_LEN = 2

# Smallest header length accepted as plausible; shorter candidates are rejected.
# Reference: CPP/7zip/Archive/LzhHandler.cpp
HEADER_MIN_SIZE = 2 + 22

18 

19 

class LZHHandler(StructHandler):
    """Handler that locates LZH streams and delegates extraction to ``7z``."""

    NAME = "lzh"

    # One pattern per supported compression method identifier.
    PATTERNS = [
        Regex(r"-lh0-"),
        Regex(r"-lzs-"),
        Regex(r"-lz4-"),
        Regex(r"-lh1-"),
        Regex(r"-lh2-"),
        Regex(r"-lh3-"),
        Regex(r"-lh4-"),
        Regex(r"-lh5-"),
        Regex(r"-lh6-"),
        Regex(r"-lh7-"),
        Regex(r"-lh8-"),
        Regex(r"-lhd-"),
    ]

    # In both header layouts below, ``method_id`` is preceded by two bytes,
    # so the header starts two bytes before the matched pattern.
    PATTERN_MATCH_OFFSET = -2

    C_DEFINITIONS = r"""
        typedef struct lzh_default_header {
            uint8 header_size;          // excludes extended headers size
            uint8 header_checksum;
            char method_id[5];
            uint32 compressed_size;     // includes all extended headers size (if level 1)
            uint32 uncompressed_size;
            uint32 timestamp;
            uint8 fd_attribute;
            uint8 level_identifier;
        } lzh_default_header_t;

        typedef struct level_2_header {
            uint16 header_size;         // includes all extended headers
            char method_id[5];
            uint32 compressed_size;     // excludes all extended headers
            uint32 uncompressed_size;
            uint32 timestamp;
            uint8 fd_attribute;
            uint8 level_identifier;
        } level_2_header_t;
    """
    HEADER_STRUCT = "lzh_default_header_t"

    EXTRACTOR = Command("7z", "x", "-p", "-y", "{inpath}", "-o{outdir}")

    DOC = HandlerDoc(
        name="LZH",
        description="LZH is a legacy archive format that uses various compression methods such as '-lh0-' and '-lh5-'. It was widely used in Japan and on older systems for compressing and archiving files.",
        handler_type=HandlerType.COMPRESSION,
        vendor=None,
        references=[
            Reference(
                title="LZH Compression Format",
                url="https://en.wikipedia.org/wiki/LHA_(file_format)",
            ),
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk | None:
        """Compute the extent of the LZH stream whose header is at the cursor.

        The header is parsed as ``lzh_default_header_t`` (little endian).
        The chunk end is the header start plus the full header length plus
        ``compressed_size``.

        Returns ``None`` when the level identifier is greater than 2 or the
        computed header length is below ``HEADER_MIN_SIZE``; otherwise a
        ``ValidChunk`` beginning at ``start_offset``.
        """
        header = self.parse_header(file, Endian.LITTLE)
        level = header.level_identifier

        # Only header levels 0, 1 and 2 are handled.
        if level > 0x2:
            return None

        if level == 0x2:
            # A level 2 header starts with a uint16 size and has no checksum
            # (see level_2_header in C_DEFINITIONS). Rather than parsing the
            # header a second time, rebuild that little-endian uint16 from the
            # two uint8 fields already read: low byte in ``header_size``, high
            # byte in ``header_checksum``.
            full_header_len = header.header_size + (header.header_checksum << 8)
        else:
            full_header_len = header.header_size + PADDING_LEN

        if full_header_len < HEADER_MIN_SIZE:
            return None

        # Rewind to the first header byte, then skip over header and payload.
        file.seek(-len(header), io.SEEK_CUR)
        file.seek(full_header_len + header.compressed_size, io.SEEK_CUR)
        chunk_end = file.tell()

        # LZH files are null terminated: when this is the last stream of the
        # file, exactly one byte remains after it, so extend the chunk to
        # cover that trailing byte.
        file.seek(0, io.SEEK_END)
        file_end = file.tell()
        if file_end - chunk_end == 1:
            chunk_end = file_end

        return ValidChunk(
            start_offset=start_offset,
            end_offset=chunk_end,
        )

111 end_offset=end_offset, 

112 )