Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/compression/lzh.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

32 statements  

1import io 

2from typing import Optional 

3 

4from ...extractors import Command 

5from ...file_utils import Endian 

6from ...models import ( 

7 File, 

8 HandlerDoc, 

9 HandlerType, 

10 Reference, 

11 Regex, 

12 StructHandler, 

13 ValidChunk, 

14) 

15 

# Added to the one-byte `header_size` field of level 0 and level 1 headers
# when computing the full header length.
PADDING_LEN: int = 2

# Headers whose computed length is below this value are rejected.
# Reference: CPP/7zip/Archive/LzhHandler.cpp
HEADER_MIN_SIZE: int = 2 + 22

19 

20 

class LZHHandler(StructHandler):
    """Handler that locates LZH streams and extracts them with 7z."""

    NAME = "lzh"

    # One pattern per supported method id, each wrapped in dashes
    # (for example "-lh5-").
    PATTERNS = [
        Regex(f"-{method_id}-")
        for method_id in (
            "lh0",
            "lzs",
            "lz4",
            "lh1",
            "lh2",
            "lh3",
            "lh4",
            "lh5",
            "lh6",
            "lh7",
            "lh8",
            "lhd",
        )
    ]

    # The method id sits two bytes into the header, after `header_size` and
    # `header_checksum` (see lzh_default_header below).
    PATTERN_MATCH_OFFSET = -2

    C_DEFINITIONS = r"""
        typedef struct lzh_default_header {
            uint8 header_size;              // excludes extended headers size
            uint8 header_checksum;
            char method_id[5];
            uint32 compressed_size;         // includes all extended headers size (if level 1)
            uint32 uncompressed_size;
            uint32 timestamp;
            uint8 fd_attribute;
            uint8 level_identifier;
        } lzh_default_header_t;

        typedef struct level_2_header {
            uint16 header_size;             // includes all extended headers
            char method_id[5];
            uint32 compressed_size;         // excludes all extended headers
            uint32 uncompressed_size;
            uint32 timestamp;
            uint8 fd_attribute;
            uint8 level_identifier;
        } level_2_header_t;
    """
    HEADER_STRUCT = "lzh_default_header_t"

    EXTRACTOR = Command("7z", "x", "-p", "-y", "{inpath}", "-o{outdir}")

    DOC = HandlerDoc(
        name="LZH",
        description="LZH is a legacy archive format that uses various compression methods such as '-lh0-' and '-lh5-'. It was widely used in Japan and on older systems for compressing and archiving files.",
        handler_type=HandlerType.COMPRESSION,
        vendor=None,
        references=[
            Reference(
                title="LZH Compression Format",
                url="https://en.wikipedia.org/wiki/LHA_(file_format)",
            ),
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Compute the extent of the LZH stream whose header was just matched.

        The header is parsed as little-endian from the current position of
        ``file``. The chunk ends ``header size + compressed_size`` bytes after
        the start of the header, extended by one byte when exactly one byte
        remains before the end of the file.

        Returns ``None`` when the level identifier is greater than 2 or the
        computed header size is below ``HEADER_MIN_SIZE``; otherwise a
        ``ValidChunk`` starting at ``start_offset``.
        """
        header = self.parse_header(file, Endian.LITTLE)
        level = header.level_identifier

        # Only header levels 0, 1 and 2 are handled.
        if level > 0x2:
            return None

        if level < 0x2:
            # Level 0/1: one-byte size field plus the fixed padding.
            total_header_size = header.header_size + PADDING_LEN
        else:
            # Level 2 has a uint16 header size and no checksum, so the byte
            # parsed as `header_checksum` is really the high byte of the size.
            # Combining both bytes avoids parsing the header a second time
            # (compare level_2_header in C_DEFINITIONS).
            total_header_size = header.header_size + (header.header_checksum << 8)

        if total_header_size < HEADER_MIN_SIZE:
            return None

        # Rewind to the start of the header, then skip header and payload.
        file.seek(-len(header), io.SEEK_CUR)
        file.seek(total_header_size + header.compressed_size, io.SEEK_CUR)
        chunk_end = file.tell()

        # LZH files are null terminated: if this was the last stream in the
        # file, a single byte is left over and belongs to the chunk.
        file.seek(0, io.SEEK_END)
        file_size = file.tell()
        if chunk_end + 1 == file_size:
            chunk_end = file_size

        return ValidChunk(start_offset=start_offset, end_offset=chunk_end)

111 start_offset=start_offset, 

112 end_offset=end_offset, 

113 )