Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/archive/arc.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

40 statements  

1from typing import Optional 

2 

3from structlog import get_logger 

4 

5from unblob.extractors.command import Command 

6 

7from ...models import ( 

8 File, 

9 HandlerDoc, 

10 HandlerType, 

11 HexString, 

12 Reference, 

13 StructHandler, 

14 ValidChunk, 

15) 

16 

logger = get_logger()

# Two-byte sequence that stops the header walk in ARCHandler.calculate_chunk:
# the 0x1A archive marker immediately followed by a 0x00 byte.
END_HEADER = b"\x1a\x00"

20 

21 

class ARCHandler(StructHandler):
    """Locate ARC archives and compute the byte range they occupy.

    A candidate is found through ``PATTERNS``; ``calculate_chunk`` then walks
    the entry headers one after another until it meets ``END_HEADER``.
    ``EXTRACTOR`` is configured with the external ``unar`` command.
    """

    NAME = "arc"

    PATTERNS = [
        HexString(
            """
            // Each entry in an archive begins with a one byte archive marker set to 0x1A.
            // The marker is followed by a one byte header type code, from 0x0 to 0x7.
            // Then a null-byte or uninitialized-byte terminated filename string of 13 bytes, the
            // uninitialized byte is always set between 0xf0 and 0xff.
            1A (01 | 02 | 03 | 04 | 05 | 06 | 07) [12] (00 | F0 | F1 | F2 | F3 | F4 | F5 | F6 | F7 | F8 | F9 | FA | FB | FC | FD | FE | FF)
        """
        )
    ]

    C_DEFINITIONS = r"""
        typedef struct arc_head {  /* archive entry header format */
            int8 archive_marker;
            int8 header_type;
            char name[13];  /* file name */
            ulong size;     /* size of file, in bytes */
            ushort date;    /* creation date */
            ushort time;    /* creation time */
            short crc;      /* cyclic redundancy check */
            ulong length;   /* true file length */
        } arc_head_t;
    """

    HEADER_STRUCT = "arc_head_t"
    EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")

    DOC = HandlerDoc(
        name="ARC",
        description="ARC is a legacy archive format used to store multiple files with metadata such as file size, creation date, and CRC.",
        handler_type=HandlerType.ARCHIVE,
        vendor=None,
        references=[
            Reference(
                title="ARC File Format Documentation",
                url="https://en.wikipedia.org/wiki/ARC_(file_format)",
            )
        ],
        limitations=[],
    )

    def valid_name(self, name: bytes) -> bool:
        """Return True if ``name`` looks like a usable entry file name.

        The name is rejected when it starts with a null byte, when nothing is
        left after dropping the final byte and stripping surrounding null
        bytes, or when the remaining bytes are not valid UTF-8.
        """
        if name.startswith(b"\x00"):
            return False
        try:
            # The last byte is the terminator slot of the field, so it is
            # excluded before decoding.
            decoded = name[:-1].strip(b"\x00").decode("utf-8")
        except UnicodeDecodeError:
            return False
        return bool(decoded)

    def valid_header(self, header) -> bool:
        """Return True if a parsed ``arc_head_t`` passes the sanity checks.

        Checks the archive marker (0x1A), the header type range (0x00-0x07)
        and the file name (see ``valid_name``).
        """
        if header.archive_marker != 0x1A:
            return False
        # header_type is declared as int8 (signed) in C_DEFINITIONS, so a raw
        # byte of 0x80-0xFF shows up as a negative number. An upper-bound-only
        # test would let those through; check both ends of the range.
        if not 0x00 <= header.header_type <= 0x07:
            return False
        return self.valid_name(header.name)

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Walk entry headers from ``start_offset`` up to the end header.

        Returns a ``ValidChunk`` spanning ``start_offset`` to just past
        ``END_HEADER``, or ``None`` as soon as an entry header fails
        ``valid_header``.

        No explicit end-of-file check is made here.
        NOTE(review): presumably a read past the end surfaces as an exception
        from ``parse_header`` that the caller handles - confirm.
        """
        offset = start_offset
        while True:
            file.seek(offset)
            if file.read(len(END_HEADER)) == END_HEADER:
                # The end header is part of the archive, include it.
                offset += len(END_HEADER)
                break

            # Rewind: the two bytes just read are the start of a full header.
            file.seek(offset)
            header = self.parse_header(file)
            if not self.valid_header(header):
                return None

            # NOTE(review): every entry is parsed with the same fixed-size
            # struct regardless of header_type - confirm no type uses a
            # shorter header.
            offset += len(header) + header.size

        return ValidChunk(
            start_offset=start_offset,
            end_offset=offset,
        )