Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/archive/arc.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

39 statements  

1from structlog import get_logger 

2 

3from unblob.extractors.command import Command 

4 

5from ...models import ( 

6 File, 

7 HandlerDoc, 

8 HandlerType, 

9 HexString, 

10 Reference, 

11 StructHandler, 

12 ValidChunk, 

13) 

14 

# Module-level structlog logger.
15logger = get_logger() 

16 

# Two-byte sequence that ends an archive: the 0x1A archive marker followed by
# header type 0x00. ARCHandler.calculate_chunk stops walking entries when it
# reads these bytes.
17END_HEADER = b"\x1a\x00" 

18 

19 

class ARCHandler(StructHandler):
    """Handler that locates ARC archives and delegates extraction to ``unar``.

    A chunk starts at an entry header matching ``PATTERNS`` and is extended
    entry by entry until the ``END_HEADER`` bytes are read.
    """

    NAME = "arc"

    PATTERNS = [
        HexString(
            """
            // Each entry in an archive begins with a one byte archive marker set to 0x1A.
            // The marker is followed by a one byte header type code, from 0x0 to 0x7.
            // Then a null-byte or uninitialized-byte terminated filename string of 13 bytes, the
            // uninitialized byte is always set between 0xf0 and 0xff.
            1A (01 | 02 | 03 | 04 | 05 | 06 | 07) [12] (00 | F0 | F1 | F2 | F3 | F4 | F5 | F6 | F7 | F8 | F9 | FA | FB | FC | FD | FE | FF)
        """
        )
    ]

    C_DEFINITIONS = r"""
        typedef struct arc_head { /* archive entry header format */
            int8 archive_marker;
            int8 header_type;
            char name[13]; /* file name */
            ulong size; /* size of file, in bytes */
            ushort date; /* creation date */
            ushort time; /* creation time */
            short crc; /* cyclic redundancy check */
            ulong length; /* true file length */
        } arc_head_t;
    """

    HEADER_STRUCT = "arc_head_t"
    EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")

    DOC = HandlerDoc(
        name="ARC",
        description="ARC is a legacy archive format used to store multiple files with metadata such as file size, creation date, and CRC.",
        handler_type=HandlerType.ARCHIVE,
        vendor=None,
        references=[
            Reference(
                title="ARC File Format Documentation",
                url="https://en.wikipedia.org/wiki/ARC_(file_format)",
            )
        ],
        limitations=[],
    )

    def valid_name(self, name: bytes) -> bool:
        """Return True when the header's name field holds a usable file name.

        The name is rejected when it starts with a null byte, when nothing is
        left after dropping the final byte and stripping null bytes from both
        ends, or when the remaining bytes are not valid UTF-8.
        """
        if name.startswith(b"\x00"):
            return False

        # The last byte of the field is the terminator slot (null or an
        # uninitialized 0xF0-0xFF byte per the search pattern), so it is
        # excluded before decoding.
        stripped = name[:-1].strip(b"\x00")
        try:
            decoded = stripped.decode("utf-8")
        except UnicodeDecodeError:
            return False
        return bool(decoded)

    def valid_header(self, header) -> bool:
        """Return True when a parsed ``arc_head_t`` looks like a real entry header.

        Checks the archive marker, the header type range and the file name.
        """
        if header.archive_marker != 0x1A:
            return False
        # header_type is declared as int8; if that type is parsed as signed,
        # type bytes 0x80-0xFF show up as negative numbers. An upper-bound-only
        # comparison would accept them, so both bounds are checked.
        if not 0 <= header.header_type <= 0x07:
            return False
        return self.valid_name(header.name)

    def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk | None:
        """Walk the entry headers from ``start_offset`` to find the archive end.

        Each iteration first reads two bytes: when they equal ``END_HEADER``
        the chunk ends right after them. Otherwise a full header is parsed at
        the same offset and validated, and the walk skips ahead by the header
        length plus the header's ``size`` field.

        Returns a ``ValidChunk`` spanning ``start_offset`` to the byte after
        the end header, or ``None`` as soon as a header fails validation.
        """
        offset = start_offset
        while True:
            file.seek(offset)
            if file.read(len(END_HEADER)) == END_HEADER:
                offset += len(END_HEADER)
                break

            # Not the end marker: rewind and parse the full entry header.
            file.seek(offset)
            header = self.parse_header(file)
            if not self.valid_header(header):
                return None

            # Jump over this header and the data it describes.
            offset += len(header) + header.size

        return ValidChunk(
            start_offset=start_offset,
            end_offset=offset,
        )

103 start_offset=start_offset, 

104 end_offset=offset, 

105 )