Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/archive/ar.py: 71%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

41 statements  

1import os 

2from pathlib import Path 

3 

4import arpy 

5from structlog import get_logger 

6 

7from ...file_utils import FileSystem, OffsetFile, iterate_file 

8from ...models import ( 

9 Extractor, 

10 ExtractResult, 

11 File, 

12 Handler, 

13 HandlerDoc, 

14 HandlerType, 

15 HexString, 

16 Reference, 

17 ValidChunk, 

18) 

19from ...report import ExtractionProblem 

20 

# Module-level structured logger shared by the handler below.
logger = get_logger()


# 0x44 == 68 bytes: the 8-byte global signature plus the 58 + 2 bytes that the
# handler's hex pattern matches after it (member header ending in the ARFMAG).
HEADER_LENGTH = 0x44
# 0x8 == 8 bytes: length of the "!<arch>\n" global signature.
SIGNATURE_LENGTH = 0x8

26 

27 

class ArExtractor(Extractor):
    """Extractor that unpacks every member of an ``ar`` archive via ``arpy``."""

    def extract(self, inpath: Path, outdir: Path) -> ExtractResult | None:
        """Write each archived member of *inpath* below *outdir*.

        Members are processed in sorted order of their raw (bytes) names.
        A name that cannot be decoded with the default codec is decoded again
        with ``errors="replace"`` and the substitution is recorded as an
        ``ExtractionProblem`` instead of aborting the extraction.

        Returns an ``ExtractResult`` carrying the problems collected by the
        ``FileSystem`` wrapper.
        """
        fs = FileSystem(outdir)

        with arpy.Archive(inpath.as_posix()) as archive:
            archive.read_all_headers()
            members = archive.archived_files

            for raw_name in sorted(members):
                member = members[raw_name]

                try:
                    member_path = Path(raw_name.decode())
                except UnicodeDecodeError:
                    # Fall back to a lossy decode and report what was done.
                    member_path = Path(raw_name.decode(errors="replace"))
                    problem = ExtractionProblem(
                        path=repr(raw_name),
                        problem="Path is not a valid UTF/8 string",
                        resolution=f"Converted to {member_path}",
                    )
                    fs.record_problem(problem)

                # Copy exactly header.size bytes of the member, from its start.
                content = iterate_file(member, 0, member.header.size)
                fs.write_chunks(member_path, chunks=content)

        return ExtractResult(reports=fs.problems)

60 

61 

class ARHandler(Handler):
    """Handler that recognises ``ar`` archives and computes where they end."""

    NAME = "ar"

    # Global signature immediately followed by a first member header.
    PATTERNS = [
        HexString(
            """
            // "!<arch>\\n", 58 chars of whatever, then the ARFMAG
            21 3C 61 72 63 68 3E 0A [58] 60 0A
        """
        )
    ]

    EXTRACTOR = ArExtractor()

    DOC = HandlerDoc(
        name="AR",
        description="Unix AR (archive) files are used to store multiple files in a single archive with a simple header format.",
        handler_type=HandlerType.ARCHIVE,
        vendor=None,
        references=[
            Reference(
                title="Unix AR File Format Documentation",
                url="https://en.wikipedia.org/wiki/Ar_(Unix)",
            )
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk | None:
        """Determine the extent of the archive that begins at *start_offset*.

        All member headers are read through ``arpy``; the chunk ends wherever
        the cursor of *file* is left afterwards.

        If ``arpy`` raises ``ArchiveFormatError`` the cursor is rewound by
        ``HEADER_LENGTH``. When that lands back on *start_offset* the very
        first header was bad and ``None`` is returned. Otherwise the cursor is
        advanced by ``SIGNATURE_LENGTH`` and the chunk ends there.

        NOTE(review): this relies on ``ar.file`` (the ``OffsetFile``) moving
        the same cursor that ``file.tell()`` reports -- confirm in OffsetFile.
        """
        archive = arpy.Archive(fileobj=OffsetFile(file, start_offset))  # type: ignore

        try:
            archive.read_all_headers()
        except arpy.ArchiveFormatError as exc:
            logger.debug(
                "Hit an ArchiveFormatError, we've probably hit some other kind of data",
                exc_info=exc,
            )

            cursor = archive.file
            # Step back a full header length; landing on start_offset means
            # parsing failed on the first match, i.e. a malformed archive.
            cursor.seek(-HEADER_LENGTH, os.SEEK_CUR)
            if file.tell() == start_offset:
                return None
            # The failure happened further into the file: skip past the
            # signature so the chunk ends after it.
            cursor.seek(SIGNATURE_LENGTH, os.SEEK_CUR)

        chunk_end = file.tell()
        return ValidChunk(start_offset=start_offset, end_offset=chunk_end)

115 )