Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/unblob/handlers/archive/ar.py: 71%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

42 statements  

1import os 

2from pathlib import Path 

3from typing import Optional 

4 

5import arpy 

6from structlog import get_logger 

7 

8from ...file_utils import FileSystem, OffsetFile, iterate_file 

9from ...models import ( 

10 Extractor, 

11 ExtractResult, 

12 File, 

13 Handler, 

14 HandlerDoc, 

15 HandlerType, 

16 HexString, 

17 Reference, 

18 ValidChunk, 

19) 

20from ...report import ExtractionProblem 

21 

22logger = get_logger() 

23 

24 

# Number of bytes rewound after a parse failure in ARHandler.calculate_chunk.
# 0x44 == 68, which equals the 8 + 58 + 2 bytes matched by the handler pattern.
HEADER_LENGTH = 0x44

# Number of bytes skipped forward again after that rewind; 8 is the length of
# the "!<arch>\n" magic at the start of the handler pattern.
SIGNATURE_LENGTH = 0x8

27 

28 

class ArExtractor(Extractor):
    """Extractor that unpacks the members of an ``ar`` archive using ``arpy``."""

    def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]:
        """Hand every member of the archive at *inpath* to a FileSystem on *outdir*.

        Members are processed in sorted order of their raw (bytes) names.
        A name that cannot be decoded with the default codec is decoded again
        with ``errors="replace"`` and the substitution is recorded as an
        ``ExtractionProblem``.

        Returns an ``ExtractResult`` whose reports are the problems collected
        by the FileSystem.
        """
        fs = FileSystem(outdir)

        with arpy.Archive(inpath.as_posix()) as archive:
            archive.read_all_headers()
            members = archive.archived_files

            for raw_name in sorted(members):
                member = members[raw_name]

                try:
                    member_path = Path(raw_name.decode())
                except UnicodeDecodeError:
                    # Fall back to a lossy decode, but leave a trace of it.
                    member_path = Path(raw_name.decode(errors="replace"))
                    problem = ExtractionProblem(
                        path=repr(raw_name),
                        problem="Path is not a valid UTF/8 string",
                        resolution=f"Converted to {member_path}",
                    )
                    fs.record_problem(problem)

                content = iterate_file(member, 0, member.header.size)
                fs.write_chunks(member_path, chunks=content)

        return ExtractResult(reports=fs.problems)

61 

62 

class ARHandler(Handler):
    """Handler that matches ``ar`` archives and computes where they end."""

    NAME = "ar"

    # 8 magic bytes, a 58-byte gap, then the two trailing bytes 60 0A.
    PATTERNS = [
        HexString(
            """
            // "!<arch>\\n", 58 chars of whatever, then the ARFMAG
            21 3C 61 72 63 68 3E 0A [58] 60 0A
        """
        )
    ]

    EXTRACTOR = ArExtractor()

    DOC = HandlerDoc(
        name="AR",
        description="Unix AR (archive) files are used to store multiple files in a single archive with a simple header format.",
        handler_type=HandlerType.ARCHIVE,
        vendor=None,
        references=[
            Reference(
                title="Unix AR File Format Documentation",
                url="https://en.wikipedia.org/wiki/Ar_(Unix)",
            )
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Return the chunk covering the archive that starts at *start_offset*.

        All headers are read through ``arpy`` on an ``OffsetFile`` view of
        *file*. The chunk ends wherever the cursor of *file* is left afterwards.

        If ``arpy`` raises ``ArchiveFormatError``, the cursor is rewound by
        ``HEADER_LENGTH``. Landing back on *start_offset* means the very first
        match failed and ``None`` is returned. Otherwise the cursor is moved
        forward by ``SIGNATURE_LENGTH`` and the chunk ends there.
        """
        window = OffsetFile(file, start_offset)
        archive = arpy.Archive(fileobj=window)  # type: ignore

        try:
            archive.read_all_headers()
        except arpy.ArchiveFormatError as exc:
            logger.debug(
                "Hit an ArchiveFormatError, we've probably hit some other kind of data",
                exc_info=exc,
            )

            # Rewind a whole header length so we can tell whether the failure
            # happened on the first match (i.e. a malformed AR archive).
            archive.file.seek(-HEADER_LENGTH, os.SEEK_CUR)
            failed_on_first_match = file.tell() == start_offset
            if failed_on_first_match:
                return None

            # The failure happened further into the file, not at the start:
            # step past the signature before taking the end offset.
            archive.file.seek(SIGNATURE_LENGTH, os.SEEK_CUR)

        chunk_end = file.tell()
        return ValidChunk(start_offset=start_offset, end_offset=chunk_end)