1from structlog import get_logger
2
3from unblob.extractors.command import Command
4
5from ...models import (
6 File,
7 HandlerDoc,
8 HandlerType,
9 HexString,
10 Reference,
11 StructHandler,
12 ValidChunk,
13)
14
# Module-level structlog logger.
logger = get_logger()

# Two-byte end-of-archive marker: the 0x1A archive marker followed by a 0x00
# header type. ARCHandler.calculate_chunk stops walking entries when it reads it.
END_HEADER = b"\x1a\x00"
18
19
class ARCHandler(StructHandler):
    """Carve ARC archives by walking entry headers up to the end-of-archive marker."""

    NAME = "arc"

    PATTERNS = [
        HexString(
            """
            // Each entry in an archive begins with a one byte archive marker set to 0x1A.
            // The marker is followed by a one byte header type code, from 0x0 to 0x7.
            // Then a null-byte or uninitialized-byte terminated filename string of 13 bytes, the
            // uninitialized byte is always set between 0xf0 and 0xff.
            1A (01 | 02 | 03 | 04 | 05 | 06 | 07) [12] (00 | F0 | F1 | F2 | F3 | F4 | F5 | F6 | F7 | F8 | F9 | FA | FB | FC | FD | FE | FF)
        """
        )
    ]

    C_DEFINITIONS = r"""
        typedef struct arc_head {           /* archive entry header format */
            int8    archive_marker;
            int8    header_type;
            char    name[13];               /* file name */
            ulong   size;                   /* size of file, in bytes */
            ushort  date;                   /* creation date */
            ushort  time;                   /* creation time */
            short   crc;                    /* cyclic redundancy check */
            ulong   length;                 /* true file length */
        } arc_head_t;
    """

    HEADER_STRUCT = "arc_head_t"
    EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")

    DOC = HandlerDoc(
        name="ARC",
        description="ARC is a legacy archive format used to store multiple files with metadata such as file size, creation date, and CRC.",
        handler_type=HandlerType.ARCHIVE,
        vendor=None,
        references=[
            Reference(
                title="ARC File Format Documentation",
                url="https://en.wikipedia.org/wiki/ARC_(file_format)",
            )
        ],
        limitations=[],
    )

    def valid_name(self, name: bytes) -> bool:
        """Return whether *name* looks like a usable entry file name.

        A name is rejected when it starts with a null byte, when nothing is
        left after stripping null bytes, or when it is not valid UTF-8. The
        last byte is ignored because the terminator may be an uninitialized
        byte (0xf0-0xff) rather than a null.
        """
        if name.startswith(b"\x00"):
            return False
        try:
            return bool(name[:-1].strip(b"\x00").decode("utf-8"))
        except UnicodeDecodeError:
            return False

    def valid_header(self, header) -> bool:
        """Return whether a parsed ``arc_head_t`` header is plausible.

        Checks the archive marker, the header type range and the file name.
        """
        if header.archive_marker != 0x1A:
            return False
        # header_type is declared as a signed int8, so bytes 0x80-0xFF parse as
        # negative numbers; a bare "> 0x07" test would let them through.
        if not 0x00 <= header.header_type <= 0x07:
            return False
        return self.valid_name(header.name)

    def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk | None:
        """Walk the entry headers from *start_offset* to find the archive end.

        Returns a chunk spanning from *start_offset* to just past the
        end-of-archive marker, or ``None`` as soon as a header fails
        validation.
        """
        # NOTE(review): the ARC format describes type-1 headers as lacking the
        # trailing `length` field; this always advances by the full struct
        # size - confirm whether type-1 entries need special handling.
        offset = start_offset
        while True:
            file.seek(offset)
            if file.read(len(END_HEADER)) == END_HEADER:
                # The end marker belongs to the archive, so include it.
                offset += len(END_HEADER)
                break

            # Rewind: the two bytes just read are the start of a full header.
            file.seek(offset)
            header = self.parse_header(file)
            if not self.valid_header(header):
                return None

            # Skip the header and the entry's stored data to the next header.
            offset += len(header) + header.size

        return ValidChunk(
            start_offset=start_offset,
            end_offset=offset,
        )