1from typing import Optional
2
3from structlog import get_logger
4
5from unblob.extractors.command import Command
6
7from ...models import (
8 File,
9 HandlerDoc,
10 HandlerType,
11 HexString,
12 Reference,
13 StructHandler,
14 ValidChunk,
15)
16
# Module-level structlog logger.
logger = get_logger()

# Two-byte sequence that ends the header-to-header walk in
# ARCHandler.calculate_chunk: the 0x1A archive marker followed by a 0x00 byte.
END_HEADER = b"\x1a\x00"
20
21
class ARCHandler(StructHandler):
    """Handler that locates ARC archives by walking their entry headers.

    A chunk starts at a header matched by ``PATTERNS`` and ends right after
    the two-byte ``END_HEADER`` sequence.
    """

    NAME = "arc"

    PATTERNS = [
        HexString(
            """
            // Each entry in an archive begins with a one byte archive marker set to 0x1A.
            // The marker is followed by a one byte header type code, from 0x0 to 0x7.
            // Then a null-byte or uninitialized-byte terminated filename string of 13 bytes, the
            // uninitialized byte is always set between 0xf0 and 0xff.
            1A (01 | 02 | 03 | 04 | 05 | 06 | 07) [12] (00 | F0 | F1 | F2 | F3 | F4 | F5 | F6 | F7 | F8 | F9 | FA | FB | FC | FD | FE | FF)
        """
        )
    ]

    C_DEFINITIONS = r"""
        typedef struct arc_head { /* archive entry header format */
            int8 archive_marker;
            int8 header_type;
            char name[13]; /* file name */
            ulong size; /* size of file, in bytes */
            ushort date; /* creation date */
            ushort time; /* creation time */
            short crc; /* cyclic redundancy check */
            ulong length; /* true file length */
        } arc_head_t;
    """

    HEADER_STRUCT = "arc_head_t"
    EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")

    DOC = HandlerDoc(
        name="ARC",
        description="ARC is a legacy archive format used to store multiple files with metadata such as file size, creation date, and CRC.",
        handler_type=HandlerType.ARCHIVE,
        vendor=None,
        references=[
            Reference(
                title="ARC File Format Documentation",
                url="https://en.wikipedia.org/wiki/ARC_(file_format)",
            )
        ],
        limitations=[],
    )

    def valid_name(self, name: bytes) -> bool:
        """Return True if *name* looks like a usable entry file name.

        The name is rejected when it starts with a null byte, when nothing is
        left after dropping the final byte and stripping null bytes, or when
        the remaining bytes are not valid UTF-8.
        """
        try:
            # we return False if the name is made out of an array of null bytes
            # or if name starts with null.
            return bool(
                not name.startswith(b"\x00")
                and name[:-1].strip(b"\x00").decode("utf-8")
            )
        except UnicodeDecodeError:
            return False

    def valid_header(self, header) -> bool:
        """Return True if *header* has the ARC marker, a known type and a valid name."""
        if header.archive_marker != 0x1A:
            return False
        # header_type is declared as a signed int8 in C_DEFINITIONS, so a byte
        # in 0x80-0xFF parses as a negative number. Checking only the upper
        # bound would let those through, hence the explicit lower bound.
        if not 0x00 <= header.header_type <= 0x07:
            return False
        return self.valid_name(header.name)

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Walk entry headers from *start_offset* until the end header.

        Returns a ValidChunk spanning from *start_offset* to just past the
        end header, or None as soon as an entry header fails validation.
        """
        # we loop from header to header until we reach the end header
        offset = start_offset
        while True:
            file.seek(offset)
            read_bytes = file.read(2)

            if read_bytes == END_HEADER:
                # The chunk includes the two end-header bytes themselves.
                offset += 2
                break

            # Rewind: the two bytes just peeked are the start of the header.
            # NOTE(review): there is no explicit EOF check here; presumably
            # parse_header raises on a truncated file and the caller handles
            # it - confirm against the framework.
            file.seek(offset)
            header = self.parse_header(file)
            if not self.valid_header(header):
                return None

            # Skip this entry's header plus the `size` bytes it declares.
            offset += len(header) + header.size

        return ValidChunk(
            start_offset=start_offset,
            end_offset=offset,
        )