/src/serenity/Userland/Libraries/LibArchive/Zip.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org> |
3 | | * Copyright (c) 2022-2025, the SerenityOS developers. |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #include <LibArchive/Zip.h> |
9 | | #include <LibCompress/Deflate.h> |
10 | | #include <LibCrypto/Checksum/CRC32.h> |
11 | | |
12 | | namespace Archive { |
13 | | |
14 | | bool Zip::find_end_of_central_directory_offset(ReadonlyBytes buffer, size_t& offset) |
15 | 829 | { |
16 | 842k | for (size_t backwards_offset = 0; backwards_offset <= UINT16_MAX; backwards_offset++) // the file may have a trailing comment of an arbitrary 16 bit length |
17 | 842k | { |
18 | 842k | if (buffer.size() < (sizeof(EndOfCentralDirectory) - sizeof(u8*)) + backwards_offset) |
19 | 45 | return false; |
20 | | |
21 | 842k | auto const signature_offset = (buffer.size() - (sizeof(EndOfCentralDirectory) - sizeof(u8*)) - backwards_offset); |
22 | 842k | if (auto signature = ReadonlyBytes { buffer.data() + signature_offset, EndOfCentralDirectory::signature.size() }; |
23 | 842k | signature == EndOfCentralDirectory::signature) { |
24 | 777 | offset = signature_offset; |
25 | 777 | return true; |
26 | 777 | } |
27 | 842k | } |
28 | 7 | return false; |
29 | 829 | } |
30 | | |
31 | | Optional<Zip> Zip::try_create(ReadonlyBytes buffer) |
32 | 829 | { |
33 | 829 | size_t end_of_central_directory_offset; |
34 | 829 | if (!find_end_of_central_directory_offset(buffer, end_of_central_directory_offset)) |
35 | 52 | return {}; |
36 | | |
37 | 777 | EndOfCentralDirectory end_of_central_directory {}; |
38 | 777 | if (!end_of_central_directory.read(buffer.slice(end_of_central_directory_offset))) |
39 | 42 | return {}; |
40 | | |
41 | 735 | if (end_of_central_directory.disk_number != 0 || end_of_central_directory.central_directory_start_disk != 0 || end_of_central_directory.disk_records_count != end_of_central_directory.total_records_count) |
42 | 67 | return {}; // TODO: support multi-volume zip archives |
43 | | |
44 | 668 | size_t member_offset = end_of_central_directory.central_directory_offset; |
45 | 4.30k | for (size_t i = 0; i < end_of_central_directory.total_records_count; i++) { |
46 | 3.90k | CentralDirectoryRecord central_directory_record {}; |
47 | 3.90k | if (member_offset > buffer.size()) |
48 | 46 | return {}; |
49 | 3.85k | if (!central_directory_record.read(buffer.slice(member_offset))) |
50 | 41 | return {}; |
51 | 3.81k | if (central_directory_record.general_purpose_flags.encrypted) |
52 | 1 | return {}; // TODO: support encrypted zip members |
53 | 3.81k | if (central_directory_record.general_purpose_flags.data_descriptor) |
54 | 1 | return {}; // TODO: support zip data descriptors |
55 | 3.81k | if (central_directory_record.compression_method != ZipCompressionMethod::Store && central_directory_record.compression_method != ZipCompressionMethod::Deflate) |
56 | 21 | return {}; // TODO: support obsolete zip compression methods |
57 | 3.79k | if (central_directory_record.compression_method == ZipCompressionMethod::Store && central_directory_record.uncompressed_size != central_directory_record.compressed_size) |
58 | 39 | return {}; |
59 | 3.75k | if (central_directory_record.start_disk != 0) |
60 | 18 | return {}; // TODO: support multi-volume zip archives |
61 | 3.73k | if (memchr(central_directory_record.name, 0, central_directory_record.name_length) != nullptr) |
62 | 2 | return {}; |
63 | 3.73k | LocalFileHeader local_file_header {}; |
64 | 3.73k | if (central_directory_record.local_file_header_offset > buffer.size()) |
65 | 28 | return {}; |
66 | 3.70k | if (!local_file_header.read(buffer.slice(central_directory_record.local_file_header_offset))) |
67 | 41 | return {}; |
68 | 3.66k | if (buffer.size() - (local_file_header.compressed_data - buffer.data()) < central_directory_record.compressed_size) |
69 | 33 | return {}; |
70 | 3.63k | member_offset += central_directory_record.size(); |
71 | 3.63k | } |
72 | | |
73 | 397 | return Zip { |
74 | 397 | end_of_central_directory.total_records_count, |
75 | 397 | end_of_central_directory.central_directory_offset, |
76 | 397 | buffer, |
77 | 397 | }; |
78 | 668 | } |
79 | | |
80 | | ErrorOr<bool> Zip::for_each_member(Function<ErrorOr<IterationDecision>(ZipMember const&)> callback) const |
81 | 397 | { |
82 | 397 | size_t member_offset = m_members_start_offset; |
83 | 3.57k | for (size_t i = 0; i < m_member_count; i++) { |
84 | 3.28k | CentralDirectoryRecord central_directory_record {}; |
85 | 3.28k | VERIFY(central_directory_record.read(m_input_data.slice(member_offset))); |
86 | 3.28k | LocalFileHeader local_file_header {}; |
87 | 3.28k | VERIFY(local_file_header.read(m_input_data.slice(central_directory_record.local_file_header_offset))); |
88 | | |
89 | 3.28k | ZipMember member; |
90 | 3.28k | member.name = TRY(String::from_utf8({ central_directory_record.name, central_directory_record.name_length })); |
91 | 3.17k | member.compressed_data = { local_file_header.compressed_data, central_directory_record.compressed_size }; |
92 | 3.17k | member.compression_method = central_directory_record.compression_method; |
93 | 3.17k | member.uncompressed_size = central_directory_record.uncompressed_size; |
94 | 3.17k | member.crc32 = central_directory_record.crc32; |
95 | 3.17k | member.modification_time = central_directory_record.modification_time; |
96 | 3.17k | member.modification_date = central_directory_record.modification_date; |
97 | 3.17k | member.is_directory = central_directory_record.external_attributes.msdos & zip_directory_msdos_attribute || member.name.bytes_as_string_view().ends_with('/'); // FIXME: better directory detection |
98 | 3.17k | if (central_directory_record.made_by_version.made_by == ZipMadeBy::Unix) { |
99 | 163 | member.mode = static_cast<mode_t>(central_directory_record.external_attributes.unix); |
100 | 163 | } |
101 | | |
102 | 6.35k | if (TRY(callback(member)) == IterationDecision::Break) |
103 | 0 | return false; |
104 | | |
105 | 3.17k | member_offset += central_directory_record.size(); |
106 | 3.17k | } |
107 | 289 | return true; |
108 | 397 | } |
109 | | |
110 | | ErrorOr<Statistics> Zip::calculate_statistics() const |
111 | 0 | { |
112 | 0 | size_t file_count = 0; |
113 | 0 | size_t directory_count = 0; |
114 | 0 | size_t uncompressed_bytes = 0; |
115 | |
|
116 | 0 | TRY(for_each_member([&](auto zip_member) -> ErrorOr<IterationDecision> { |
117 | 0 | if (zip_member.is_directory) |
118 | 0 | directory_count++; |
119 | 0 | else |
120 | 0 | file_count++; |
121 | 0 | uncompressed_bytes += zip_member.uncompressed_size; |
122 | 0 | return IterationDecision::Continue; |
123 | 0 | })); |
124 | |
|
125 | 0 | return Statistics(file_count, directory_count, uncompressed_bytes); |
126 | 0 | } |
127 | | |
128 | | ZipOutputStream::ZipOutputStream(NonnullOwnPtr<Stream> stream) |
129 | 0 | : m_stream(move(stream)) |
130 | 0 | { |
131 | 0 | } |
132 | | |
133 | | static u16 minimum_version_needed(ZipCompressionMethod method) |
134 | 0 | { |
135 | | // Deflate was added in PKZip 2.0 |
136 | 0 | return method == ZipCompressionMethod::Deflate ? 20 : 10; |
137 | 0 | } |
138 | | |
139 | | ErrorOr<void> ZipOutputStream::add_member(ZipMember const& member) |
140 | 0 | { |
141 | 0 | VERIFY(!m_finished); |
142 | 0 | VERIFY(member.name.bytes_as_string_view().length() <= UINT16_MAX); |
143 | 0 | VERIFY(member.compressed_data.size() <= UINT32_MAX); |
144 | 0 | TRY(m_members.try_append(member)); |
145 | |
|
146 | 0 | LocalFileHeader local_file_header { |
147 | 0 | .minimum_version = minimum_version_needed(member.compression_method), |
148 | 0 | .general_purpose_flags = { .flags = 0 }, |
149 | 0 | .compression_method = static_cast<u16>(member.compression_method), |
150 | 0 | .modification_time = member.modification_time, |
151 | 0 | .modification_date = member.modification_date, |
152 | 0 | .crc32 = member.crc32, |
153 | 0 | .compressed_size = static_cast<u32>(member.compressed_data.size()), |
154 | 0 | .uncompressed_size = member.uncompressed_size, |
155 | 0 | .name_length = static_cast<u16>(member.name.bytes_as_string_view().length()), |
156 | 0 | .extra_data_length = 0, |
157 | 0 | .name = reinterpret_cast<u8 const*>(member.name.bytes_as_string_view().characters_without_null_termination()), |
158 | 0 | .extra_data = nullptr, |
159 | 0 | .compressed_data = member.compressed_data.data(), |
160 | 0 | }; |
161 | 0 | return local_file_header.write(*m_stream); |
162 | 0 | } |
163 | | |
164 | | ErrorOr<ZipOutputStream::MemberInformation> ZipOutputStream::add_member_from_stream(StringView path, Stream& stream, Optional<Core::DateTime> const& modification_time, Optional<mode_t> mode) |
165 | 0 | { |
166 | 0 | auto buffer = TRY(stream.read_until_eof()); |
167 | |
|
168 | 0 | Archive::ZipMember member {}; |
169 | 0 | member.name = TRY(String::from_utf8(path)); |
170 | |
|
171 | 0 | if (modification_time.has_value()) { |
172 | 0 | member.modification_date = to_packed_dos_date(modification_time->year(), modification_time->month(), modification_time->day()); |
173 | 0 | member.modification_time = to_packed_dos_time(modification_time->hour(), modification_time->minute(), modification_time->second()); |
174 | 0 | } |
175 | |
|
176 | 0 | auto deflate_buffer = Compress::DeflateCompressor::compress_all(buffer); |
177 | 0 | auto compression_ratio = 1.f; |
178 | 0 | auto compressed_size = buffer.size(); |
179 | |
|
180 | 0 | if (!deflate_buffer.is_error() && deflate_buffer.value().size() < buffer.size()) { |
181 | 0 | member.compressed_data = deflate_buffer.value().bytes(); |
182 | 0 | member.compression_method = Archive::ZipCompressionMethod::Deflate; |
183 | |
|
184 | 0 | compression_ratio = static_cast<float>(deflate_buffer.value().size()) / static_cast<float>(buffer.size()); |
185 | 0 | compressed_size = member.compressed_data.size(); |
186 | 0 | } else { |
187 | 0 | member.compressed_data = buffer.bytes(); |
188 | 0 | member.compression_method = Archive::ZipCompressionMethod::Store; |
189 | 0 | } |
190 | |
|
191 | 0 | member.uncompressed_size = buffer.size(); |
192 | |
|
193 | 0 | Crypto::Checksum::CRC32 checksum { buffer.bytes() }; |
194 | 0 | member.crc32 = checksum.digest(); |
195 | 0 | member.is_directory = false; |
196 | 0 | member.mode = mode; |
197 | |
|
198 | 0 | TRY(add_member(member)); |
199 | |
|
200 | 0 | return MemberInformation { compression_ratio, compressed_size }; |
201 | 0 | } |
202 | | |
203 | | ErrorOr<void> ZipOutputStream::add_directory(StringView name, Optional<Core::DateTime> const& modification_time, Optional<mode_t> mode) |
204 | 0 | { |
205 | 0 | Archive::ZipMember member {}; |
206 | 0 | member.name = TRY(String::from_utf8(name)); |
207 | 0 | member.compressed_data = {}; |
208 | 0 | member.compression_method = Archive::ZipCompressionMethod::Store; |
209 | 0 | member.uncompressed_size = 0; |
210 | 0 | member.crc32 = 0; |
211 | 0 | member.is_directory = true; |
212 | 0 | member.mode = mode; |
213 | |
|
214 | 0 | if (modification_time.has_value()) { |
215 | 0 | member.modification_date = to_packed_dos_date(modification_time->year(), modification_time->month(), modification_time->day()); |
216 | 0 | member.modification_time = to_packed_dos_time(modification_time->hour(), modification_time->minute(), modification_time->second()); |
217 | 0 | } |
218 | |
|
219 | 0 | return add_member(member); |
220 | 0 | } |
221 | | |
222 | | ErrorOr<void> ZipOutputStream::finish() |
223 | 0 | { |
224 | 0 | VERIFY(!m_finished); |
225 | 0 | m_finished = true; |
226 | |
|
227 | 0 | auto file_header_offset = 0u; |
228 | 0 | auto central_directory_size = 0u; |
229 | 0 | for (ZipMember const& member : m_members) { |
230 | 0 | auto zip_version = minimum_version_needed(member.compression_method); |
231 | 0 | CentralDirectoryRecord central_directory_record { |
232 | 0 | .made_by_version = { .version = static_cast<u8>(zip_version), .made_by = ZipMadeBy::Unix }, |
233 | 0 | .minimum_version = zip_version, |
234 | 0 | .general_purpose_flags = { .flags = 0 }, |
235 | 0 | .compression_method = member.compression_method, |
236 | 0 | .modification_time = member.modification_time, |
237 | 0 | .modification_date = member.modification_date, |
238 | 0 | .crc32 = member.crc32, |
239 | 0 | .compressed_size = static_cast<u32>(member.compressed_data.size()), |
240 | 0 | .uncompressed_size = member.uncompressed_size, |
241 | 0 | .name_length = static_cast<u16>(member.name.bytes_as_string_view().length()), |
242 | 0 | .extra_data_length = 0, |
243 | 0 | .comment_length = 0, |
244 | 0 | .start_disk = 0, |
245 | 0 | .internal_attributes = 0, |
246 | 0 | .external_attributes = { |
247 | 0 | .msdos = static_cast<u16>(member.is_directory ? zip_directory_msdos_attribute : 0), |
248 | 0 | .unix = static_cast<u16>(member.mode.value_or(0)), |
249 | 0 | }, |
250 | 0 | .local_file_header_offset = file_header_offset, // FIXME: we assume the wrapped output stream was never written to before us |
251 | 0 | .name = reinterpret_cast<u8 const*>(member.name.bytes_as_string_view().characters_without_null_termination()), |
252 | 0 | .extra_data = nullptr, |
253 | 0 | .comment = nullptr, |
254 | 0 | }; |
255 | 0 | file_header_offset += sizeof(LocalFileHeader::signature) + (sizeof(LocalFileHeader) - (sizeof(u8*) * 3)) + member.name.bytes_as_string_view().length() + member.compressed_data.size(); |
256 | 0 | TRY(central_directory_record.write(*m_stream)); |
257 | 0 | central_directory_size += central_directory_record.size(); |
258 | 0 | } |
259 | |
|
260 | 0 | EndOfCentralDirectory end_of_central_directory { |
261 | 0 | .disk_number = 0, |
262 | 0 | .central_directory_start_disk = 0, |
263 | 0 | .disk_records_count = static_cast<u16>(m_members.size()), |
264 | 0 | .total_records_count = static_cast<u16>(m_members.size()), |
265 | 0 | .central_directory_size = central_directory_size, |
266 | 0 | .central_directory_offset = file_header_offset, |
267 | 0 | .comment_length = 0, |
268 | 0 | .comment = nullptr, |
269 | 0 | }; |
270 | 0 | return end_of_central_directory.write(*m_stream); |
271 | 0 | } |
272 | | |
273 | | } |