/src/serenity/Userland/Libraries/LibCompress/Gzip.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2020-2022, the SerenityOS developers. |
3 | | * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #include <LibCompress/Gzip.h> |
9 | | |
10 | | #include <AK/BitStream.h> |
11 | | #include <AK/MemoryStream.h> |
12 | | #include <AK/String.h> |
13 | | #include <LibCore/DateTime.h> |
14 | | |
15 | | namespace Compress { |
16 | | |
17 | | bool GzipDecompressor::is_likely_compressed(ReadonlyBytes bytes) |
18 | 0 | { |
19 | 0 | return bytes.size() >= 2 && bytes[0] == gzip_magic_1 && bytes[1] == gzip_magic_2; |
20 | 0 | } |
21 | | |
22 | | bool BlockHeader::valid_magic_number() const |
23 | 47.9k | { |
24 | 47.9k | return identification_1 == gzip_magic_1 && identification_2 == gzip_magic_2; |
25 | 47.9k | } |
26 | | |
27 | | bool BlockHeader::supported_by_implementation() const |
28 | 47.8k | { |
29 | 47.8k | if (compression_method != 0x08) { |
30 | | // RFC 1952 does not define any compression methods other than deflate. |
31 | 10 | return false; |
32 | 10 | } |
33 | | |
34 | 47.8k | if (flags > Flags::MAX) { |
35 | | // RFC 1952 does not define any more flags. |
36 | 3 | return false; |
37 | 3 | } |
38 | | |
39 | 47.8k | return true; |
40 | 47.8k | } |
41 | | |
42 | | ErrorOr<NonnullOwnPtr<GzipDecompressor::Member>> GzipDecompressor::Member::construct(BlockHeader header, LittleEndianInputBitStream& stream) |
43 | 47.7k | { |
44 | 47.7k | auto deflate_stream = TRY(DeflateDecompressor::construct(MaybeOwned<LittleEndianInputBitStream>(stream))); |
45 | 47.7k | return TRY(adopt_nonnull_own_or_enomem(new (nothrow) Member(header, move(deflate_stream)))); |
46 | 47.7k | } |
47 | | |
48 | | GzipDecompressor::Member::Member(BlockHeader header, NonnullOwnPtr<DeflateDecompressor> stream) |
49 | 47.7k | : m_header(header) |
50 | 47.7k | , m_stream(move(stream)) |
51 | 47.7k | { |
52 | 47.7k | } |
53 | | |
54 | | GzipDecompressor::GzipDecompressor(MaybeOwned<Stream> stream) |
55 | 4.29k | : m_input_stream(make<LittleEndianInputBitStream>(move(stream))) |
56 | 4.29k | { |
57 | 4.29k | } |
58 | | |
59 | | GzipDecompressor::~GzipDecompressor() |
60 | 4.29k | { |
61 | 4.29k | m_current_member.clear(); |
62 | 4.29k | } |
63 | | |
64 | | ErrorOr<Bytes> GzipDecompressor::read_some(Bytes bytes) |
65 | 2.16M | { |
66 | 2.16M | size_t total_read = 0; |
67 | 4.42M | while (total_read < bytes.size()) { |
68 | 2.25M | if (is_eof()) |
69 | 2.47k | break; |
70 | | |
71 | 2.25M | auto slice = bytes.slice(total_read); |
72 | | |
73 | 2.25M | if (m_current_member) { |
74 | 2.20M | auto current_slice = TRY(current_member().m_stream->read_some(slice)); |
75 | 2.20M | current_member().m_checksum.update(current_slice); |
76 | 2.20M | current_member().m_nread += current_slice.size(); |
77 | | |
78 | 2.20M | if (current_slice.size() < slice.size()) { |
79 | 46.3k | u32 crc32 = TRY(m_input_stream->read_value<LittleEndian<u32>>()); |
80 | 46.2k | u32 input_size = TRY(m_input_stream->read_value<LittleEndian<u32>>()); |
81 | | |
82 | 46.1k | if (crc32 != current_member().m_checksum.digest()) |
83 | 48 | return Error::from_string_literal("Stored CRC32 does not match the calculated CRC32 of the current member"); |
84 | | |
85 | 46.1k | if (input_size != current_member().m_nread) |
86 | 55 | return Error::from_string_literal("Input size does not match the number of read bytes"); |
87 | | |
88 | 46.0k | m_current_member.clear(); |
89 | | |
90 | 46.0k | total_read += current_slice.size(); |
91 | 46.0k | continue; |
92 | 46.1k | } |
93 | | |
94 | 2.16M | total_read += current_slice.size(); |
95 | 2.16M | continue; |
96 | 2.20M | } else { |
97 | 47.9k | auto current_partial_header_slice = Bytes { m_partial_header, sizeof(BlockHeader) }.slice(m_partial_header_offset); |
98 | 47.9k | auto current_partial_header_data = TRY(m_input_stream->read_some(current_partial_header_slice)); |
99 | 47.9k | m_partial_header_offset += current_partial_header_data.size(); |
100 | | |
101 | 47.9k | if (is_eof()) |
102 | 39 | break; |
103 | | |
104 | 47.9k | if (m_partial_header_offset < sizeof(BlockHeader)) { |
105 | 0 | break; // partial header read |
106 | 0 | } |
107 | 47.9k | m_partial_header_offset = 0; |
108 | | |
109 | 47.9k | BlockHeader header = *(reinterpret_cast<BlockHeader*>(m_partial_header)); |
110 | | |
111 | 47.9k | if (!header.valid_magic_number()) |
112 | 27 | return Error::from_string_literal("Header does not have a valid magic number"); |
113 | | |
114 | 47.8k | if (!header.supported_by_implementation()) |
115 | 13 | return Error::from_string_literal("Header is not supported by implementation"); |
116 | | |
117 | 47.8k | if (header.flags & Flags::FEXTRA) { |
118 | 483 | u16 subfield_id = TRY(m_input_stream->read_value<LittleEndian<u16>>()); |
119 | 480 | u16 length = TRY(m_input_stream->read_value<LittleEndian<u16>>()); |
120 | 479 | TRY(m_input_stream->discard(length)); |
121 | 420 | (void)subfield_id; |
122 | 420 | } |
123 | | |
124 | 47.8k | auto discard_string = [&]() -> ErrorOr<void> { |
125 | 40.0k | char next_char; |
126 | 4.10M | do { |
127 | 4.10M | next_char = TRY(m_input_stream->read_value<char>()); |
128 | 4.10M | } while (next_char); |
129 | | |
130 | 40.0k | return {}; |
131 | 40.0k | }; |
132 | | |
133 | 47.8k | if (header.flags & Flags::FNAME) |
134 | 39.7k | TRY(discard_string()); |
135 | | |
136 | 47.8k | if (header.flags & Flags::FCOMMENT) |
137 | 260 | TRY(discard_string()); |
138 | | |
139 | 47.7k | if (header.flags & Flags::FHCRC) { |
140 | 510 | u16 crc = TRY(m_input_stream->read_value<LittleEndian<u16>>()); |
141 | | // FIXME: we should probably verify this instead of just assuming it matches |
142 | 502 | (void)crc; |
143 | 502 | } |
144 | | |
145 | 47.7k | m_current_member = TRY(Member::construct(header, *m_input_stream)); |
146 | 47.7k | continue; |
147 | 47.7k | } |
148 | 2.25M | } |
149 | 2.16M | return bytes.slice(0, total_read); |
150 | 2.16M | } |
151 | | |
152 | | ErrorOr<Optional<String>> GzipDecompressor::describe_header(ReadonlyBytes bytes) |
153 | 0 | { |
154 | 0 | if (bytes.size() < sizeof(BlockHeader)) |
155 | 0 | return OptionalNone {}; |
156 | | |
157 | 0 | auto& header = *(reinterpret_cast<BlockHeader const*>(bytes.data())); |
158 | 0 | if (!header.valid_magic_number() || !header.supported_by_implementation()) |
159 | 0 | return OptionalNone {}; |
160 | | |
161 | 0 | LittleEndian<u32> original_size = *reinterpret_cast<u32 const*>(bytes.offset(bytes.size() - sizeof(u32))); |
162 | 0 | return TRY(String::formatted("last modified: {}, original size {}", Core::DateTime::from_timestamp(header.modification_time), (u32)original_size)); |
163 | 0 | } |
164 | | |
165 | | ErrorOr<ByteBuffer> GzipDecompressor::decompress_all(ReadonlyBytes bytes) |
166 | 4.29k | { |
167 | 4.29k | auto memory_stream = TRY(try_make<FixedMemoryStream>(bytes)); |
168 | 4.29k | auto gzip_stream = make<GzipDecompressor>(move(memory_stream)); |
169 | 4.29k | AllocatingMemoryStream output_stream; |
170 | | |
171 | 4.29k | auto buffer = TRY(ByteBuffer::create_uninitialized(4096)); |
172 | 2.16M | while (!gzip_stream->is_eof()) { |
173 | 2.16M | auto const data = TRY(gzip_stream->read_some(buffer)); |
174 | 2.16M | TRY(output_stream.write_until_depleted(data)); |
175 | 2.16M | } |
176 | | |
177 | 4.29k | return output_stream.read_until_eof(); |
178 | 4.29k | } |
179 | | |
180 | 4.47M | bool GzipDecompressor::is_eof() const { return m_input_stream->is_eof(); } |
181 | | |
182 | | ErrorOr<size_t> GzipDecompressor::write_some(ReadonlyBytes) |
183 | 0 | { |
184 | 0 | return Error::from_errno(EBADF); |
185 | 0 | } |
186 | | |
187 | | GzipCompressor::GzipCompressor(MaybeOwned<Stream> stream) |
188 | 2.39k | : m_output_stream(move(stream)) |
189 | 2.39k | { |
190 | 2.39k | } |
191 | | |
192 | | ErrorOr<Bytes> GzipCompressor::read_some(Bytes) |
193 | 0 | { |
194 | 0 | return Error::from_errno(EBADF); |
195 | 0 | } |
196 | | |
197 | | ErrorOr<size_t> GzipCompressor::write_some(ReadonlyBytes bytes) |
198 | 2.39k | { |
199 | 2.39k | BlockHeader header; |
200 | 2.39k | header.identification_1 = 0x1f; |
201 | 2.39k | header.identification_2 = 0x8b; |
202 | 2.39k | header.compression_method = 0x08; |
203 | 2.39k | header.flags = 0; |
204 | 2.39k | header.modification_time = 0; |
205 | 2.39k | header.extra_flags = 3; // DEFLATE sets 2 for maximum compression and 4 for minimum compression |
206 | 2.39k | header.operating_system = 3; // unix |
207 | 2.39k | TRY(m_output_stream->write_until_depleted({ &header, sizeof(header) })); |
208 | 2.39k | auto compressed_stream = TRY(DeflateCompressor::construct(MaybeOwned(*m_output_stream))); |
209 | 2.39k | TRY(compressed_stream->write_until_depleted(bytes)); |
210 | 2.39k | TRY(compressed_stream->final_flush()); |
211 | 2.39k | Crypto::Checksum::CRC32 crc32; |
212 | 2.39k | crc32.update(bytes); |
213 | 2.39k | TRY(m_output_stream->write_value<LittleEndian<u32>>(crc32.digest())); |
214 | 2.39k | TRY(m_output_stream->write_value<LittleEndian<u32>>(bytes.size())); |
215 | 2.39k | return bytes.size(); |
216 | 2.39k | } |
217 | | |
218 | | bool GzipCompressor::is_eof() const |
219 | 0 | { |
220 | 0 | return true; |
221 | 0 | } |
222 | | |
223 | | bool GzipCompressor::is_open() const |
224 | 0 | { |
225 | 0 | return m_output_stream->is_open(); |
226 | 0 | } |
227 | | |
228 | | void GzipCompressor::close() |
229 | 0 | { |
230 | 0 | } |
231 | | |
232 | | ErrorOr<ByteBuffer> GzipCompressor::compress_all(ReadonlyBytes bytes) |
233 | 2.39k | { |
234 | 2.39k | auto output_stream = TRY(try_make<AllocatingMemoryStream>()); |
235 | 2.39k | GzipCompressor gzip_stream { MaybeOwned<Stream>(*output_stream) }; |
236 | | |
237 | 2.39k | TRY(gzip_stream.write_until_depleted(bytes)); |
238 | | |
239 | 2.39k | return output_stream->read_until_eof(); |
240 | 2.39k | } |
241 | | |
242 | | } |