/src/duckdb/third_party/miniz/miniz_wrapper.hpp
Line | Count | Source |
1 | | //===----------------------------------------------------------------------===// |
2 | | // DuckDB |
3 | | // |
4 | | // miniz_wrapper.hpp |
5 | | // |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #pragma once |
10 | | |
11 | | #include "miniz.hpp" |
12 | | #include <string> |
13 | | #include <stdexcept> |
14 | | |
15 | | namespace duckdb { |
16 | | |
//! Kind of miniz stream a MiniZStream currently holds; used to pick the matching mz_*End call on cleanup
enum class MiniZStreamType {
	//! No miniz stream has been set up
	MINIZ_TYPE_NONE,
	//! The stream is used for decompression (torn down with mz_inflateEnd)
	MINIZ_TYPE_INFLATE,
	//! The stream is used for compression (torn down with mz_deflateEnd)
	MINIZ_TYPE_DEFLATE
};
18 | | |
19 | | struct MiniZStream { |
20 | | static constexpr uint8_t GZIP_HEADER_MINSIZE = 10; |
21 | | static constexpr uint8_t GZIP_FOOTER_SIZE = 8; |
22 | | static constexpr uint8_t GZIP_COMPRESSION_DEFLATE = 0x08; |
23 | | static constexpr unsigned char GZIP_FLAG_UNSUPPORTED = 0x1 | 0x2 | 0x4 | 0x10 | 0x20; |
24 | | |
25 | | public: |
26 | 0 | MiniZStream() : type(MiniZStreamType::MINIZ_TYPE_NONE) { |
27 | 0 | ResetStreamInternal(); |
28 | 0 | } |
29 | | |
30 | 0 | ~MiniZStream() { |
31 | 0 | switch (type) { |
32 | 0 | case MiniZStreamType::MINIZ_TYPE_INFLATE: |
33 | 0 | duckdb_miniz::mz_inflateEnd(&stream); |
34 | 0 | break; |
35 | 0 | case MiniZStreamType::MINIZ_TYPE_DEFLATE: |
36 | 0 | duckdb_miniz::mz_deflateEnd(&stream); |
37 | 0 | break; |
38 | 0 | default: |
39 | 0 | break; |
40 | 0 | } |
41 | 0 | } |
42 | | |
43 | 0 | void FormatException(const std::string &error_msg) { |
44 | 0 | throw std::runtime_error(error_msg); |
45 | 0 | } |
46 | | |
47 | 0 | void FormatException(const char *error_msg, int mz_ret) { |
48 | 0 | auto err = duckdb_miniz::mz_error(mz_ret); |
49 | 0 | FormatException(error_msg + std::string(": ") + (err ? err : "Unknown error code")); |
50 | 0 | } |
51 | | |
52 | 0 | void Decompress(const char *compressed_data, size_t compressed_size, char *out_data, size_t out_size) { |
53 | 0 | type = MiniZStreamType::MINIZ_TYPE_INFLATE; |
54 | | |
55 | | // Loop over blocks |
56 | 0 | while (compressed_size > 0) { |
57 | | // Read block header |
58 | 0 | if (compressed_size < GZIP_HEADER_MINSIZE) { |
59 | 0 | FormatException("Failed to decompress GZIP block: compressed size is less than gzip header size"); |
60 | 0 | } |
61 | 0 | auto gzip_hdr = reinterpret_cast<const unsigned char *>(compressed_data); |
62 | 0 | if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B || gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE || |
63 | 0 | gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) { |
64 | 0 | FormatException("Input is invalid/unsupported GZIP stream"); |
65 | 0 | } |
66 | 0 | compressed_data += GZIP_HEADER_MINSIZE; |
67 | 0 | compressed_size -= GZIP_HEADER_MINSIZE; |
68 | | |
69 | | // Initialize stream |
70 | 0 | auto mz_ret = mz_inflateInit2(&stream, -MZ_DEFAULT_WINDOW_BITS); |
71 | 0 | if (mz_ret != duckdb_miniz::MZ_OK) { |
72 | 0 | FormatException("Failed to initialize miniz", mz_ret); |
73 | 0 | } |
74 | | |
75 | | // Set up in/out |
76 | 0 | stream.next_in = reinterpret_cast<const unsigned char *>(compressed_data); |
77 | 0 | stream.avail_in = static_cast<unsigned int>(compressed_size); |
78 | 0 | stream.next_out = reinterpret_cast<unsigned char *>(out_data); |
79 | 0 | stream.avail_out = static_cast<unsigned int>(out_size); |
80 | | |
81 | | // Decompress and uninitialize stream |
82 | 0 | mz_ret = mz_inflate(&stream, duckdb_miniz::MZ_FINISH); |
83 | 0 | if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { |
84 | 0 | FormatException("Failed to decompress GZIP block", mz_ret); |
85 | 0 | } |
86 | 0 | mz_inflateEnd(&stream); |
87 | | |
88 | | // Update indices |
89 | 0 | compressed_data += GZIP_FOOTER_SIZE + stream.total_in; |
90 | 0 | compressed_size -= GZIP_FOOTER_SIZE + stream.total_in; |
91 | 0 | out_data += stream.total_out; |
92 | 0 | out_size -= stream.total_out; |
93 | |
|
94 | 0 | ResetStreamInternal(); |
95 | 0 | } |
96 | 0 | } |
97 | | |
98 | 0 | static size_t MaxCompressedLength(size_t input_size) { |
99 | 0 | return duckdb_miniz::mz_compressBound(input_size) + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; |
100 | 0 | } |
101 | | |
102 | 0 | static void InitializeGZIPHeader(unsigned char *gzip_header) { |
103 | 0 | memset(gzip_header, 0, GZIP_HEADER_MINSIZE); |
104 | 0 | gzip_header[0] = 0x1F; |
105 | 0 | gzip_header[1] = 0x8B; |
106 | 0 | gzip_header[2] = GZIP_COMPRESSION_DEFLATE; |
107 | 0 | gzip_header[3] = 0; |
108 | 0 | gzip_header[4] = 0; |
109 | 0 | gzip_header[5] = 0; |
110 | 0 | gzip_header[6] = 0; |
111 | 0 | gzip_header[7] = 0; |
112 | 0 | gzip_header[8] = 0; |
113 | 0 | gzip_header[9] = 0xFF; |
114 | 0 | } |
115 | | |
116 | 0 | static void InitializeGZIPFooter(unsigned char *gzip_footer, duckdb_miniz::mz_ulong crc, idx_t uncompressed_size) { |
117 | 0 | gzip_footer[0] = crc & 0xFF; |
118 | 0 | gzip_footer[1] = (crc >> 8) & 0xFF; |
119 | 0 | gzip_footer[2] = (crc >> 16) & 0xFF; |
120 | 0 | gzip_footer[3] = (crc >> 24) & 0xFF; |
121 | 0 | gzip_footer[4] = uncompressed_size & 0xFF; |
122 | 0 | gzip_footer[5] = (uncompressed_size >> 8) & 0xFF; |
123 | 0 | gzip_footer[6] = (uncompressed_size >> 16) & 0xFF; |
124 | 0 | gzip_footer[7] = (uncompressed_size >> 24) & 0xFF; |
125 | 0 | } |
126 | | |
127 | 0 | void Compress(const char *uncompressed_data, size_t uncompressed_size, char *out_data, size_t *out_size) { |
128 | 0 | auto mz_ret = |
129 | 0 | mz_deflateInit2(&stream, duckdb_miniz::MZ_DEFAULT_LEVEL, MZ_DEFLATED, -MZ_DEFAULT_WINDOW_BITS, 1, 0); |
130 | 0 | if (mz_ret != duckdb_miniz::MZ_OK) { |
131 | 0 | FormatException("Failed to initialize miniz", mz_ret); |
132 | 0 | } |
133 | 0 | type = MiniZStreamType::MINIZ_TYPE_DEFLATE; |
134 | |
|
135 | 0 | auto gzip_header = reinterpret_cast<unsigned char *>(out_data); |
136 | 0 | InitializeGZIPHeader(gzip_header); |
137 | |
|
138 | 0 | auto gzip_body = gzip_header + GZIP_HEADER_MINSIZE; |
139 | |
|
140 | 0 | stream.next_in = reinterpret_cast<const unsigned char *>(uncompressed_data); |
141 | 0 | stream.avail_in = static_cast<unsigned int>(uncompressed_size); |
142 | 0 | stream.next_out = gzip_body; |
143 | 0 | stream.avail_out = static_cast<unsigned int>(*out_size - GZIP_HEADER_MINSIZE); |
144 | |
|
145 | 0 | mz_ret = mz_deflate(&stream, duckdb_miniz::MZ_FINISH); |
146 | 0 | if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { |
147 | 0 | FormatException("Failed to compress GZIP block", mz_ret); |
148 | 0 | } |
149 | 0 | auto gzip_footer = gzip_body + stream.total_out; |
150 | 0 | auto crc = duckdb_miniz::mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const unsigned char *>(uncompressed_data), |
151 | 0 | uncompressed_size); |
152 | 0 | InitializeGZIPFooter(gzip_footer, crc, uncompressed_size); |
153 | |
|
154 | 0 | *out_size = stream.total_out + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; |
155 | 0 | } |
156 | | |
157 | | private: |
158 | 0 | void ResetStreamInternal() { |
159 | 0 | memset(&stream, 0, sizeof(duckdb_miniz::mz_stream)); |
160 | 0 | } |
161 | | |
162 | | private: |
163 | | duckdb_miniz::mz_stream stream; |
164 | | MiniZStreamType type; |
165 | | }; |
166 | | |
167 | | } // namespace duckdb |