/src/fluent-bit/src/flb_gzip.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | |
3 | | /* Fluent Bit |
4 | | * ========== |
5 | | * Copyright (C) 2015-2022 The Fluent Bit Authors |
6 | | * |
7 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | * you may not use this file except in compliance with the License. |
9 | | * You may obtain a copy of the License at |
10 | | * |
11 | | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | | * |
13 | | * Unless required by applicable law or agreed to in writing, software |
14 | | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | * See the License for the specific language governing permissions and |
17 | | * limitations under the License. |
18 | | */ |
19 | | |
20 | | #include <fluent-bit/flb_info.h> |
21 | | #include <fluent-bit/flb_mem.h> |
22 | | #include <fluent-bit/flb_log.h> |
23 | | #include <fluent-bit/flb_gzip.h> |
24 | | #include <miniz/miniz.h> |
25 | | |
/* Length in bytes of the base GZip header that precedes optional fields */
#define FLB_GZIP_HEADER_OFFSET 10

/* Bit masks tested against the flag byte (offset 3) of a GZip header */
typedef enum {
    FTEXT    = 1 << 0,
    FHCRC    = 1 << 1,
    FEXTRA   = 1 << 2,
    FNAME    = 1 << 3,
    FCOMMENT = 1 << 4
} flb_tinf_gzip_flag;
35 | | |
/* Decode the two bytes at p as an unsigned 16-bit little-endian integer */
static unsigned int read_le16(const unsigned char *p)
{
    unsigned int low = p[0];
    unsigned int high = p[1];

    return (high << 8) | low;
}
40 | | |
/* Decode the four bytes at p as an unsigned 32-bit little-endian integer */
static unsigned int read_le32(const unsigned char *p)
{
    unsigned int value = 0;
    int i;

    /* Fold from the most significant byte (p[3]) down to p[0] */
    for (i = 3; i >= 0; i--) {
        value = (value << 8) | (unsigned int) p[i];
    }

    return value;
}
48 | | |
/*
 * Write the fixed 10-byte GZip header at the beginning of buf. The caller
 * must provide room for at least 10 bytes.
 */
static inline void gzip_header(void *buf)
{
    static const uint8_t header[10] = {
        0x1F, 0x8B,     /* GZip magic bytes */
        8,              /* compression method */
        0,              /* flag byte: no optional fields */
        0, 0, 0, 0,     /* remaining base header bytes (MTIME in RFC 1952) */
        0,              /* XFL in RFC 1952 */
        0xFF            /* OS in RFC 1952 */
    };
    uint8_t *dst = buf;
    unsigned int i;

    for (i = 0; i < sizeof(header); i++) {
        dst[i] = header[i];
    }
}
66 | | |
67 | | int flb_gzip_compress(void *in_data, size_t in_len, |
68 | | void **out_data, size_t *out_len) |
69 | 0 | { |
70 | 0 | int flush; |
71 | 0 | int status; |
72 | 0 | int footer_start; |
73 | 0 | uint8_t *pb; |
74 | 0 | size_t out_size; |
75 | 0 | void *out_buf; |
76 | 0 | z_stream strm; |
77 | 0 | mz_ulong crc; |
78 | | |
79 | | /* |
80 | | * Calculating the upper bound for a gzip compression is |
81 | | * non-trivial, so we rely on miniz's own calculation |
82 | | * to guarantee memory safety. |
83 | | */ |
84 | 0 | out_size = compressBound(in_len); |
85 | 0 | out_buf = flb_malloc(out_size); |
86 | |
|
87 | 0 | if (!out_buf) { |
88 | 0 | flb_errno(); |
89 | 0 | flb_error("[gzip] could not allocate outgoing buffer"); |
90 | 0 | return -1; |
91 | 0 | } |
92 | | |
93 | | /* Initialize streaming buffer context */ |
94 | 0 | memset(&strm, '\0', sizeof(strm)); |
95 | 0 | strm.zalloc = Z_NULL; |
96 | 0 | strm.zfree = Z_NULL; |
97 | 0 | strm.opaque = Z_NULL; |
98 | 0 | strm.next_in = in_data; |
99 | 0 | strm.avail_in = in_len; |
100 | 0 | strm.total_out = 0; |
101 | | |
102 | | /* Deflate mode */ |
103 | 0 | deflateInit2(&strm, Z_DEFAULT_COMPRESSION, |
104 | 0 | Z_DEFLATED, -Z_DEFAULT_WINDOW_BITS, 9, Z_DEFAULT_STRATEGY); |
105 | | |
106 | | /* |
107 | | * Miniz don't support GZip format directly, instead we will: |
108 | | * |
109 | | * - append manual GZip magic bytes |
110 | | * - deflate raw content |
111 | | * - append manual CRC32 data |
112 | | */ |
113 | 0 | gzip_header(out_buf); |
114 | | |
115 | | /* Header offset */ |
116 | 0 | pb = (uint8_t *) out_buf + FLB_GZIP_HEADER_OFFSET; |
117 | |
|
118 | 0 | flush = Z_NO_FLUSH; |
119 | 0 | while (1) { |
120 | 0 | strm.next_out = pb + strm.total_out; |
121 | 0 | strm.avail_out = out_size - (pb - (uint8_t *) out_buf); |
122 | |
|
123 | 0 | if (strm.avail_in == 0) { |
124 | 0 | flush = Z_FINISH; |
125 | 0 | } |
126 | |
|
127 | 0 | status = deflate(&strm, flush); |
128 | 0 | if (status == Z_STREAM_END) { |
129 | 0 | break; |
130 | 0 | } |
131 | 0 | else if (status != Z_OK) { |
132 | 0 | deflateEnd(&strm); |
133 | 0 | return -1; |
134 | 0 | } |
135 | 0 | } |
136 | | |
137 | 0 | if (deflateEnd(&strm) != Z_OK) { |
138 | 0 | flb_free(out_buf); |
139 | 0 | return -1; |
140 | 0 | } |
141 | 0 | *out_len = strm.total_out; |
142 | | |
143 | | /* Construct the gzip checksum (CRC32 footer) */ |
144 | 0 | footer_start = FLB_GZIP_HEADER_OFFSET + *out_len; |
145 | 0 | pb = (uint8_t *) out_buf + footer_start; |
146 | |
|
147 | 0 | crc = mz_crc32(MZ_CRC32_INIT, in_data, in_len); |
148 | 0 | *pb++ = crc & 0xFF; |
149 | 0 | *pb++ = (crc >> 8) & 0xFF; |
150 | 0 | *pb++ = (crc >> 16) & 0xFF; |
151 | 0 | *pb++ = (crc >> 24) & 0xFF; |
152 | 0 | *pb++ = in_len & 0xFF; |
153 | 0 | *pb++ = (in_len >> 8) & 0xFF; |
154 | 0 | *pb++ = (in_len >> 16) & 0xFF; |
155 | 0 | *pb++ = (in_len >> 24) & 0xFF; |
156 | | |
157 | | /* Set the real buffer size for the caller */ |
158 | 0 | *out_len += FLB_GZIP_HEADER_OFFSET + 8; |
159 | 0 | *out_data = out_buf; |
160 | |
|
161 | 0 | return 0; |
162 | 0 | } |
163 | | |
164 | | /* Uncompress (inflate) GZip data */ |
165 | | int flb_gzip_uncompress(void *in_data, size_t in_len, |
166 | | void **out_data, size_t *out_len) |
167 | 0 | { |
168 | 0 | int status; |
169 | 0 | uint8_t *p; |
170 | 0 | void *out_buf; |
171 | 0 | size_t out_size = 0; |
172 | 0 | void *zip_data; |
173 | 0 | size_t zip_len; |
174 | 0 | unsigned char flg; |
175 | 0 | unsigned int xlen, hcrc; |
176 | 0 | unsigned int dlen, crc; |
177 | 0 | mz_ulong crc_out; |
178 | 0 | mz_stream stream; |
179 | 0 | const unsigned char *start; |
180 | | |
181 | | /* Minimal length: header + crc32 */ |
182 | 0 | if (in_len < 18) { |
183 | 0 | flb_error("[gzip] unexpected content length"); |
184 | 0 | return -1; |
185 | 0 | } |
186 | | |
187 | | /* Magic bytes */ |
188 | 0 | p = in_data; |
189 | 0 | if (p[0] != 0x1F || p[1] != 0x8B) { |
190 | 0 | flb_error("[gzip] invalid magic bytes"); |
191 | 0 | return -1; |
192 | 0 | } |
193 | | |
194 | 0 | if (p[2] != 8) { |
195 | 0 | flb_error("[gzip] invalid method"); |
196 | 0 | return -1; |
197 | 0 | } |
198 | | |
199 | | /* Flag byte */ |
200 | 0 | flg = p[3]; |
201 | | |
202 | | /* Reserved bits */ |
203 | 0 | if (flg & 0xE0) { |
204 | 0 | flb_error("[gzip] invalid flag"); |
205 | 0 | return -1; |
206 | 0 | } |
207 | | |
208 | | /* Skip base header of 10 bytes */ |
209 | 0 | start = p + FLB_GZIP_HEADER_OFFSET; |
210 | | |
211 | | /* Skip extra data if present */ |
212 | 0 | if (flg & FEXTRA) { |
213 | 0 | xlen = read_le16(start); |
214 | 0 | if (xlen > in_len - 12) { |
215 | 0 | flb_error("[gzip] invalid gzip data"); |
216 | 0 | return -1; |
217 | 0 | } |
218 | 0 | start += xlen + 2; |
219 | 0 | } |
220 | | |
221 | | /* Skip file name if present */ |
222 | 0 | if (flg & FNAME) { |
223 | 0 | do { |
224 | 0 | if (start - p >= in_len) { |
225 | 0 | flb_error("[gzip] invalid gzip data (FNAME)"); |
226 | 0 | return -1; |
227 | 0 | } |
228 | 0 | } while (*start++); |
229 | 0 | } |
230 | | |
231 | | /* Skip file comment if present */ |
232 | 0 | if (flg & FCOMMENT) { |
233 | 0 | do { |
234 | 0 | if (start - p >= in_len) { |
235 | 0 | flb_error("[gzip] invalid gzip data (FCOMMENT)"); |
236 | 0 | return -1; |
237 | 0 | } |
238 | 0 | } while (*start++); |
239 | 0 | } |
240 | | |
241 | | /* Check header crc if present */ |
242 | 0 | if (flg & FHCRC) { |
243 | 0 | if (start - p > in_len - 2) { |
244 | 0 | flb_error("[gzip] invalid gzip data (FHRC)"); |
245 | 0 | return -1; |
246 | 0 | } |
247 | | |
248 | 0 | hcrc = read_le16(start); |
249 | 0 | crc = mz_crc32(MZ_CRC32_INIT, p, start - p) & 0x0000FFFF; |
250 | 0 | if (hcrc != crc) { |
251 | 0 | flb_error("[gzip] invalid gzip header CRC"); |
252 | 0 | return -1; |
253 | 0 | } |
254 | 0 | start += 2; |
255 | 0 | } |
256 | | |
257 | | /* Get decompressed length */ |
258 | 0 | dlen = read_le32(&p[in_len - 4]); |
259 | | |
260 | | /* Limit decompressed length to 100MB */ |
261 | 0 | if (dlen > 100000000) { |
262 | 0 | flb_error("[gzip] maximum decompression size is 100MB"); |
263 | 0 | return -1; |
264 | 0 | } |
265 | | |
266 | | /* Get CRC32 checksum of original data */ |
267 | 0 | crc = read_le32(&p[in_len - 8]); |
268 | | |
269 | | /* Decompress data */ |
270 | 0 | if ((p + in_len) - p < 8) { |
271 | 0 | flb_error("[gzip] invalid gzip CRC32 checksum"); |
272 | 0 | return -1; |
273 | 0 | } |
274 | | |
275 | | /* Allocate outgoing buffer */ |
276 | 0 | out_buf = flb_malloc(dlen); |
277 | 0 | if (!out_buf) { |
278 | 0 | flb_errno(); |
279 | 0 | return -1; |
280 | 0 | } |
281 | 0 | out_size = dlen; |
282 | | |
283 | | /* Ensure size is above 0 */ |
284 | 0 | if (((p + in_len) - start - 8) <= 0) { |
285 | 0 | flb_free(out_buf); |
286 | 0 | return -1; |
287 | 0 | } |
288 | | |
289 | | /* Map zip content */ |
290 | 0 | zip_data = (uint8_t *) start; |
291 | 0 | zip_len = (p + in_len) - start - 8; |
292 | |
|
293 | 0 | memset(&stream, 0, sizeof(stream)); |
294 | 0 | stream.next_in = zip_data; |
295 | 0 | stream.avail_in = zip_len; |
296 | 0 | stream.next_out = out_buf; |
297 | 0 | stream.avail_out = out_size; |
298 | |
|
299 | 0 | status = mz_inflateInit2(&stream, -Z_DEFAULT_WINDOW_BITS); |
300 | 0 | if (status != MZ_OK) { |
301 | 0 | flb_free(out_buf); |
302 | 0 | return -1; |
303 | 0 | } |
304 | | |
305 | 0 | status = mz_inflate(&stream, MZ_FINISH); |
306 | 0 | if (status != MZ_STREAM_END) { |
307 | 0 | mz_inflateEnd(&stream); |
308 | 0 | flb_free(out_buf); |
309 | 0 | return -1; |
310 | 0 | } |
311 | | |
312 | 0 | if (stream.total_out != dlen) { |
313 | 0 | mz_inflateEnd(&stream); |
314 | 0 | flb_free(out_buf); |
315 | 0 | flb_error("[gzip] invalid gzip data size"); |
316 | 0 | return -1; |
317 | 0 | } |
318 | | |
319 | | /* terminate the stream, it's not longer required */ |
320 | 0 | mz_inflateEnd(&stream); |
321 | | |
322 | | /* Validate message CRC vs inflated data CRC */ |
323 | 0 | crc_out = mz_crc32(MZ_CRC32_INIT, out_buf, dlen); |
324 | 0 | if (crc_out != crc) { |
325 | 0 | flb_free(out_buf); |
326 | 0 | flb_error("[gzip] invalid GZip checksum (CRC32)"); |
327 | 0 | return -1; |
328 | 0 | } |
329 | | |
330 | | /* set the uncompressed data */ |
331 | 0 | *out_len = dlen; |
332 | 0 | *out_data = out_buf; |
333 | |
|
334 | 0 | return 0; |
335 | 0 | } |