/src/postgres/src/common/compression.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * compression.c |
4 | | * |
5 | | * Shared code for compression methods and specifications. |
6 | | * |
7 | | * A compression specification specifies the parameters that should be used |
8 | | * when performing compression with a specific algorithm. The simplest |
9 | | * possible compression specification is an integer, which sets the |
10 | | * compression level. |
11 | | * |
12 | | * Otherwise, a compression specification is a comma-separated list of items, |
13 | | * each having the form keyword or keyword=value. |
14 | | * |
15 | | * Currently, the supported keywords are "level", "long", and "workers". |
16 | | * |
17 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
18 | | * |
19 | | * IDENTIFICATION |
20 | | * src/common/compression.c |
21 | | *------------------------------------------------------------------------- |
22 | | */ |
23 | | |
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <errno.h>
#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38 | | |
39 | | static int expect_integer_value(char *keyword, char *value, |
40 | | pg_compress_specification *result); |
41 | | static bool expect_boolean_value(char *keyword, char *value, |
42 | | pg_compress_specification *result); |
43 | | |
44 | | /* |
45 | | * Look up a compression algorithm by name. Returns true and sets *algorithm |
46 | | * if the name is recognized. Otherwise returns false. |
47 | | */ |
48 | | bool |
49 | | parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm) |
50 | 0 | { |
51 | 0 | if (strcmp(name, "none") == 0) |
52 | 0 | *algorithm = PG_COMPRESSION_NONE; |
53 | 0 | else if (strcmp(name, "gzip") == 0) |
54 | 0 | *algorithm = PG_COMPRESSION_GZIP; |
55 | 0 | else if (strcmp(name, "lz4") == 0) |
56 | 0 | *algorithm = PG_COMPRESSION_LZ4; |
57 | 0 | else if (strcmp(name, "zstd") == 0) |
58 | 0 | *algorithm = PG_COMPRESSION_ZSTD; |
59 | 0 | else |
60 | 0 | return false; |
61 | 0 | return true; |
62 | 0 | } |
63 | | |
64 | | /* |
65 | | * Get the human-readable name corresponding to a particular compression |
66 | | * algorithm. |
67 | | */ |
68 | | const char * |
69 | | get_compress_algorithm_name(pg_compress_algorithm algorithm) |
70 | 0 | { |
71 | 0 | switch (algorithm) |
72 | 0 | { |
73 | 0 | case PG_COMPRESSION_NONE: |
74 | 0 | return "none"; |
75 | 0 | case PG_COMPRESSION_GZIP: |
76 | 0 | return "gzip"; |
77 | 0 | case PG_COMPRESSION_LZ4: |
78 | 0 | return "lz4"; |
79 | 0 | case PG_COMPRESSION_ZSTD: |
80 | 0 | return "zstd"; |
81 | | /* no default, to provoke compiler warnings if values are added */ |
82 | 0 | } |
83 | 0 | Assert(false); |
84 | 0 | return "???"; /* placate compiler */ |
85 | 0 | } |
86 | | |
87 | | /* |
88 | | * Parse a compression specification for a specified algorithm. |
89 | | * |
90 | | * See the file header comments for a brief description of what a compression |
91 | | * specification is expected to look like. |
92 | | * |
93 | | * On return, all fields of the result object will be initialized. |
94 | | * In particular, result->parse_error will be NULL if no errors occurred |
95 | | * during parsing, and will otherwise contain an appropriate error message. |
96 | | * The caller may free this error message string using pfree, if desired. |
97 | | * Note, however, even if there's no parse error, the string might not make |
98 | | * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK. |
99 | | * |
100 | | * The compression level is assigned by default if not directly specified |
101 | | * by the specification. |
102 | | * |
103 | | * Use validate_compress_specification() to find out whether a compression |
104 | | * specification is semantically sensible. |
105 | | */ |
106 | | void |
107 | | parse_compress_specification(pg_compress_algorithm algorithm, char *specification, |
108 | | pg_compress_specification *result) |
109 | 0 | { |
110 | 0 | int bare_level; |
111 | 0 | char *bare_level_endp; |
112 | | |
113 | | /* Initial setup of result object. */ |
114 | 0 | result->algorithm = algorithm; |
115 | 0 | result->options = 0; |
116 | 0 | result->parse_error = NULL; |
117 | | |
118 | | /* |
119 | | * Assign a default level depending on the compression method. This may |
120 | | * be enforced later. |
121 | | */ |
122 | 0 | switch (result->algorithm) |
123 | 0 | { |
124 | 0 | case PG_COMPRESSION_NONE: |
125 | 0 | result->level = 0; |
126 | 0 | break; |
127 | 0 | case PG_COMPRESSION_LZ4: |
128 | | #ifdef USE_LZ4 |
129 | | result->level = 0; /* fast compression mode */ |
130 | | #else |
131 | 0 | result->parse_error = |
132 | 0 | psprintf(_("this build does not support compression with %s"), |
133 | 0 | "LZ4"); |
134 | 0 | #endif |
135 | 0 | break; |
136 | 0 | case PG_COMPRESSION_ZSTD: |
137 | | #ifdef USE_ZSTD |
138 | | result->level = ZSTD_CLEVEL_DEFAULT; |
139 | | #else |
140 | 0 | result->parse_error = |
141 | 0 | psprintf(_("this build does not support compression with %s"), |
142 | 0 | "ZSTD"); |
143 | 0 | #endif |
144 | 0 | break; |
145 | 0 | case PG_COMPRESSION_GZIP: |
146 | 0 | #ifdef HAVE_LIBZ |
147 | 0 | result->level = Z_DEFAULT_COMPRESSION; |
148 | | #else |
149 | | result->parse_error = |
150 | | psprintf(_("this build does not support compression with %s"), |
151 | | "gzip"); |
152 | | #endif |
153 | 0 | break; |
154 | 0 | } |
155 | | |
156 | | /* If there is no specification, we're done already. */ |
157 | 0 | if (specification == NULL) |
158 | 0 | return; |
159 | | |
160 | | /* As a special case, the specification can be a bare integer. */ |
161 | 0 | bare_level = strtol(specification, &bare_level_endp, 10); |
162 | 0 | if (specification != bare_level_endp && *bare_level_endp == '\0') |
163 | 0 | { |
164 | 0 | result->level = bare_level; |
165 | 0 | return; |
166 | 0 | } |
167 | | |
168 | | /* Look for comma-separated keyword or keyword=value entries. */ |
169 | 0 | while (1) |
170 | 0 | { |
171 | 0 | char *kwstart; |
172 | 0 | char *kwend; |
173 | 0 | char *vstart; |
174 | 0 | char *vend; |
175 | 0 | int kwlen; |
176 | 0 | int vlen; |
177 | 0 | bool has_value; |
178 | 0 | char *keyword; |
179 | 0 | char *value; |
180 | | |
181 | | /* Figure start, end, and length of next keyword and any value. */ |
182 | 0 | kwstart = kwend = specification; |
183 | 0 | while (*kwend != '\0' && *kwend != ',' && *kwend != '=') |
184 | 0 | ++kwend; |
185 | 0 | kwlen = kwend - kwstart; |
186 | 0 | if (*kwend != '=') |
187 | 0 | { |
188 | 0 | vstart = vend = NULL; |
189 | 0 | vlen = 0; |
190 | 0 | has_value = false; |
191 | 0 | } |
192 | 0 | else |
193 | 0 | { |
194 | 0 | vstart = vend = kwend + 1; |
195 | 0 | while (*vend != '\0' && *vend != ',') |
196 | 0 | ++vend; |
197 | 0 | vlen = vend - vstart; |
198 | 0 | has_value = true; |
199 | 0 | } |
200 | | |
201 | | /* Reject empty keyword. */ |
202 | 0 | if (kwlen == 0) |
203 | 0 | { |
204 | 0 | result->parse_error = |
205 | 0 | pstrdup(_("found empty string where a compression option was expected")); |
206 | 0 | break; |
207 | 0 | } |
208 | | |
209 | | /* Extract keyword and value as separate C strings. */ |
210 | 0 | keyword = palloc(kwlen + 1); |
211 | 0 | memcpy(keyword, kwstart, kwlen); |
212 | 0 | keyword[kwlen] = '\0'; |
213 | 0 | if (!has_value) |
214 | 0 | value = NULL; |
215 | 0 | else |
216 | 0 | { |
217 | 0 | value = palloc(vlen + 1); |
218 | 0 | memcpy(value, vstart, vlen); |
219 | 0 | value[vlen] = '\0'; |
220 | 0 | } |
221 | | |
222 | | /* Handle whatever keyword we found. */ |
223 | 0 | if (strcmp(keyword, "level") == 0) |
224 | 0 | { |
225 | 0 | result->level = expect_integer_value(keyword, value, result); |
226 | | |
227 | | /* |
228 | | * No need to set a flag in "options", there is a default level |
229 | | * set at least thanks to the logic above. |
230 | | */ |
231 | 0 | } |
232 | 0 | else if (strcmp(keyword, "workers") == 0) |
233 | 0 | { |
234 | 0 | result->workers = expect_integer_value(keyword, value, result); |
235 | 0 | result->options |= PG_COMPRESSION_OPTION_WORKERS; |
236 | 0 | } |
237 | 0 | else if (strcmp(keyword, "long") == 0) |
238 | 0 | { |
239 | 0 | result->long_distance = expect_boolean_value(keyword, value, result); |
240 | 0 | result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE; |
241 | 0 | } |
242 | 0 | else |
243 | 0 | result->parse_error = |
244 | 0 | psprintf(_("unrecognized compression option: \"%s\""), keyword); |
245 | | |
246 | | /* Release memory, just to be tidy. */ |
247 | 0 | pfree(keyword); |
248 | 0 | if (value != NULL) |
249 | 0 | pfree(value); |
250 | | |
251 | | /* |
252 | | * If we got an error or have reached the end of the string, stop. |
253 | | * |
254 | | * If there is no value, then the end of the keyword might have been |
255 | | * the end of the string. If there is a value, then the end of the |
256 | | * keyword cannot have been the end of the string, but the end of the |
257 | | * value might have been. |
258 | | */ |
259 | 0 | if (result->parse_error != NULL || |
260 | 0 | (vend == NULL ? *kwend == '\0' : *vend == '\0')) |
261 | 0 | break; |
262 | | |
263 | | /* Advance to next entry and loop around. */ |
264 | 0 | specification = vend == NULL ? kwend + 1 : vend + 1; |
265 | 0 | } |
266 | 0 | } |
267 | | |
268 | | /* |
269 | | * Parse 'value' as an integer and return the result. |
270 | | * |
271 | | * If parsing fails, set result->parse_error to an appropriate message |
272 | | * and return -1. |
273 | | */ |
274 | | static int |
275 | | expect_integer_value(char *keyword, char *value, pg_compress_specification *result) |
276 | 0 | { |
277 | 0 | int ivalue; |
278 | 0 | char *ivalue_endp; |
279 | |
|
280 | 0 | if (value == NULL) |
281 | 0 | { |
282 | 0 | result->parse_error = |
283 | 0 | psprintf(_("compression option \"%s\" requires a value"), |
284 | 0 | keyword); |
285 | 0 | return -1; |
286 | 0 | } |
287 | | |
288 | 0 | ivalue = strtol(value, &ivalue_endp, 10); |
289 | 0 | if (ivalue_endp == value || *ivalue_endp != '\0') |
290 | 0 | { |
291 | 0 | result->parse_error = |
292 | 0 | psprintf(_("value for compression option \"%s\" must be an integer"), |
293 | 0 | keyword); |
294 | 0 | return -1; |
295 | 0 | } |
296 | 0 | return ivalue; |
297 | 0 | } |
298 | | |
299 | | /* |
300 | | * Parse 'value' as a boolean and return the result. |
301 | | * |
302 | | * If parsing fails, set result->parse_error to an appropriate message |
303 | | * and return -1. The caller must check result->parse_error to determine if |
304 | | * the call was successful. |
305 | | * |
306 | | * Valid values are: yes, no, on, off, 1, 0. |
307 | | * |
308 | | * Inspired by ParseVariableBool(). |
309 | | */ |
310 | | static bool |
311 | | expect_boolean_value(char *keyword, char *value, pg_compress_specification *result) |
312 | 0 | { |
313 | 0 | if (value == NULL) |
314 | 0 | return true; |
315 | | |
316 | 0 | if (pg_strcasecmp(value, "yes") == 0) |
317 | 0 | return true; |
318 | 0 | if (pg_strcasecmp(value, "on") == 0) |
319 | 0 | return true; |
320 | 0 | if (pg_strcasecmp(value, "1") == 0) |
321 | 0 | return true; |
322 | | |
323 | 0 | if (pg_strcasecmp(value, "no") == 0) |
324 | 0 | return false; |
325 | 0 | if (pg_strcasecmp(value, "off") == 0) |
326 | 0 | return false; |
327 | 0 | if (pg_strcasecmp(value, "0") == 0) |
328 | 0 | return false; |
329 | | |
330 | 0 | result->parse_error = |
331 | 0 | psprintf(_("value for compression option \"%s\" must be a Boolean value"), |
332 | 0 | keyword); |
333 | 0 | return false; |
334 | 0 | } |
335 | | |
336 | | /* |
337 | | * Returns NULL if the compression specification string was syntactically |
338 | | * valid and semantically sensible. Otherwise, returns an error message. |
339 | | * |
340 | | * Does not test whether this build of PostgreSQL supports the requested |
341 | | * compression method. |
342 | | */ |
343 | | char * |
344 | | validate_compress_specification(pg_compress_specification *spec) |
345 | 0 | { |
346 | 0 | int min_level = 1; |
347 | 0 | int max_level = 1; |
348 | 0 | int default_level = 0; |
349 | | |
350 | | /* If it didn't even parse OK, it's definitely no good. */ |
351 | 0 | if (spec->parse_error != NULL) |
352 | 0 | return spec->parse_error; |
353 | | |
354 | | /* |
355 | | * Check that the algorithm expects a compression level and it is within |
356 | | * the legal range for the algorithm. |
357 | | */ |
358 | 0 | switch (spec->algorithm) |
359 | 0 | { |
360 | 0 | case PG_COMPRESSION_GZIP: |
361 | 0 | max_level = 9; |
362 | 0 | #ifdef HAVE_LIBZ |
363 | 0 | default_level = Z_DEFAULT_COMPRESSION; |
364 | 0 | #endif |
365 | 0 | break; |
366 | 0 | case PG_COMPRESSION_LZ4: |
367 | 0 | max_level = 12; |
368 | 0 | default_level = 0; /* fast mode */ |
369 | 0 | break; |
370 | 0 | case PG_COMPRESSION_ZSTD: |
371 | | #ifdef USE_ZSTD |
372 | | max_level = ZSTD_maxCLevel(); |
373 | | min_level = ZSTD_minCLevel(); |
374 | | default_level = ZSTD_CLEVEL_DEFAULT; |
375 | | #endif |
376 | 0 | break; |
377 | 0 | case PG_COMPRESSION_NONE: |
378 | 0 | if (spec->level != 0) |
379 | 0 | return psprintf(_("compression algorithm \"%s\" does not accept a compression level"), |
380 | 0 | get_compress_algorithm_name(spec->algorithm)); |
381 | 0 | break; |
382 | 0 | } |
383 | | |
384 | 0 | if ((spec->level < min_level || spec->level > max_level) && |
385 | 0 | spec->level != default_level) |
386 | 0 | return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"), |
387 | 0 | get_compress_algorithm_name(spec->algorithm), |
388 | 0 | min_level, max_level, default_level); |
389 | | |
390 | | /* |
391 | | * Of the compression algorithms that we currently support, only zstd |
392 | | * allows parallel workers. |
393 | | */ |
394 | 0 | if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 && |
395 | 0 | (spec->algorithm != PG_COMPRESSION_ZSTD)) |
396 | 0 | { |
397 | 0 | return psprintf(_("compression algorithm \"%s\" does not accept a worker count"), |
398 | 0 | get_compress_algorithm_name(spec->algorithm)); |
399 | 0 | } |
400 | | |
401 | | /* |
402 | | * Of the compression algorithms that we currently support, only zstd |
403 | | * supports long-distance mode. |
404 | | */ |
405 | 0 | if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 && |
406 | 0 | (spec->algorithm != PG_COMPRESSION_ZSTD)) |
407 | 0 | { |
408 | 0 | return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"), |
409 | 0 | get_compress_algorithm_name(spec->algorithm)); |
410 | 0 | } |
411 | | |
412 | 0 | return NULL; |
413 | 0 | } |
414 | | |
415 | | #ifdef FRONTEND |
416 | | |
/*
 * Split a command-line compression option (commonly -Z/--compress) into
 * its METHOD and DETAIL parts.
 *
 * The option has the form METHOD[:DETAIL].  This function only separates
 * the two pieces; DETAIL is meant to be fed later to
 * parse_compress_specification().  Both outputs are freshly allocated
 * strings, except that *detail is set to NULL when there is no detail.
 *
 * For backward compatibility, an option that strtol() consumes entirely as
 * a base-10 integer implies the method: zero means "none" with no detail,
 * anything else means "gzip" with the whole option as detail.  Note that an
 * empty option string also takes this path and is treated as zero.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
	char	   *tail;
	long		level;
	const char *colon;
	size_t		namelen;
	char	   *name;

	/* Bare integer: the method is implied by the value. */
	level = strtol(option, &tail, 10);
	if (*tail == '\0')
	{
		if (level != 0)
		{
			*algorithm = pstrdup("gzip");
			*detail = pstrdup(option);
		}
		else
		{
			*algorithm = pstrdup("none");
			*detail = NULL;
		}
		return;
	}

	/* No separator: the whole option is the method name. */
	colon = strchr(option, ':');
	if (colon == NULL)
	{
		*algorithm = pstrdup(option);
		*detail = NULL;
		return;
	}

	/* Copy out the text before the colon as the method name. */
	namelen = colon - option;
	name = palloc(namelen + 1);
	memcpy(name, option, namelen);
	name[namelen] = '\0';

	*algorithm = name;
	*detail = pstrdup(colon + 1);
}
476 | | #endif /* FRONTEND */ |