Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/common/compression.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * compression.c
4
 *
5
 * Shared code for compression methods and specifications.
6
 *
7
 * A compression specification specifies the parameters that should be used
8
 * when performing compression with a specific algorithm. The simplest
9
 * possible compression specification is an integer, which sets the
10
 * compression level.
11
 *
12
 * Otherwise, a compression specification is a comma-separated list of items,
13
 * each having the form keyword or keyword=value.
14
 *
15
 * Currently, the supported keywords are "level", "long", and "workers".
16
 *
17
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
18
 *
19
 * IDENTIFICATION
20
 *      src/common/compression.c
21
 *-------------------------------------------------------------------------
22
 */
23
24
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <errno.h>
#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38
39
static int  expect_integer_value(char *keyword, char *value,
40
                 pg_compress_specification *result);
41
static bool expect_boolean_value(char *keyword, char *value,
42
                 pg_compress_specification *result);
43
44
/*
45
 * Look up a compression algorithm by name. Returns true and sets *algorithm
46
 * if the name is recognized. Otherwise returns false.
47
 */
48
bool
49
parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
50
0
{
51
0
  if (strcmp(name, "none") == 0)
52
0
    *algorithm = PG_COMPRESSION_NONE;
53
0
  else if (strcmp(name, "gzip") == 0)
54
0
    *algorithm = PG_COMPRESSION_GZIP;
55
0
  else if (strcmp(name, "lz4") == 0)
56
0
    *algorithm = PG_COMPRESSION_LZ4;
57
0
  else if (strcmp(name, "zstd") == 0)
58
0
    *algorithm = PG_COMPRESSION_ZSTD;
59
0
  else
60
0
    return false;
61
0
  return true;
62
0
}
63
64
/*
65
 * Get the human-readable name corresponding to a particular compression
66
 * algorithm.
67
 */
68
const char *
69
get_compress_algorithm_name(pg_compress_algorithm algorithm)
70
0
{
71
0
  switch (algorithm)
72
0
  {
73
0
    case PG_COMPRESSION_NONE:
74
0
      return "none";
75
0
    case PG_COMPRESSION_GZIP:
76
0
      return "gzip";
77
0
    case PG_COMPRESSION_LZ4:
78
0
      return "lz4";
79
0
    case PG_COMPRESSION_ZSTD:
80
0
      return "zstd";
81
      /* no default, to provoke compiler warnings if values are added */
82
0
  }
83
0
  Assert(false);
84
0
  return "???";       /* placate compiler */
85
0
}
86
87
/*
88
 * Parse a compression specification for a specified algorithm.
89
 *
90
 * See the file header comments for a brief description of what a compression
91
 * specification is expected to look like.
92
 *
93
 * On return, all fields of the result object will be initialized.
94
 * In particular, result->parse_error will be NULL if no errors occurred
95
 * during parsing, and will otherwise contain an appropriate error message.
96
 * The caller may free this error message string using pfree, if desired.
97
 * Note, however, even if there's no parse error, the string might not make
98
 * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
99
 *
100
 * The compression level is assigned by default if not directly specified
101
 * by the specification.
102
 *
103
 * Use validate_compress_specification() to find out whether a compression
104
 * specification is semantically sensible.
105
 */
106
void
107
parse_compress_specification(pg_compress_algorithm algorithm, char *specification,
108
               pg_compress_specification *result)
109
0
{
110
0
  int     bare_level;
111
0
  char     *bare_level_endp;
112
113
  /* Initial setup of result object. */
114
0
  result->algorithm = algorithm;
115
0
  result->options = 0;
116
0
  result->parse_error = NULL;
117
118
  /*
119
   * Assign a default level depending on the compression method.  This may
120
   * be enforced later.
121
   */
122
0
  switch (result->algorithm)
123
0
  {
124
0
    case PG_COMPRESSION_NONE:
125
0
      result->level = 0;
126
0
      break;
127
0
    case PG_COMPRESSION_LZ4:
128
#ifdef USE_LZ4
129
      result->level = 0;  /* fast compression mode */
130
#else
131
0
      result->parse_error =
132
0
        psprintf(_("this build does not support compression with %s"),
133
0
             "LZ4");
134
0
#endif
135
0
      break;
136
0
    case PG_COMPRESSION_ZSTD:
137
#ifdef USE_ZSTD
138
      result->level = ZSTD_CLEVEL_DEFAULT;
139
#else
140
0
      result->parse_error =
141
0
        psprintf(_("this build does not support compression with %s"),
142
0
             "ZSTD");
143
0
#endif
144
0
      break;
145
0
    case PG_COMPRESSION_GZIP:
146
0
#ifdef HAVE_LIBZ
147
0
      result->level = Z_DEFAULT_COMPRESSION;
148
#else
149
      result->parse_error =
150
        psprintf(_("this build does not support compression with %s"),
151
             "gzip");
152
#endif
153
0
      break;
154
0
  }
155
156
  /* If there is no specification, we're done already. */
157
0
  if (specification == NULL)
158
0
    return;
159
160
  /* As a special case, the specification can be a bare integer. */
161
0
  bare_level = strtol(specification, &bare_level_endp, 10);
162
0
  if (specification != bare_level_endp && *bare_level_endp == '\0')
163
0
  {
164
0
    result->level = bare_level;
165
0
    return;
166
0
  }
167
168
  /* Look for comma-separated keyword or keyword=value entries. */
169
0
  while (1)
170
0
  {
171
0
    char     *kwstart;
172
0
    char     *kwend;
173
0
    char     *vstart;
174
0
    char     *vend;
175
0
    int     kwlen;
176
0
    int     vlen;
177
0
    bool    has_value;
178
0
    char     *keyword;
179
0
    char     *value;
180
181
    /* Figure start, end, and length of next keyword and any value. */
182
0
    kwstart = kwend = specification;
183
0
    while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
184
0
      ++kwend;
185
0
    kwlen = kwend - kwstart;
186
0
    if (*kwend != '=')
187
0
    {
188
0
      vstart = vend = NULL;
189
0
      vlen = 0;
190
0
      has_value = false;
191
0
    }
192
0
    else
193
0
    {
194
0
      vstart = vend = kwend + 1;
195
0
      while (*vend != '\0' && *vend != ',')
196
0
        ++vend;
197
0
      vlen = vend - vstart;
198
0
      has_value = true;
199
0
    }
200
201
    /* Reject empty keyword. */
202
0
    if (kwlen == 0)
203
0
    {
204
0
      result->parse_error =
205
0
        pstrdup(_("found empty string where a compression option was expected"));
206
0
      break;
207
0
    }
208
209
    /* Extract keyword and value as separate C strings. */
210
0
    keyword = palloc(kwlen + 1);
211
0
    memcpy(keyword, kwstart, kwlen);
212
0
    keyword[kwlen] = '\0';
213
0
    if (!has_value)
214
0
      value = NULL;
215
0
    else
216
0
    {
217
0
      value = palloc(vlen + 1);
218
0
      memcpy(value, vstart, vlen);
219
0
      value[vlen] = '\0';
220
0
    }
221
222
    /* Handle whatever keyword we found. */
223
0
    if (strcmp(keyword, "level") == 0)
224
0
    {
225
0
      result->level = expect_integer_value(keyword, value, result);
226
227
      /*
228
       * No need to set a flag in "options", there is a default level
229
       * set at least thanks to the logic above.
230
       */
231
0
    }
232
0
    else if (strcmp(keyword, "workers") == 0)
233
0
    {
234
0
      result->workers = expect_integer_value(keyword, value, result);
235
0
      result->options |= PG_COMPRESSION_OPTION_WORKERS;
236
0
    }
237
0
    else if (strcmp(keyword, "long") == 0)
238
0
    {
239
0
      result->long_distance = expect_boolean_value(keyword, value, result);
240
0
      result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
241
0
    }
242
0
    else
243
0
      result->parse_error =
244
0
        psprintf(_("unrecognized compression option: \"%s\""), keyword);
245
246
    /* Release memory, just to be tidy. */
247
0
    pfree(keyword);
248
0
    if (value != NULL)
249
0
      pfree(value);
250
251
    /*
252
     * If we got an error or have reached the end of the string, stop.
253
     *
254
     * If there is no value, then the end of the keyword might have been
255
     * the end of the string. If there is a value, then the end of the
256
     * keyword cannot have been the end of the string, but the end of the
257
     * value might have been.
258
     */
259
0
    if (result->parse_error != NULL ||
260
0
      (vend == NULL ? *kwend == '\0' : *vend == '\0'))
261
0
      break;
262
263
    /* Advance to next entry and loop around. */
264
0
    specification = vend == NULL ? kwend + 1 : vend + 1;
265
0
  }
266
0
}
267
268
/*
269
 * Parse 'value' as an integer and return the result.
270
 *
271
 * If parsing fails, set result->parse_error to an appropriate message
272
 * and return -1.
273
 */
274
static int
275
expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
276
0
{
277
0
  int     ivalue;
278
0
  char     *ivalue_endp;
279
280
0
  if (value == NULL)
281
0
  {
282
0
    result->parse_error =
283
0
      psprintf(_("compression option \"%s\" requires a value"),
284
0
           keyword);
285
0
    return -1;
286
0
  }
287
288
0
  ivalue = strtol(value, &ivalue_endp, 10);
289
0
  if (ivalue_endp == value || *ivalue_endp != '\0')
290
0
  {
291
0
    result->parse_error =
292
0
      psprintf(_("value for compression option \"%s\" must be an integer"),
293
0
           keyword);
294
0
    return -1;
295
0
  }
296
0
  return ivalue;
297
0
}
298
299
/*
300
 * Parse 'value' as a boolean and return the result.
301
 *
302
 * If parsing fails, set result->parse_error to an appropriate message
303
 * and return -1.  The caller must check result->parse_error to determine if
304
 * the call was successful.
305
 *
306
 * Valid values are: yes, no, on, off, 1, 0.
307
 *
308
 * Inspired by ParseVariableBool().
309
 */
310
static bool
311
expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
312
0
{
313
0
  if (value == NULL)
314
0
    return true;
315
316
0
  if (pg_strcasecmp(value, "yes") == 0)
317
0
    return true;
318
0
  if (pg_strcasecmp(value, "on") == 0)
319
0
    return true;
320
0
  if (pg_strcasecmp(value, "1") == 0)
321
0
    return true;
322
323
0
  if (pg_strcasecmp(value, "no") == 0)
324
0
    return false;
325
0
  if (pg_strcasecmp(value, "off") == 0)
326
0
    return false;
327
0
  if (pg_strcasecmp(value, "0") == 0)
328
0
    return false;
329
330
0
  result->parse_error =
331
0
    psprintf(_("value for compression option \"%s\" must be a Boolean value"),
332
0
         keyword);
333
0
  return false;
334
0
}
335
336
/*
337
 * Returns NULL if the compression specification string was syntactically
338
 * valid and semantically sensible.  Otherwise, returns an error message.
339
 *
340
 * Does not test whether this build of PostgreSQL supports the requested
341
 * compression method.
342
 */
343
char *
344
validate_compress_specification(pg_compress_specification *spec)
345
0
{
346
0
  int     min_level = 1;
347
0
  int     max_level = 1;
348
0
  int     default_level = 0;
349
350
  /* If it didn't even parse OK, it's definitely no good. */
351
0
  if (spec->parse_error != NULL)
352
0
    return spec->parse_error;
353
354
  /*
355
   * Check that the algorithm expects a compression level and it is within
356
   * the legal range for the algorithm.
357
   */
358
0
  switch (spec->algorithm)
359
0
  {
360
0
    case PG_COMPRESSION_GZIP:
361
0
      max_level = 9;
362
0
#ifdef HAVE_LIBZ
363
0
      default_level = Z_DEFAULT_COMPRESSION;
364
0
#endif
365
0
      break;
366
0
    case PG_COMPRESSION_LZ4:
367
0
      max_level = 12;
368
0
      default_level = 0;  /* fast mode */
369
0
      break;
370
0
    case PG_COMPRESSION_ZSTD:
371
#ifdef USE_ZSTD
372
      max_level = ZSTD_maxCLevel();
373
      min_level = ZSTD_minCLevel();
374
      default_level = ZSTD_CLEVEL_DEFAULT;
375
#endif
376
0
      break;
377
0
    case PG_COMPRESSION_NONE:
378
0
      if (spec->level != 0)
379
0
        return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
380
0
                get_compress_algorithm_name(spec->algorithm));
381
0
      break;
382
0
  }
383
384
0
  if ((spec->level < min_level || spec->level > max_level) &&
385
0
    spec->level != default_level)
386
0
    return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
387
0
            get_compress_algorithm_name(spec->algorithm),
388
0
            min_level, max_level, default_level);
389
390
  /*
391
   * Of the compression algorithms that we currently support, only zstd
392
   * allows parallel workers.
393
   */
394
0
  if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
395
0
    (spec->algorithm != PG_COMPRESSION_ZSTD))
396
0
  {
397
0
    return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
398
0
            get_compress_algorithm_name(spec->algorithm));
399
0
  }
400
401
  /*
402
   * Of the compression algorithms that we currently support, only zstd
403
   * supports long-distance mode.
404
   */
405
0
  if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
406
0
    (spec->algorithm != PG_COMPRESSION_ZSTD))
407
0
  {
408
0
    return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
409
0
            get_compress_algorithm_name(spec->algorithm));
410
0
  }
411
412
0
  return NULL;
413
0
}
414
415
#ifdef FRONTEND
416
417
/*
 * Split a command-line compression option (commonly -Z/--compress) into an
 * algorithm name and an optional detail string.
 *
 * The option has the form METHOD[:DETAIL]; the pieces are meant to be fed
 * later to parse_compress_algorithm() and parse_compress_specification().
 * Both *algorithm and *detail are always set; *detail is NULL when there is
 * no detail.  Non-NULL outputs are freshly allocated strings.
 *
 * For backward compatibility, an option that is entirely an integer implies
 * the method: zero means "none" with no detail, anything else means "gzip"
 * with the integer text as detail.  An empty option string takes the same
 * path as zero, because strtol() then consumes nothing and yields 0.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
  char       *number_end;
  long        number;
  const char *colon;
  size_t      alglen;
  char       *algname;

  /* Bare integer: the method is implied by the value. */
  number = strtol(option, &number_end, 10);
  if (*number_end == '\0')
  {
    if (number != 0)
    {
      *algorithm = pstrdup("gzip");
      *detail = pstrdup(option);
    }
    else
    {
      *algorithm = pstrdup("none");
      *detail = NULL;
    }
    return;
  }

  /* No colon: the whole option is the algorithm name. */
  colon = strchr(option, ':');
  if (colon == NULL)
  {
    *algorithm = pstrdup(option);
    *detail = NULL;
    return;
  }

  /* METHOD:DETAIL: copy out the part before the colon, then the rest. */
  alglen = colon - option;
  algname = palloc(alglen + 1);
  memcpy(algname, option, alglen);
  algname[alglen] = '\0';

  *algorithm = algname;
  *detail = pstrdup(colon + 1);
}
476
#endif              /* FRONTEND */