/src/postgres/src/backend/access/common/toast_compression.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * toast_compression.c |
4 | | * Functions for toast compression. |
5 | | * |
6 | | * Copyright (c) 2021-2025, PostgreSQL Global Development Group |
7 | | * |
8 | | * |
9 | | * IDENTIFICATION |
10 | | * src/backend/access/common/toast_compression.c |
11 | | * |
12 | | *------------------------------------------------------------------------- |
13 | | */ |
14 | | #include "postgres.h" |
15 | | |
16 | | #ifdef USE_LZ4 |
17 | | #include <lz4.h> |
18 | | #endif |
19 | | |
20 | | #include "access/detoast.h" |
21 | | #include "access/toast_compression.h" |
22 | | #include "common/pg_lzcompress.h" |
23 | | #include "varatt.h" |
24 | | |
25 | | /* GUC */ |
26 | | int default_toast_compression = TOAST_PGLZ_COMPRESSION; |
27 | | |
28 | | #define NO_LZ4_SUPPORT() \ |
29 | 0 | ereport(ERROR, \ |
30 | 0 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ |
31 | 0 | errmsg("compression method lz4 not supported"), \ |
32 | 0 | errdetail("This functionality requires the server to be built with lz4 support."))) |
33 | | |
34 | | /* |
35 | | * Compress a varlena using PGLZ. |
36 | | * |
37 | | * Returns the compressed varlena, or NULL if compression fails. |
38 | | */ |
39 | | struct varlena * |
40 | | pglz_compress_datum(const struct varlena *value) |
41 | 0 | { |
42 | 0 | int32 valsize, |
43 | 0 | len; |
44 | 0 | struct varlena *tmp = NULL; |
45 | |
|
46 | 0 | valsize = VARSIZE_ANY_EXHDR(value); |
47 | | |
48 | | /* |
49 | | * No point in wasting a palloc cycle if value size is outside the allowed |
50 | | * range for compression. |
51 | | */ |
52 | 0 | if (valsize < PGLZ_strategy_default->min_input_size || |
53 | 0 | valsize > PGLZ_strategy_default->max_input_size) |
54 | 0 | return NULL; |
55 | | |
56 | | /* |
57 | | * Figure out the maximum possible size of the pglz output, add the bytes |
58 | | * that will be needed for varlena overhead, and allocate that amount. |
59 | | */ |
60 | 0 | tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + |
61 | 0 | VARHDRSZ_COMPRESSED); |
62 | |
|
63 | 0 | len = pglz_compress(VARDATA_ANY(value), |
64 | 0 | valsize, |
65 | 0 | (char *) tmp + VARHDRSZ_COMPRESSED, |
66 | 0 | NULL); |
67 | 0 | if (len < 0) |
68 | 0 | { |
69 | 0 | pfree(tmp); |
70 | 0 | return NULL; |
71 | 0 | } |
72 | | |
73 | 0 | SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); |
74 | |
|
75 | 0 | return tmp; |
76 | 0 | } |
77 | | |
78 | | /* |
79 | | * Decompress a varlena that was compressed using PGLZ. |
80 | | */ |
81 | | struct varlena * |
82 | | pglz_decompress_datum(const struct varlena *value) |
83 | 0 | { |
84 | 0 | struct varlena *result; |
85 | 0 | int32 rawsize; |
86 | | |
87 | | /* allocate memory for the uncompressed data */ |
88 | 0 | result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); |
89 | | |
90 | | /* decompress the data */ |
91 | 0 | rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, |
92 | 0 | VARSIZE(value) - VARHDRSZ_COMPRESSED, |
93 | 0 | VARDATA(result), |
94 | 0 | VARDATA_COMPRESSED_GET_EXTSIZE(value), true); |
95 | 0 | if (rawsize < 0) |
96 | 0 | ereport(ERROR, |
97 | 0 | (errcode(ERRCODE_DATA_CORRUPTED), |
98 | 0 | errmsg_internal("compressed pglz data is corrupt"))); |
99 | | |
100 | 0 | SET_VARSIZE(result, rawsize + VARHDRSZ); |
101 | |
|
102 | 0 | return result; |
103 | 0 | } |
104 | | |
105 | | /* |
106 | | * Decompress part of a varlena that was compressed using PGLZ. |
107 | | */ |
108 | | struct varlena * |
109 | | pglz_decompress_datum_slice(const struct varlena *value, |
110 | | int32 slicelength) |
111 | 0 | { |
112 | 0 | struct varlena *result; |
113 | 0 | int32 rawsize; |
114 | | |
115 | | /* allocate memory for the uncompressed data */ |
116 | 0 | result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
117 | | |
118 | | /* decompress the data */ |
119 | 0 | rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED, |
120 | 0 | VARSIZE(value) - VARHDRSZ_COMPRESSED, |
121 | 0 | VARDATA(result), |
122 | 0 | slicelength, false); |
123 | 0 | if (rawsize < 0) |
124 | 0 | ereport(ERROR, |
125 | 0 | (errcode(ERRCODE_DATA_CORRUPTED), |
126 | 0 | errmsg_internal("compressed pglz data is corrupt"))); |
127 | | |
128 | 0 | SET_VARSIZE(result, rawsize + VARHDRSZ); |
129 | |
|
130 | 0 | return result; |
131 | 0 | } |
132 | | |
133 | | /* |
134 | | * Compress a varlena using LZ4. |
135 | | * |
136 | | * Returns the compressed varlena, or NULL if compression fails. |
137 | | */ |
138 | | struct varlena * |
139 | | lz4_compress_datum(const struct varlena *value) |
140 | 0 | { |
141 | 0 | #ifndef USE_LZ4 |
142 | 0 | NO_LZ4_SUPPORT(); |
143 | 0 | return NULL; /* keep compiler quiet */ |
144 | | #else |
145 | | int32 valsize; |
146 | | int32 len; |
147 | | int32 max_size; |
148 | | struct varlena *tmp = NULL; |
149 | | |
150 | | valsize = VARSIZE_ANY_EXHDR(value); |
151 | | |
152 | | /* |
153 | | * Figure out the maximum possible size of the LZ4 output, add the bytes |
154 | | * that will be needed for varlena overhead, and allocate that amount. |
155 | | */ |
156 | | max_size = LZ4_compressBound(valsize); |
157 | | tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED); |
158 | | |
159 | | len = LZ4_compress_default(VARDATA_ANY(value), |
160 | | (char *) tmp + VARHDRSZ_COMPRESSED, |
161 | | valsize, max_size); |
162 | | if (len <= 0) |
163 | | elog(ERROR, "lz4 compression failed"); |
164 | | |
165 | | /* data is incompressible so just free the memory and return NULL */ |
166 | | if (len > valsize) |
167 | | { |
168 | | pfree(tmp); |
169 | | return NULL; |
170 | | } |
171 | | |
172 | | SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_COMPRESSED); |
173 | | |
174 | | return tmp; |
175 | | #endif |
176 | 0 | } |
177 | | |
178 | | /* |
179 | | * Decompress a varlena that was compressed using LZ4. |
180 | | */ |
181 | | struct varlena * |
182 | | lz4_decompress_datum(const struct varlena *value) |
183 | 0 | { |
184 | 0 | #ifndef USE_LZ4 |
185 | 0 | NO_LZ4_SUPPORT(); |
186 | 0 | return NULL; /* keep compiler quiet */ |
187 | | #else |
188 | | int32 rawsize; |
189 | | struct varlena *result; |
190 | | |
191 | | /* allocate memory for the uncompressed data */ |
192 | | result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ); |
193 | | |
194 | | /* decompress the data */ |
195 | | rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED, |
196 | | VARDATA(result), |
197 | | VARSIZE(value) - VARHDRSZ_COMPRESSED, |
198 | | VARDATA_COMPRESSED_GET_EXTSIZE(value)); |
199 | | if (rawsize < 0) |
200 | | ereport(ERROR, |
201 | | (errcode(ERRCODE_DATA_CORRUPTED), |
202 | | errmsg_internal("compressed lz4 data is corrupt"))); |
203 | | |
204 | | |
205 | | SET_VARSIZE(result, rawsize + VARHDRSZ); |
206 | | |
207 | | return result; |
208 | | #endif |
209 | 0 | } |
210 | | |
211 | | /* |
212 | | * Decompress part of a varlena that was compressed using LZ4. |
213 | | */ |
214 | | struct varlena * |
215 | | lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) |
216 | 0 | { |
217 | 0 | #ifndef USE_LZ4 |
218 | 0 | NO_LZ4_SUPPORT(); |
219 | 0 | return NULL; /* keep compiler quiet */ |
220 | | #else |
221 | | int32 rawsize; |
222 | | struct varlena *result; |
223 | | |
224 | | /* slice decompression not supported prior to 1.8.3 */ |
225 | | if (LZ4_versionNumber() < 10803) |
226 | | return lz4_decompress_datum(value); |
227 | | |
228 | | /* allocate memory for the uncompressed data */ |
229 | | result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
230 | | |
231 | | /* decompress the data */ |
232 | | rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED, |
233 | | VARDATA(result), |
234 | | VARSIZE(value) - VARHDRSZ_COMPRESSED, |
235 | | slicelength, |
236 | | slicelength); |
237 | | if (rawsize < 0) |
238 | | ereport(ERROR, |
239 | | (errcode(ERRCODE_DATA_CORRUPTED), |
240 | | errmsg_internal("compressed lz4 data is corrupt"))); |
241 | | |
242 | | SET_VARSIZE(result, rawsize + VARHDRSZ); |
243 | | |
244 | | return result; |
245 | | #endif |
246 | 0 | } |
247 | | |
248 | | /* |
249 | | * Extract compression ID from a varlena. |
250 | | * |
251 | | * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed. |
252 | | */ |
253 | | ToastCompressionId |
254 | | toast_get_compression_id(struct varlena *attr) |
255 | 0 | { |
256 | 0 | ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; |
257 | | |
258 | | /* |
259 | | * If it is stored externally then fetch the compression method id from |
260 | | * the external toast pointer. If compressed inline, fetch it from the |
261 | | * toast compression header. |
262 | | */ |
263 | 0 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
264 | 0 | { |
265 | 0 | struct varatt_external toast_pointer; |
266 | |
|
267 | 0 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
268 | |
|
269 | 0 | if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
270 | 0 | cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); |
271 | 0 | } |
272 | 0 | else if (VARATT_IS_COMPRESSED(attr)) |
273 | 0 | cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); |
274 | |
|
275 | 0 | return cmid; |
276 | 0 | } |
277 | | |
278 | | /* |
279 | | * CompressionNameToMethod - Get compression method from compression name |
280 | | * |
281 | | * Search in the available built-in methods. If the compression not found |
282 | | * in the built-in methods then return InvalidCompressionMethod. |
283 | | */ |
284 | | char |
285 | | CompressionNameToMethod(const char *compression) |
286 | 0 | { |
287 | 0 | if (strcmp(compression, "pglz") == 0) |
288 | 0 | return TOAST_PGLZ_COMPRESSION; |
289 | 0 | else if (strcmp(compression, "lz4") == 0) |
290 | 0 | { |
291 | 0 | #ifndef USE_LZ4 |
292 | 0 | NO_LZ4_SUPPORT(); |
293 | 0 | #endif |
294 | 0 | return TOAST_LZ4_COMPRESSION; |
295 | 0 | } |
296 | | |
297 | 0 | return InvalidCompressionMethod; |
298 | 0 | } |
299 | | |
300 | | /* |
301 | | * GetCompressionMethodName - Get compression method name |
302 | | */ |
303 | | const char * |
304 | | GetCompressionMethodName(char method) |
305 | 0 | { |
306 | 0 | switch (method) |
307 | 0 | { |
308 | 0 | case TOAST_PGLZ_COMPRESSION: |
309 | 0 | return "pglz"; |
310 | 0 | case TOAST_LZ4_COMPRESSION: |
311 | 0 | return "lz4"; |
312 | 0 | default: |
313 | 0 | elog(ERROR, "invalid compression method %c", method); |
314 | 0 | return NULL; /* keep compiler quiet */ |
315 | 0 | } |
316 | 0 | } |