/src/postgres/src/backend/utils/adt/datum.c
Line | Count | Source |
1 | | /*------------------------------------------------------------------------- |
2 | | * |
3 | | * datum.c |
4 | | * POSTGRES Datum (abstract data type) manipulation routines. |
5 | | * |
6 | | * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
7 | | * Portions Copyright (c) 1994, Regents of the University of California |
8 | | * |
9 | | * |
10 | | * IDENTIFICATION |
11 | | * src/backend/utils/adt/datum.c |
12 | | * |
13 | | *------------------------------------------------------------------------- |
14 | | */ |
15 | | |
16 | | /* |
17 | | * In the implementation of these routines we assume the following: |
18 | | * |
19 | | * A) if a type is "byVal" then all the information is stored in the |
20 | | * Datum itself (i.e. no pointers involved!). In this case the |
21 | | * length of the type is always greater than zero and not more than |
22 | | * "sizeof(Datum)" |
23 | | * |
24 | | * B) if a type is not "byVal" and it has a fixed length (typlen > 0), |
25 | | * then the "Datum" always contains a pointer to a stream of bytes. |
26 | | * The number of significant bytes is always equal to the typlen. |
27 | | * |
28 | | * C) if a type is not "byVal" and has typlen == -1, |
29 | | * then the "Datum" always points to a "struct varlena". |
30 | | * This varlena structure has information about the actual length of this |
31 | | * particular instance of the type and about its value. |
32 | | * |
33 | | * D) if a type is not "byVal" and has typlen == -2, |
34 | | * then the "Datum" always points to a null-terminated C string. |
35 | | * |
36 | | * Note that we do not treat "toasted" datums specially; therefore what |
37 | | * will be copied or compared is the compressed data or toast reference. |
38 | | * An exception is made for datumCopy() of an expanded object, however, |
39 | | * because most callers expect to get a simple contiguous (and pfree'able) |
40 | | * result from datumCopy(). See also datumTransfer(). |
41 | | */ |
42 | | |
43 | | #include "postgres.h" |
44 | | |
45 | | #include "access/detoast.h" |
46 | | #include "common/hashfn.h" |
47 | | #include "fmgr.h" |
48 | | #include "utils/datum.h" |
49 | | #include "utils/expandeddatum.h" |
50 | | #include "utils/fmgrprotos.h" |
51 | | |
52 | | |
53 | | /*------------------------------------------------------------------------- |
54 | | * datumGetSize |
55 | | * |
56 | | * Find the "real" size of a datum, given the datum value, |
57 | | * whether it is a "by value", and the declared type length. |
58 | | * (For TOAST pointer datums, this is the size of the pointer datum.) |
59 | | * |
60 | | * This is essentially an out-of-line version of the att_addlength_datum() |
61 | | * macro in access/tupmacs.h. We do a tad more error checking though. |
62 | | *------------------------------------------------------------------------- |
63 | | */ |
64 | | Size |
65 | | datumGetSize(Datum value, bool typByVal, int typLen) |
66 | 0 | { |
67 | 0 | Size size; |
68 | |
|
69 | 0 | if (typByVal) |
70 | 0 | { |
71 | | /* Pass-by-value types are always fixed-length */ |
72 | 0 | Assert(typLen > 0 && typLen <= sizeof(Datum)); |
73 | 0 | size = (Size) typLen; |
74 | 0 | } |
75 | 0 | else |
76 | 0 | { |
77 | 0 | if (typLen > 0) |
78 | 0 | { |
79 | | /* Fixed-length pass-by-ref type */ |
80 | 0 | size = (Size) typLen; |
81 | 0 | } |
82 | 0 | else if (typLen == -1) |
83 | 0 | { |
84 | | /* It is a varlena datatype */ |
85 | 0 | struct varlena *s = (struct varlena *) DatumGetPointer(value); |
86 | |
|
87 | 0 | if (!s) |
88 | 0 | ereport(ERROR, |
89 | 0 | (errcode(ERRCODE_DATA_EXCEPTION), |
90 | 0 | errmsg("invalid Datum pointer"))); |
91 | | |
92 | 0 | size = (Size) VARSIZE_ANY(s); |
93 | 0 | } |
94 | 0 | else if (typLen == -2) |
95 | 0 | { |
96 | | /* It is a cstring datatype */ |
97 | 0 | char *s = (char *) DatumGetPointer(value); |
98 | |
|
99 | 0 | if (!s) |
100 | 0 | ereport(ERROR, |
101 | 0 | (errcode(ERRCODE_DATA_EXCEPTION), |
102 | 0 | errmsg("invalid Datum pointer"))); |
103 | | |
104 | 0 | size = (Size) (strlen(s) + 1); |
105 | 0 | } |
106 | 0 | else |
107 | 0 | { |
108 | 0 | elog(ERROR, "invalid typLen: %d", typLen); |
109 | 0 | size = 0; /* keep compiler quiet */ |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | 0 | return size; |
114 | 0 | } |
115 | | |
116 | | /*------------------------------------------------------------------------- |
117 | | * datumCopy |
118 | | * |
119 | | * Make a copy of a non-NULL datum. |
120 | | * |
121 | | * If the datatype is pass-by-reference, memory is obtained with palloc(). |
122 | | * |
123 | | * If the value is a reference to an expanded object, we flatten into memory |
124 | | * obtained with palloc(). We need to copy because one of the main uses of |
125 | | * this function is to copy a datum out of a transient memory context that's |
126 | | * about to be destroyed, and the expanded object is probably in a child |
127 | | * context that will also go away. Moreover, many callers assume that the |
128 | | * result is a single pfree-able chunk. |
129 | | *------------------------------------------------------------------------- |
130 | | */ |
131 | | Datum |
132 | | datumCopy(Datum value, bool typByVal, int typLen) |
133 | 0 | { |
134 | 0 | Datum res; |
135 | |
|
136 | 0 | if (typByVal) |
137 | 0 | res = value; |
138 | 0 | else if (typLen == -1) |
139 | 0 | { |
140 | | /* It is a varlena datatype */ |
141 | 0 | struct varlena *vl = (struct varlena *) DatumGetPointer(value); |
142 | |
|
143 | 0 | if (VARATT_IS_EXTERNAL_EXPANDED(vl)) |
144 | 0 | { |
145 | | /* Flatten into the caller's memory context */ |
146 | 0 | ExpandedObjectHeader *eoh = DatumGetEOHP(value); |
147 | 0 | Size resultsize; |
148 | 0 | char *resultptr; |
149 | |
|
150 | 0 | resultsize = EOH_get_flat_size(eoh); |
151 | 0 | resultptr = (char *) palloc(resultsize); |
152 | 0 | EOH_flatten_into(eoh, resultptr, resultsize); |
153 | 0 | res = PointerGetDatum(resultptr); |
154 | 0 | } |
155 | 0 | else |
156 | 0 | { |
157 | | /* Otherwise, just copy the varlena datum verbatim */ |
158 | 0 | Size realSize; |
159 | 0 | char *resultptr; |
160 | |
|
161 | 0 | realSize = (Size) VARSIZE_ANY(vl); |
162 | 0 | resultptr = (char *) palloc(realSize); |
163 | 0 | memcpy(resultptr, vl, realSize); |
164 | 0 | res = PointerGetDatum(resultptr); |
165 | 0 | } |
166 | 0 | } |
167 | 0 | else |
168 | 0 | { |
169 | | /* Pass by reference, but not varlena, so not toasted */ |
170 | 0 | Size realSize; |
171 | 0 | char *resultptr; |
172 | |
|
173 | 0 | realSize = datumGetSize(value, typByVal, typLen); |
174 | |
|
175 | 0 | resultptr = (char *) palloc(realSize); |
176 | 0 | memcpy(resultptr, DatumGetPointer(value), realSize); |
177 | 0 | res = PointerGetDatum(resultptr); |
178 | 0 | } |
179 | 0 | return res; |
180 | 0 | } |
181 | | |
182 | | /*------------------------------------------------------------------------- |
183 | | * datumTransfer |
184 | | * |
185 | | * Transfer a non-NULL datum into the current memory context. |
186 | | * |
187 | | * This is equivalent to datumCopy() except when the datum is a read-write |
188 | | * pointer to an expanded object. In that case we merely reparent the object |
189 | | * into the current context, and return its standard R/W pointer (in case the |
190 | | * given one is a transient pointer of shorter lifespan). |
191 | | *------------------------------------------------------------------------- |
192 | | */ |
193 | | Datum |
194 | | datumTransfer(Datum value, bool typByVal, int typLen) |
195 | 0 | { |
196 | 0 | if (!typByVal && typLen == -1 && |
197 | 0 | VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value))) |
198 | 0 | value = TransferExpandedObject(value, CurrentMemoryContext); |
199 | 0 | else |
200 | 0 | value = datumCopy(value, typByVal, typLen); |
201 | 0 | return value; |
202 | 0 | } |
203 | | |
204 | | /*------------------------------------------------------------------------- |
205 | | * datumIsEqual |
206 | | * |
207 | | * Return true if two datums are equal, false otherwise |
208 | | * |
209 | | * NOTE: XXX! |
210 | | * We just compare the bytes of the two values, one by one. |
211 | | * This routine will return false if there are 2 different |
212 | | * representations of the same value (something along the lines |
213 | | * of say the representation of zero in one's complement arithmetic). |
214 | | * Also, it will probably not give the answer you want if either |
215 | | * datum has been "toasted". |
216 | | * |
217 | | * Do not try to make this any smarter than it currently is with respect |
218 | | * to "toasted" datums, because some of the callers could be working in the |
219 | | * context of an aborted transaction. |
220 | | *------------------------------------------------------------------------- |
221 | | */ |
222 | | bool |
223 | | datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) |
224 | 0 | { |
225 | 0 | bool res; |
226 | |
|
227 | 0 | if (typByVal) |
228 | 0 | { |
229 | | /* |
230 | | * just compare the two datums. NOTE: just comparing "len" bytes will |
231 | | * not do the work, because we do not know how these bytes are aligned |
232 | | * inside the "Datum". We assume instead that any given datatype is |
233 | | * consistent about how it fills extraneous bits in the Datum. |
234 | | */ |
235 | 0 | res = (value1 == value2); |
236 | 0 | } |
237 | 0 | else |
238 | 0 | { |
239 | 0 | Size size1, |
240 | 0 | size2; |
241 | 0 | char *s1, |
242 | 0 | *s2; |
243 | | |
244 | | /* |
245 | | * Compare the bytes pointed by the pointers stored in the datums. |
246 | | */ |
247 | 0 | size1 = datumGetSize(value1, typByVal, typLen); |
248 | 0 | size2 = datumGetSize(value2, typByVal, typLen); |
249 | 0 | if (size1 != size2) |
250 | 0 | return false; |
251 | 0 | s1 = (char *) DatumGetPointer(value1); |
252 | 0 | s2 = (char *) DatumGetPointer(value2); |
253 | 0 | res = (memcmp(s1, s2, size1) == 0); |
254 | 0 | } |
255 | 0 | return res; |
256 | 0 | } |
257 | | |
258 | | /*------------------------------------------------------------------------- |
259 | | * datum_image_eq |
260 | | * |
261 | | * Compares two datums for identical contents, based on byte images. Return |
262 | | * true if the two datums are equal, false otherwise. |
263 | | *------------------------------------------------------------------------- |
264 | | */ |
265 | | bool |
266 | | datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) |
267 | 0 | { |
268 | 0 | Size len1, |
269 | 0 | len2; |
270 | 0 | bool result = true; |
271 | |
|
272 | 0 | if (typByVal) |
273 | 0 | { |
274 | 0 | result = (value1 == value2); |
275 | 0 | } |
276 | 0 | else if (typLen > 0) |
277 | 0 | { |
278 | 0 | result = (memcmp(DatumGetPointer(value1), |
279 | 0 | DatumGetPointer(value2), |
280 | 0 | typLen) == 0); |
281 | 0 | } |
282 | 0 | else if (typLen == -1) |
283 | 0 | { |
284 | 0 | len1 = toast_raw_datum_size(value1); |
285 | 0 | len2 = toast_raw_datum_size(value2); |
286 | | /* No need to de-toast if lengths don't match. */ |
287 | 0 | if (len1 != len2) |
288 | 0 | result = false; |
289 | 0 | else |
290 | 0 | { |
291 | 0 | struct varlena *arg1val; |
292 | 0 | struct varlena *arg2val; |
293 | |
|
294 | 0 | arg1val = PG_DETOAST_DATUM_PACKED(value1); |
295 | 0 | arg2val = PG_DETOAST_DATUM_PACKED(value2); |
296 | |
|
297 | 0 | result = (memcmp(VARDATA_ANY(arg1val), |
298 | 0 | VARDATA_ANY(arg2val), |
299 | 0 | len1 - VARHDRSZ) == 0); |
300 | | |
301 | | /* Only free memory if it's a copy made here. */ |
302 | 0 | if ((Pointer) arg1val != DatumGetPointer(value1)) |
303 | 0 | pfree(arg1val); |
304 | 0 | if ((Pointer) arg2val != DatumGetPointer(value2)) |
305 | 0 | pfree(arg2val); |
306 | 0 | } |
307 | 0 | } |
308 | 0 | else if (typLen == -2) |
309 | 0 | { |
310 | 0 | char *s1, |
311 | 0 | *s2; |
312 | | |
313 | | /* Compare cstring datums */ |
314 | 0 | s1 = DatumGetCString(value1); |
315 | 0 | s2 = DatumGetCString(value2); |
316 | 0 | len1 = strlen(s1) + 1; |
317 | 0 | len2 = strlen(s2) + 1; |
318 | 0 | if (len1 != len2) |
319 | 0 | return false; |
320 | 0 | result = (memcmp(s1, s2, len1) == 0); |
321 | 0 | } |
322 | 0 | else |
323 | 0 | elog(ERROR, "unexpected typLen: %d", typLen); |
324 | | |
325 | 0 | return result; |
326 | 0 | } |
327 | | |
328 | | /*------------------------------------------------------------------------- |
329 | | * datum_image_hash |
330 | | * |
331 | | * Generate a hash value based on the binary representation of 'value'. Most |
332 | | * use cases will want to use the hash function specific to the Datum's type, |
333 | | * however, some corner cases require generating a hash value based on the |
334 | | * actual bits rather than the logical value. |
335 | | *------------------------------------------------------------------------- |
336 | | */ |
337 | | uint32 |
338 | | datum_image_hash(Datum value, bool typByVal, int typLen) |
339 | 0 | { |
340 | 0 | Size len; |
341 | 0 | uint32 result; |
342 | |
|
343 | 0 | if (typByVal) |
344 | 0 | result = hash_bytes((unsigned char *) &value, sizeof(Datum)); |
345 | 0 | else if (typLen > 0) |
346 | 0 | result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); |
347 | 0 | else if (typLen == -1) |
348 | 0 | { |
349 | 0 | struct varlena *val; |
350 | |
|
351 | 0 | len = toast_raw_datum_size(value); |
352 | |
|
353 | 0 | val = PG_DETOAST_DATUM_PACKED(value); |
354 | |
|
355 | 0 | result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); |
356 | | |
357 | | /* Only free memory if it's a copy made here. */ |
358 | 0 | if ((Pointer) val != DatumGetPointer(value)) |
359 | 0 | pfree(val); |
360 | 0 | } |
361 | 0 | else if (typLen == -2) |
362 | 0 | { |
363 | 0 | char *s; |
364 | |
|
365 | 0 | s = DatumGetCString(value); |
366 | 0 | len = strlen(s) + 1; |
367 | |
|
368 | 0 | result = hash_bytes((unsigned char *) s, len); |
369 | 0 | } |
370 | 0 | else |
371 | 0 | { |
372 | 0 | elog(ERROR, "unexpected typLen: %d", typLen); |
373 | 0 | result = 0; /* keep compiler quiet */ |
374 | 0 | } |
375 | | |
376 | 0 | return result; |
377 | 0 | } |
378 | | |
379 | | /*------------------------------------------------------------------------- |
380 | | * btequalimage |
381 | | * |
382 | | * Generic "equalimage" support function. |
383 | | * |
384 | | * B-Tree operator classes whose equality function could safely be replaced by |
385 | | * datum_image_eq() in all cases can use this as their "equalimage" support |
386 | | * function. |
387 | | * |
388 | | * Currently, we unconditionally assume that any B-Tree operator class that |
389 | | * registers btequalimage as its support function 4 must be able to safely use |
390 | | * optimizations like deduplication (i.e. we return true unconditionally). If |
391 | | * it ever proved necessary to rescind support for an operator class, we could |
392 | | * do that in a targeted fashion by doing something with the opcintype |
393 | | * argument. |
394 | | *------------------------------------------------------------------------- |
395 | | */ |
396 | | Datum |
397 | | btequalimage(PG_FUNCTION_ARGS) |
398 | 0 | { |
399 | | /* Oid opcintype = PG_GETARG_OID(0); */ |
400 | |
|
401 | 0 | PG_RETURN_BOOL(true); |
402 | 0 | } |
403 | | |
404 | | /*------------------------------------------------------------------------- |
405 | | * datumEstimateSpace |
406 | | * |
407 | | * Compute the amount of space that datumSerialize will require for a |
408 | | * particular Datum. |
409 | | *------------------------------------------------------------------------- |
410 | | */ |
411 | | Size |
412 | | datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen) |
413 | 0 | { |
414 | 0 | Size sz = sizeof(int); |
415 | |
|
416 | 0 | if (!isnull) |
417 | 0 | { |
418 | | /* no need to use add_size, can't overflow */ |
419 | 0 | if (typByVal) |
420 | 0 | sz += sizeof(Datum); |
421 | 0 | else if (typLen == -1 && |
422 | 0 | VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) |
423 | 0 | { |
424 | | /* Expanded objects need to be flattened, see comment below */ |
425 | 0 | sz += EOH_get_flat_size(DatumGetEOHP(value)); |
426 | 0 | } |
427 | 0 | else |
428 | 0 | sz += datumGetSize(value, typByVal, typLen); |
429 | 0 | } |
430 | |
|
431 | 0 | return sz; |
432 | 0 | } |
433 | | |
434 | | /*------------------------------------------------------------------------- |
435 | | * datumSerialize |
436 | | * |
437 | | * Serialize a possibly-NULL datum into caller-provided storage. |
438 | | * |
439 | | * Note: "expanded" objects are flattened so as to produce a self-contained |
440 | | * representation, but other sorts of toast pointers are transferred as-is. |
441 | | * This is because the intended use of this function is to pass the value |
442 | | * to another process within the same database server. The other process |
443 | | * could not access an "expanded" object within this process's memory, but |
444 | | * we assume it can dereference the same TOAST pointers this one can. |
445 | | * |
446 | | * The format is as follows: first, we write a 4-byte header word, which |
447 | | * is either the length of a pass-by-reference datum, -1 for a |
448 | | * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing |
449 | | * further is written. If it is pass-by-value, sizeof(Datum) bytes |
450 | | * follow. Otherwise, the number of bytes indicated by the header word |
451 | | * follow. The caller is responsible for ensuring that there is enough |
452 | | * storage to store the number of bytes that will be written; use |
453 | | * datumEstimateSpace() to find out how many will be needed. |
454 | | * *start_address is updated to point to the byte immediately following |
455 | | * those written. |
456 | | *------------------------------------------------------------------------- |
457 | | */ |
458 | | void |
459 | | datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, |
460 | | char **start_address) |
461 | 0 | { |
462 | 0 | ExpandedObjectHeader *eoh = NULL; |
463 | 0 | int header; |
464 | | |
465 | | /* Write header word. */ |
466 | 0 | if (isnull) |
467 | 0 | header = -2; |
468 | 0 | else if (typByVal) |
469 | 0 | header = -1; |
470 | 0 | else if (typLen == -1 && |
471 | 0 | VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) |
472 | 0 | { |
473 | 0 | eoh = DatumGetEOHP(value); |
474 | 0 | header = EOH_get_flat_size(eoh); |
475 | 0 | } |
476 | 0 | else |
477 | 0 | header = datumGetSize(value, typByVal, typLen); |
478 | 0 | memcpy(*start_address, &header, sizeof(int)); |
479 | 0 | *start_address += sizeof(int); |
480 | | |
481 | | /* If not null, write payload bytes. */ |
482 | 0 | if (!isnull) |
483 | 0 | { |
484 | 0 | if (typByVal) |
485 | 0 | { |
486 | 0 | memcpy(*start_address, &value, sizeof(Datum)); |
487 | 0 | *start_address += sizeof(Datum); |
488 | 0 | } |
489 | 0 | else if (eoh) |
490 | 0 | { |
491 | 0 | char *tmp; |
492 | | |
493 | | /* |
494 | | * EOH_flatten_into expects the target address to be maxaligned, |
495 | | * so we can't store directly to *start_address. |
496 | | */ |
497 | 0 | tmp = (char *) palloc(header); |
498 | 0 | EOH_flatten_into(eoh, tmp, header); |
499 | 0 | memcpy(*start_address, tmp, header); |
500 | 0 | *start_address += header; |
501 | | |
502 | | /* be tidy. */ |
503 | 0 | pfree(tmp); |
504 | 0 | } |
505 | 0 | else |
506 | 0 | { |
507 | 0 | memcpy(*start_address, DatumGetPointer(value), header); |
508 | 0 | *start_address += header; |
509 | 0 | } |
510 | 0 | } |
511 | 0 | } |
512 | | |
513 | | /*------------------------------------------------------------------------- |
514 | | * datumRestore |
515 | | * |
516 | | * Restore a possibly-NULL datum previously serialized by datumSerialize. |
517 | | * *start_address is updated according to the number of bytes consumed. |
518 | | *------------------------------------------------------------------------- |
519 | | */ |
520 | | Datum |
521 | | datumRestore(char **start_address, bool *isnull) |
522 | 0 | { |
523 | 0 | int header; |
524 | 0 | void *d; |
525 | | |
526 | | /* Read header word. */ |
527 | 0 | memcpy(&header, *start_address, sizeof(int)); |
528 | 0 | *start_address += sizeof(int); |
529 | | |
530 | | /* If this datum is NULL, we can stop here. */ |
531 | 0 | if (header == -2) |
532 | 0 | { |
533 | 0 | *isnull = true; |
534 | 0 | return (Datum) 0; |
535 | 0 | } |
536 | | |
537 | | /* OK, datum is not null. */ |
538 | 0 | *isnull = false; |
539 | | |
540 | | /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */ |
541 | 0 | if (header == -1) |
542 | 0 | { |
543 | 0 | Datum val; |
544 | |
|
545 | 0 | memcpy(&val, *start_address, sizeof(Datum)); |
546 | 0 | *start_address += sizeof(Datum); |
547 | 0 | return val; |
548 | 0 | } |
549 | | |
550 | | /* Pass-by-reference case; copy indicated number of bytes. */ |
551 | 0 | Assert(header > 0); |
552 | 0 | d = palloc(header); |
553 | 0 | memcpy(d, *start_address, header); |
554 | 0 | *start_address += header; |
555 | 0 | return PointerGetDatum(d); |
556 | 0 | } |